5 #if !defined(YAKL_ARCH_CUDA) && !defined(YAKL_ARCH_HIP) && !defined(YAKL_ARCH_SYCL)
6 #define POCKETFFT_CACHE_SIZE 2
7 #define POCKETFFT_NO_MULTITHREADING
8 #include "pocketfft_hdronly.h"
57 #if defined(YAKL_ARCH_CUDA)
58 cufftHandle plan_forward;
59 cufftHandle plan_inverse;
60 #define CHECK(func) { int myloc = func; if (myloc != CUFFT_SUCCESS) { std::cerr << "ERROR: YAKL CUFFT: " << __FILE__ << ": " <<__LINE__ << std::endl; yakl_throw(""); } }
61 #elif defined(YAKL_ARCH_HIP)
62 rocfft_plan plan_forward;
63 rocfft_plan plan_inverse;
64 #define CHECK(func) { int myloc = func; if (myloc != rocfft_status_success) { std::cerr << "ERROR: YAKL ROCFFT: " << __FILE__ << ": " <<__LINE__ << std::endl; yakl_throw(""); } }
65 #elif defined(YAKL_ARCH_SYCL)
66 typedef oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::SINGLE, oneapi::mkl::dft::domain::REAL> desc_single_t;
67 typedef oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::DOUBLE, oneapi::mkl::dft::domain::REAL> desc_double_t;
68 void *plan_forward, *plan_inverse;
69 #define CHECK(func) { \
73 catch (oneapi::mkl::exception const &ex) { \
74 std::cerr << "ERROR: YAKL ONEMKL-FFT: " << __FILE__ << " : " << __LINE__ \
75 << std::endl; yakl_throw(""); \
80 void nullify() {batch_size = -1; transform_size = -1; trdim = -1;}
87 if (transform_size != -1) {
88 #if defined(YAKL_ARCH_CUDA)
89 CHECK( cufftDestroy( plan_forward ) );
90 CHECK( cufftDestroy( plan_inverse ) );
91 #elif defined(YAKL_ARCH_HIP)
92 CHECK( rocfft_plan_destroy( plan_forward ) );
93 CHECK( rocfft_plan_destroy( plan_inverse ) );
106 int idist = arr.extent(trdim);
107 int odist = idist / 2;
108 int batch = arr.totElems() / arr.extent(trdim);
109 int *inembed =
nullptr;
110 int *onembed =
nullptr;
112 #if defined(YAKL_ARCH_CUDA)
113 if constexpr (std::is_same<T,float >::value) {
114 CHECK( cufftPlanMany(&plan_forward, rank, &n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, batch) );
115 CHECK( cufftPlanMany(&plan_inverse, rank, &n, onembed, ostride, odist, inembed, istride, idist, CUFFT_C2R, batch) );
116 }
else if constexpr (std::is_same<T,double>::value) {
117 CHECK( cufftPlanMany(&plan_forward, rank, &n, inembed, istride, idist, onembed, ostride, odist, CUFFT_D2Z, batch) );
118 CHECK( cufftPlanMany(&plan_inverse, rank, &n, onembed, ostride, odist, inembed, istride, idist, CUFFT_Z2D, batch) );
120 #elif defined(YAKL_ARCH_HIP)
121 size_t len = tr_size;
122 size_t const roc_istride = istride;
123 size_t const roc_ostride = ostride;
124 size_t const roc_idist = idist;
125 size_t const roc_odist = odist;
126 size_t const roc_off = 0;
127 rocfft_plan_description desc;
128 if constexpr (std::is_same<T,float >::value) {
129 CHECK( rocfft_plan_description_create( &desc ) );
130 CHECK( rocfft_plan_description_set_data_layout( desc, rocfft_array_type_real, rocfft_array_type_hermitian_interleaved, &roc_off, &roc_off, (
size_t) 1, &roc_istride, roc_idist, (
size_t) 1, &roc_ostride, roc_odist ) );
131 CHECK( rocfft_plan_create(&plan_forward, rocfft_placement_inplace, rocfft_transform_type_real_forward, rocfft_precision_single, (
size_t) 1, &len, (
size_t) batch, desc) );
132 CHECK( rocfft_plan_description_destroy( desc ) );
133 CHECK( rocfft_plan_description_create( &desc ) );
134 CHECK( rocfft_plan_description_set_data_layout( desc, rocfft_array_type_hermitian_interleaved, rocfft_array_type_real, &roc_off, &roc_off, (
size_t) 1, &roc_ostride, roc_odist, (
size_t) 1, &roc_istride, roc_idist ) );
135 CHECK( rocfft_plan_create(&plan_inverse, rocfft_placement_inplace, rocfft_transform_type_real_inverse, rocfft_precision_single, (
size_t) 1, &len, (
size_t) batch, desc) );
136 CHECK( rocfft_plan_description_destroy( desc ) );
137 }
else if constexpr (std::is_same<T,double>::value) {
138 CHECK( rocfft_plan_description_create( &desc ) );
139 CHECK( rocfft_plan_description_set_data_layout( desc, rocfft_array_type_real, rocfft_array_type_hermitian_interleaved, &roc_off, &roc_off, (
size_t) 1, &roc_istride, roc_idist, (
size_t) 1, &roc_ostride, roc_odist ) );
140 CHECK( rocfft_plan_create(&plan_forward, rocfft_placement_inplace, rocfft_transform_type_real_forward, rocfft_precision_double, (
size_t) 1, &len, (
size_t) batch, desc) );
141 CHECK( rocfft_plan_description_destroy( desc ) );
142 CHECK( rocfft_plan_description_create( &desc ) );
143 CHECK( rocfft_plan_description_set_data_layout( desc, rocfft_array_type_hermitian_interleaved, rocfft_array_type_real, &roc_off, &roc_off, (
size_t) 1, &roc_ostride, roc_odist, (
size_t) 1, &roc_istride, roc_idist ) );
144 CHECK( rocfft_plan_create(&plan_inverse, rocfft_placement_inplace, rocfft_transform_type_real_inverse, rocfft_precision_double, (
size_t) 1, &len, (
size_t) batch, desc) );
145 CHECK( rocfft_plan_description_destroy( desc ) );
147 #elif defined(YAKL_ARCH_SYCL)
148 if constexpr (std::is_same_v<T,float>) {
149 plan_forward =
new desc_single_t(n);
150 plan_inverse =
new desc_single_t(n);
152 CHECK(
static_cast<desc_single_t*
>(plan_forward)->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batch) );
153 CHECK(
static_cast<desc_single_t*
>(plan_forward)->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, idist) );
154 CHECK(
static_cast<desc_single_t*
>(plan_forward)->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, odist) );
156 CHECK(
static_cast<desc_single_t*
>(plan_inverse)->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batch) );
157 CHECK(
static_cast<desc_single_t*
>(plan_inverse)->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, odist) );
158 CHECK(
static_cast<desc_single_t*
>(plan_inverse)->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, idist) );
160 CHECK(
static_cast<desc_single_t*
>(plan_forward)->commit( sycl_default_stream() ) );
161 CHECK(
static_cast<desc_single_t*
>(plan_inverse)->commit( sycl_default_stream() ) );
162 }
else if constexpr (std::is_same_v<T,double>) {
163 plan_forward =
new desc_double_t(n);
164 plan_inverse =
new desc_double_t(n);
166 CHECK(
static_cast<desc_double_t*
>(plan_forward)->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batch) );
167 CHECK(
static_cast<desc_double_t*
>(plan_forward)->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, idist) );
168 CHECK(
static_cast<desc_double_t*
>(plan_forward)->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, odist) );
170 CHECK(
static_cast<desc_double_t*
>(plan_inverse)->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, batch) );
171 CHECK(
static_cast<desc_double_t*
>(plan_inverse)->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, idist) );
172 CHECK(
static_cast<desc_double_t*
>(plan_inverse)->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, odist) );
174 CHECK(
static_cast<desc_double_t*
>(plan_forward)->commit( sycl_default_stream() ) );
175 CHECK(
static_cast<desc_double_t*
>(plan_inverse)->commit( sycl_default_stream() ) );
180 this->batch_size = batch ;
181 this->transform_size = tr_size;
182 this->trdim = trdim ;
192 if (trdim < 0 || transform_size < 0 || batch_size < 0) {
193 if (trdim_in < 0 || transform_size_in < 0)
yakl_throw(
"ERROR: Using forward_real before calling init without "
194 "specifying both trdim_in and transform_size_in");
195 init( arr , trdim_in , transform_size_in );
198 if ( ( (transform_size%2==1 ? transform_size+1 : transform_size+2) != arr.extent(trdim) ) ||
199 ( transform_size_in > 0 && transform_size_in != transform_size ) ||
200 ( arr.totElems() / arr.extent(trdim) != batch_size ) ) {
201 if (trdim_in < 0 || transform_size_in < 0)
yakl_throw(
"ERROR: Changing transform size or batch sizewithout "
202 "specifying both trdim_in and transform_size_in");
204 init( arr , trdim_in , transform_size_in );
206 auto dims = arr.get_dimensions();
207 int d2 = 1;
for (
int i=N-1; i > trdim; i--) { d2 *= dims(i); }
208 int d1 = dims(trdim);
209 int d0 = arr.totElems() / d2 / d1;
212 copy = arr.reshape(d0,d2,d1);
214 auto in = arr.reshape(d0,d1,d2);
215 copy = arr.createDeviceCopy().reshape(d0,d2,d1);
219 #if defined(YAKL_ARCH_CUDA)
220 if constexpr (std::is_same<T,float >::value) {
221 CHECK( cufftExecR2C(plan_forward, (cufftReal *) copy.data(), (cufftComplex *) copy.data()) );
222 }
else if constexpr (std::is_same<T,double>::value) {
223 CHECK( cufftExecD2Z(plan_forward, (cufftDoubleReal *) copy.data(), (cufftDoubleComplex *) copy.data()) );
225 #elif defined(YAKL_ARCH_HIP)
226 std::array<double *,1> ibuf( {copy.data()} );
227 std::array<double *,1> obuf( {copy.data()} );
228 rocfft_execution_info info;
229 CHECK( rocfft_execution_info_create( &info ) );
230 CHECK( rocfft_execute(plan_forward, (
void **) ibuf.data(), (
void **) obuf.data(), info) );
231 CHECK( rocfft_execution_info_destroy( info ) );
232 #elif defined(YAKL_ARCH_SYCL)
233 if constexpr (std::is_same<T,float >::value) {
234 CHECK( oneapi::mkl::dft::compute_forward(*(
static_cast<desc_single_t*
>(plan_forward)),
static_cast<T*
>(copy.data())) );
235 }
else if constexpr (std::is_same<T,double>::value) {
236 CHECK( oneapi::mkl::dft::compute_forward(*(
static_cast<desc_double_t*
>(plan_forward)),
static_cast<T*
>(copy.data())) );
241 auto copy_host = copy.createHostCopy();
242 for (
int i0 = 0; i0 < d0; i0++) {
243 for (
int i2 = 0; i2 < d2; i2++) {
244 for (
int i1 = 0; i1 < transform_size; i1++) {
245 pfft_in(i0,i2,i1) = copy_host(i0,i2,i1);
249 using pocketfft::detail::shape_t;
250 using pocketfft::detail::stride_t;
251 shape_t shape_in (3);
for (
int i=0; i < 3; i++) { shape_in[i] = pfft_in.extent(i); }
252 stride_t stride_in (3);
253 stride_t stride_out(3);
254 stride_in [0] = d2* transform_size *
sizeof( T );
255 stride_in [1] = transform_size *
sizeof( T );
256 stride_in [2] =
sizeof( T );
257 stride_out[0] = d2*((transform_size+2)/2)*
sizeof(std::complex<T>);
258 stride_out[1] = ((transform_size+2)/2)*
sizeof(std::complex<T>);
259 stride_out[2] =
sizeof(std::complex<T>);
260 pocketfft::r2c<T>(shape_in, stride_in, stride_out, (
size_t) 2,
true, pfft_in.data(), pfft_out.data(), (T) 1);
261 for (
int i0 = 0; i0 < d0; i0++) {
262 for (
int i2 = 0; i2 < d2; i2++) {
263 for (
int i1 = 0; i1 < (transform_size+2)/2; i1++) {
264 copy_host(i0,i2,2*i1 ) = pfft_out(i0,i2,i1).real();
265 copy_host(i0,i2,2*i1+1) = pfft_out(i0,i2,i1).imag();
269 copy_host.deep_copy_to( copy );
273 auto out = arr.reshape(d0,d1,d2);
286 if (trdim < 0 || transform_size < 0 || batch_size < 0) {
287 if (trdim_in < 0 || transform_size_in < 0)
yakl_throw(
"ERROR: Using forward_real before calling init without "
288 "specifying both trdim_in and transform_size_in");
289 init( arr , trdim_in , transform_size_in );
292 if ( ( (transform_size%2==1 ? transform_size+1 : transform_size+2) != arr.extent(trdim) ) ||
293 ( transform_size_in > 0 && transform_size_in != transform_size ) ||
294 ( arr.totElems() / arr.extent(trdim) != batch_size ) ) {
295 if (trdim_in < 0 || transform_size_in < 0)
yakl_throw(
"ERROR: Changing transform size or batch sizewithout "
296 "specifying both trdim_in and transform_size_in");
298 init( arr , trdim_in , transform_size_in );
300 auto dims = arr.get_dimensions();
301 int d2 = 1;
for (
int i=N-1; i > trdim; i--) { d2 *= dims(i); }
302 int d1 = dims(trdim);
303 int d0 = arr.totElems() / d2 / d1;
306 copy = arr.reshape(d0,d2,d1);
308 auto in = arr.reshape(d0,d1,d2);
309 copy = arr.createDeviceCopy().reshape(d0,d2,d1);
313 #if defined(YAKL_ARCH_CUDA)
314 if constexpr (std::is_same<T,float >::value) {
315 CHECK( cufftExecC2R(plan_inverse, (cufftComplex *) copy.data(), (cufftReal *) copy.data()) );
316 }
else if constexpr (std::is_same<T,double>::value) {
317 CHECK( cufftExecZ2D(plan_inverse, (cufftDoubleComplex *) copy.data(), (cufftDoubleReal *) copy.data()) );
319 #elif defined(YAKL_ARCH_HIP)
320 std::array<double *,1> ibuf( {copy.data()} );
321 std::array<double *,1> obuf( {copy.data()} );
322 rocfft_execution_info info;
323 CHECK( rocfft_execution_info_create( &info ) );
324 CHECK( rocfft_execute(plan_inverse, (
void **) ibuf.data(), (
void **) obuf.data(), info) );
325 CHECK( rocfft_execution_info_destroy( info ) );
326 #elif defined(YAKL_ARCH_SYCL)
327 if constexpr (std::is_same<T,float >::value) {
328 CHECK( oneapi::mkl::dft::compute_backward(*(
static_cast<desc_single_t*
>(plan_inverse)),
static_cast<T*
>(copy.data())) );
329 }
else if constexpr (std::is_same<T,double>::value) {
330 CHECK( oneapi::mkl::dft::compute_backward(*(
static_cast<desc_double_t*
>(plan_inverse)),
static_cast<T*
>(copy.data())) );
335 auto copy_host = copy.createHostCopy();
336 for (
int i0 = 0; i0 < d0; i0++) {
337 for (
int i2 = 0; i2 < d2; i2++) {
338 for (
int i1 = 0; i1 < (transform_size+2)/2; i1++) {
339 pfft_in(i0,i2,i1) = std::complex<T>( copy_host(i0,i2,2*i1 ) , copy_host(i0,i2,2*i1+1) );
343 using pocketfft::detail::shape_t;
344 using pocketfft::detail::stride_t;
345 shape_t shape_out (3);
for (
int i=0; i < 3; i++) { shape_out [i] = pfft_out.extent(i); }
346 stride_t stride_in (3);
347 stride_t stride_out(3);
348 stride_in [0] = d2*((transform_size+2)/2)*
sizeof(std::complex<T>);
349 stride_in [1] = ((transform_size+2)/2)*
sizeof(std::complex<T>);
350 stride_in [2] =
sizeof(std::complex<T>);
351 stride_out[0] = d2* transform_size *
sizeof( T );
352 stride_out[1] = transform_size *
sizeof( T );
353 stride_out[2] =
sizeof( T );
354 pocketfft::c2r<T>(shape_out, stride_in, stride_out, (
size_t) 2,
false, pfft_in.data() , pfft_out.data() , (T) 1 );
355 for (
int i0 = 0; i0 < d0; i0++) {
356 for (
int i2 = 0; i2 < d2; i2++) {
357 for (
int i1 = 0; i1 < transform_size; i1++) {
358 copy_host(i0,i2,i1) = pfft_out(i0,i2,i1);
362 copy_host.deep_copy_to( copy );
365 auto out = arr.reshape(d0,d1,d2);
366 YAKL_SCOPE( transform_size , this->transform_size );