@@ -300,6 +300,7 @@ void DFT2D1gpu(float* din, thrust::complex<float>* dout, int num_rows, int num_c
300
300
thrust::complex<float > sum, twiddle;
301
301
angle = -2 .0f * PI * fdividef ((float )i, (float )num_cols);
302
302
sum = 0 .0f ;
303
+ #pragma unroll
303
304
for (int k = 0 ; k < num_cols; ++k) {
304
305
// sincosf(angle * k, &sinf, &cosf);
305
306
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -324,6 +325,7 @@ void DFT2D2gpu(thrust::complex<float>* din, thrust::complex<float>* dout, int nu
324
325
thrust::complex<float > sum, twiddle;
325
326
angle = -2 .0f * PI * fdividef ((float )i, (float )num_cols);
326
327
sum = 0 .0f ;
328
+ #pragma unroll
327
329
for (int k = 0 ; k < num_cols; ++k) {
328
330
// sincosf(angle * k, &sinf, &cosf);
329
331
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -348,6 +350,7 @@ void iDFT2D1gpu(thrust::complex<float>* din, thrust::complex<float>* dout, int n
348
350
thrust::complex<float > sum, twiddle;
349
351
angle = 2 .0f * PI * fdividef ((float )i, (float )num_cols);
350
352
sum = 0 .0f ;
353
+ #pragma unroll
351
354
for (int k = 0 ; k < num_cols; ++k) {
352
355
// sincosf(angle * k, &sinf, &cosf);
353
356
// twiddle = thrust::complex<float>(cosf, sinf);
@@ -376,6 +379,7 @@ void iDFT2D2gpu(thrust::complex<float>* din, float* dout, int num_rows, int num_
376
379
thrust::complex<float > twiddle;
377
380
angle = 2 .0f * PI * fdividef ((float )i, (float )num_cols);
378
381
sum = 0 .0f ;
382
+ #pragma unroll
379
383
for (int k = 0 ; k < num_cols; ++k) {
380
384
// sincosf(angle * k, &sinf, &cosf);
381
385
// twiddle = thrust::complex<float>(cosf, sinf);
0 commit comments