@@ -373,125 +373,4 @@ void nbl_glsl_ext_FFT(bool is_inverse)
373
373
}
374
374
}
375
375
376
- // REMOVE THESE 3 commits later :D
377
- uint nbl_glsl_ext_FFT_calculateTwiddlePower_OLD(in uint threadId, in uint iteration, in uint logTwoN)
378
- {
379
- const uint shiftSuffix = logTwoN - 1u - iteration;
380
- const uint suffixMask = (2u << iteration) - 1u;
381
- return (threadId & suffixMask) << shiftSuffix;
382
- }
383
-
384
- nbl_glsl_complex nbl_glsl_ext_FFT_twiddle_OLD(in uint threadId, in uint iteration, in uint logTwoN)
385
- {
386
- uint k = nbl_glsl_ext_FFT_calculateTwiddlePower(threadId, iteration, logTwoN);
387
- return nbl_glsl_expImaginary(- 1 .0f * 2 .0f * nbl_glsl_PI * float (k) / float (1 << logTwoN));
388
- }
389
-
390
- nbl_glsl_complex nbl_glsl_ext_FFT_twiddleInverse_OLD(in uint threadId, in uint iteration, in uint logTwoN)
391
- {
392
- return nbl_glsl_complex_conjugate(nbl_glsl_ext_FFT_twiddle(threadId, iteration, logTwoN));
393
- }
394
-
395
- uint nbl_glsl_ext_FFT_getChannel_OLD()
396
- {
397
- uint direction = nbl_glsl_ext_FFT_Parameters_t_getDirection();
398
- return gl_WorkGroupID[direction];
399
- }
400
-
401
- void nbl_glsl_ext_FFT_OLD(bool is_inverse)
402
- {
403
- // Virtual Threads Calculation
404
- uint dataLength = nbl_glsl_ext_FFT_getDimLength(nbl_glsl_ext_FFT_Parameters_t_getPaddedDimensions());
405
- uint num_virtual_threads = (dataLength- 1u)/ (_NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_)+ 1u;
406
- uint thread_offset = gl_LocalInvocationIndex;
407
-
408
- uint channel = nbl_glsl_ext_FFT_getChannel_OLD();
409
-
410
- // Pass 0: Bit Reversal
411
- uint leadingZeroes = nbl_glsl_clz(dataLength) + 1u;
412
- uint logTwo = 32u - leadingZeroes;
413
-
414
- nbl_glsl_complex current_values[_NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD];
415
- nbl_glsl_complex shuffled_values[_NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD];
416
-
417
- // Load Initial Values into Local Mem (bit reversed indices)
418
- for (uint t = 0u; t < num_virtual_threads; t++ )
419
- {
420
- uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
421
- uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
422
- uvec3 bitReversedCoords = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords, leadingZeroes);
423
-
424
- current_values[t] = nbl_glsl_ext_FFT_getPaddedData(bitReversedCoords, channel);
425
- }
426
-
427
- // For loop for each stage of the FFT (each virtual thread computes 1 buttefly wing)
428
- for (uint i = 0u; i < logTwo; ++ i)
429
- {
430
- uint mask = 1u << i;
431
-
432
- // Data Exchange for virtual threads :
433
- // X and Y are seperate to use less shared memory for complex numbers
434
- // Get Shuffled Values X for virtual threads
435
- for (uint t = 0u; t < num_virtual_threads; t++ )
436
- {
437
- uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
438
- _NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid] = floatBitsToUint(current_values[t].x);
439
- }
440
- barrier();
441
- memoryBarrierShared();
442
- for (uint t = 0u; t < num_virtual_threads; t++ )
443
- {
444
- uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
445
- shuffled_values[t].x = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid ^ mask]);
446
- }
447
-
448
- barrier();
449
- memoryBarrierShared();
450
-
451
- // Get Shuffled Values Y for virtual threads
452
- for (uint t = 0u; t < num_virtual_threads; t++ )
453
- {
454
- uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
455
- _NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid] = floatBitsToUint(current_values[t].y);
456
- }
457
- barrier();
458
- memoryBarrierShared();
459
- for (uint t = 0u; t < num_virtual_threads; t++ )
460
- {
461
- uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
462
- shuffled_values[t].y = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid ^ mask]);
463
- }
464
-
465
- // Computation of each virtual thread
466
- for (uint t = 0u; t < num_virtual_threads; t++ )
467
- {
468
- uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
469
- nbl_glsl_complex shuffled_value = shuffled_values[t];
470
-
471
- nbl_glsl_complex twiddle = (! is_inverse)
472
- ? nbl_glsl_ext_FFT_twiddle_OLD(tid, i, logTwo)
473
- : nbl_glsl_ext_FFT_twiddleInverse_OLD(tid, i, logTwo);
474
-
475
- nbl_glsl_complex this_value = current_values[t];
476
-
477
- if (0u < uint (tid & mask)) {
478
- current_values[t] = shuffled_value + nbl_glsl_complex_mul(twiddle, this_value);
479
- } else {
480
- current_values[t] = this_value + nbl_glsl_complex_mul(twiddle, shuffled_value);
481
- }
482
- }
483
- }
484
-
485
- for (uint t = 0u; t < num_virtual_threads; t++ )
486
- {
487
- uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
488
- uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
489
- nbl_glsl_complex complex_value = (! is_inverse)
490
- ? current_values[t]
491
- : current_values[t] / dataLength;
492
-
493
- nbl_glsl_ext_FFT_setData(coords, channel, complex_value);
494
- }
495
- }
496
-
497
376
#endif
0 commit comments