@@ -179,12 +179,76 @@ void nbl_glsl_ext_FFT(bool is_inverse)
179
179
uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0 , dataLength); // same as tid * 2
180
180
181
181
uvec3 coords_e = nbl_glsl_ext_FFT_getCoordinates(even_index);
182
- uvec3 bitReversedCoords_e = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords_e, leadingZeroes);
183
- even_values[t] = nbl_glsl_ext_FFT_getPaddedData(bitReversedCoords_e , channel);
182
+ // uvec3 bitReversedCoords_e = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords_e, leadingZeroes);
183
+ even_values[t] = nbl_glsl_ext_FFT_getPaddedData(coords_e , channel);
184
184
185
185
uvec3 coords_o = nbl_glsl_ext_FFT_getCoordinates(even_index + 1 );
186
- uvec3 bitReversedCoords_o = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords_o, leadingZeroes);
187
- odd_values[t] = nbl_glsl_ext_FFT_getPaddedData(bitReversedCoords_o, channel);
186
+ // uvec3 bitReversedCoords_o = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords_o, leadingZeroes);
187
+ odd_values[t] = nbl_glsl_ext_FFT_getPaddedData(coords_o, channel);
188
+ }
189
+
190
+ // Initial Data Exchange
191
+ {
192
+ // Get Even/Odd Values X for virtual threads
193
+ for (uint t = 0u; t < num_virtual_threads; t++ )
194
+ {
195
+ uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
196
+
197
+ uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0 , dataLength); // same as tid * 2
198
+ uint odd_index = even_index + 1 ;
199
+
200
+ _NBL_GLSL_SCRATCH_SHARED_DEFINED_[even_index] = floatBitsToUint(even_values[t].x);
201
+ _NBL_GLSL_SCRATCH_SHARED_DEFINED_[odd_index] = floatBitsToUint(odd_values[t].x);
202
+ }
203
+
204
+ barrier();
205
+ memoryBarrierShared();
206
+
207
+ for (uint t = 0u; t < num_virtual_threads; t++ )
208
+ {
209
+ uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
210
+
211
+ uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0 , dataLength); // same as tid * 2
212
+ uint odd_index = even_index + 1 ;
213
+
214
+ uint even_rev_bits = bitfieldReverse(even_index) >> leadingZeroes;
215
+ uint odd_rev_bits = bitfieldReverse(odd_index) >> leadingZeroes;
216
+
217
+ even_values[t].x = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[even_rev_bits]);
218
+ odd_values[t].x = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[odd_rev_bits]);
219
+ }
220
+
221
+ barrier();
222
+ memoryBarrierShared();
223
+
224
+ // Get Even/Odd Values Y for virtual threads
225
+ for (uint t = 0u; t < num_virtual_threads; t++ )
226
+ {
227
+ uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
228
+
229
+ uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0 , dataLength); // same as tid * 2
230
+ uint odd_index = even_index + 1 ;
231
+
232
+ _NBL_GLSL_SCRATCH_SHARED_DEFINED_[even_index] = floatBitsToUint(even_values[t].y);
233
+ _NBL_GLSL_SCRATCH_SHARED_DEFINED_[odd_index] = floatBitsToUint(odd_values[t].y);
234
+ }
235
+
236
+ barrier();
237
+ memoryBarrierShared();
238
+
239
+ for (uint t = 0u; t < num_virtual_threads; t++ )
240
+ {
241
+ uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
242
+
243
+ uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0 , dataLength); // same as tid * 2
244
+ uint odd_index = even_index + 1 ;
245
+
246
+ uint even_rev_bits = bitfieldReverse(even_index) >> leadingZeroes;
247
+ uint odd_rev_bits = bitfieldReverse(odd_index) >> leadingZeroes;
248
+
249
+ even_values[t].y = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[even_rev_bits]);
250
+ odd_values[t].y = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[odd_rev_bits]);
251
+ }
188
252
}
189
253
190
254
// For loop for each stage of the FFT (each virtual thread computes 1 butterfly)
@@ -207,7 +271,7 @@ void nbl_glsl_ext_FFT(bool is_inverse)
207
271
odd_values[t] = even_value - cmplx_mul;
208
272
}
209
273
210
- // Exchange Even and Odd Values with Other Threads (or maybe this thread)
274
+ // Exchange Even/Odd Values with Other Threads (or sometimes the same thread)
211
275
if (i < logTwo - 1 )
212
276
{
213
277
// Get Even/Odd Values X for virtual threads
0 commit comments