Skip to content

Commit f8ee656

Browse files
committed
fft.glsl: Initial Data Exchange (bitReverseShuffle)
1 parent 8861228 commit f8ee656

File tree

1 file changed

+69
-5
lines changed
  • include/nbl/builtin/glsl/ext/FFT

1 file changed

+69
-5
lines changed

include/nbl/builtin/glsl/ext/FFT/fft.glsl

Lines changed: 69 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,12 +179,76 @@ void nbl_glsl_ext_FFT(bool is_inverse)
179179
uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0, dataLength); // same as tid * 2
180180

181181
uvec3 coords_e = nbl_glsl_ext_FFT_getCoordinates(even_index);
182-
uvec3 bitReversedCoords_e = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords_e, leadingZeroes);
183-
even_values[t] = nbl_glsl_ext_FFT_getPaddedData(bitReversedCoords_e, channel);
182+
// uvec3 bitReversedCoords_e = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords_e, leadingZeroes);
183+
even_values[t] = nbl_glsl_ext_FFT_getPaddedData(coords_e, channel);
184184

185185
uvec3 coords_o = nbl_glsl_ext_FFT_getCoordinates(even_index + 1);
186-
uvec3 bitReversedCoords_o = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords_o, leadingZeroes);
187-
odd_values[t] = nbl_glsl_ext_FFT_getPaddedData(bitReversedCoords_o, channel);
186+
// uvec3 bitReversedCoords_o = nbl_glsl_ext_FFT_getBitReversedCoordinates(coords_o, leadingZeroes);
187+
odd_values[t] = nbl_glsl_ext_FFT_getPaddedData(coords_o, channel);
188+
}
189+
190+
// Initial Data Exchange
191+
{
192+
// Get Even/Odd Values X for virtual threads
193+
for(uint t = 0u; t < num_virtual_threads; t++)
194+
{
195+
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
196+
197+
uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0, dataLength); // same as tid * 2
198+
uint odd_index = even_index + 1;
199+
200+
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[even_index] = floatBitsToUint(even_values[t].x);
201+
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[odd_index] = floatBitsToUint(odd_values[t].x);
202+
}
203+
204+
barrier();
205+
memoryBarrierShared();
206+
207+
for(uint t = 0u; t < num_virtual_threads; t++)
208+
{
209+
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
210+
211+
uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0, dataLength); // same as tid * 2
212+
uint odd_index = even_index + 1;
213+
214+
uint even_rev_bits = bitfieldReverse(even_index) >> leadingZeroes;
215+
uint odd_rev_bits = bitfieldReverse(odd_index) >> leadingZeroes;
216+
217+
even_values[t].x = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[even_rev_bits]);
218+
odd_values[t].x = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[odd_rev_bits]);
219+
}
220+
221+
barrier();
222+
memoryBarrierShared();
223+
224+
// Get Even/Odd Values Y for virtual threads
225+
for(uint t = 0u; t < num_virtual_threads; t++)
226+
{
227+
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
228+
229+
uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0, dataLength); // same as tid * 2
230+
uint odd_index = even_index + 1;
231+
232+
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[even_index] = floatBitsToUint(even_values[t].y);
233+
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[odd_index] = floatBitsToUint(odd_values[t].y);
234+
}
235+
236+
barrier();
237+
memoryBarrierShared();
238+
239+
for(uint t = 0u; t < num_virtual_threads; t++)
240+
{
241+
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_;
242+
243+
uint even_index = nbl_glsl_ext_FFT_getEvenIndex(tid, 0, dataLength); // same as tid * 2
244+
uint odd_index = even_index + 1;
245+
246+
uint even_rev_bits = bitfieldReverse(even_index) >> leadingZeroes;
247+
uint odd_rev_bits = bitfieldReverse(odd_index) >> leadingZeroes;
248+
249+
even_values[t].y = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[even_rev_bits]);
250+
odd_values[t].y = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[odd_rev_bits]);
251+
}
188252
}
189253

190254
// For loop for each stage of the FFT (each virtual thread computes 1 butterfly)
@@ -207,7 +271,7 @@ void nbl_glsl_ext_FFT(bool is_inverse)
207271
odd_values[t] = even_value - cmplx_mul;
208272
}
209273

210-
// Exchange Even and Odd Values with Other Threads (or maybe this thread)
274+
// Exchange Even/Odd Values with Other Threads (or sometimes the same thread)
211275
if(i < logTwo - 1)
212276
{
213277
// Get Even/Odd Values X for virtual threads

0 commit comments

Comments
 (0)