@@ -74,95 +74,53 @@ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in vec2 comp
74
74
#endif
75
75
76
76
// Count the leading zero bits of a 32-bit unsigned value.
// findMSB gives the index of the most significant set bit and -1 when x is 0,
// so an input of 0 yields 32.
uint nbl_glsl_ext_FFT_clz(in uint x)
{
    int highestSetBit = findMSB(x);
    return uint(31 - highestSetBit);
}
88
81
89
// Reverse the order of all 32 bits of x (bit 0 swaps with bit 31, and so on).
// Thin wrapper over the bitfieldReverse built-in.
uint nbl_glsl_ext_FFT_reverseBits(in uint x)
{
    uint mirrored = bitfieldReverse(x);
    return mirrored;
}
105
86
106
// Compute the integer twiddle exponent k used by nbl_glsl_ext_FFT_twiddle.
//   threadId  - index of the (virtual) thread
//   iteration - current iteration index
//   logTwoN   - value the caller passes as log2 of N
//   N         - transform length
// Returns (threadId & (2 * N / stride - 1)) * (stride / 2),
// where stride = 1 << (logTwoN - iteration).
uint nbl_glsl_ext_FFT_calculateTwiddlePower(in uint threadId, in uint iteration, in uint logTwoN, in uint N)
{
    uint stride = 1u << (logTwoN - iteration);
    uint indexMask = (N / stride) * 2 - 1;
    uint halfStride = stride / 2;
    return (threadId & indexMask) * halfStride;
}
110
91
111
// Forward twiddle factor for the given thread and iteration.
// Evaluates nbl_glsl_eITheta at the angle -2 * PI * k / N, where k comes from
// nbl_glsl_ext_FFT_calculateTwiddlePower.
vec2 nbl_glsl_ext_FFT_twiddle(in uint threadId, in uint iteration, in uint logTwoN, in uint N)
{
    uint power = nbl_glsl_ext_FFT_calculateTwiddlePower(threadId, iteration, logTwoN, N);
    // Same left-to-right evaluation order as before, with the conversions spelled out.
    float theta = -2.0 * nbl_glsl_PI * float(power) / float(N);
    return nbl_glsl_eITheta(theta);
}
116
97
117
// Twiddle factor for the inverse transform: the complex conjugate of the
// forward twiddle factor for the same thread and iteration.
// Parameters are forwarded unchanged to nbl_glsl_ext_FFT_twiddle.
vec2 nbl_glsl_ext_FFT_twiddleInverse(in uint threadId, in uint iteration, in uint logTwoN, in uint N)
{
    vec2 forwardTwiddle = nbl_glsl_ext_FFT_twiddle(threadId, iteration, logTwoN, N);
    return nbl_glsl_complex_conjugate(forwardTwiddle);
}

// Backward-compatible alias. The "nbl_gnbl_" prefix is a rename slip that breaks
// the nbl_glsl_ext_FFT_ naming convention; it is kept so existing callers still
// compile. Prefer nbl_glsl_ext_FFT_twiddleInverse in new code.
vec2 nbl_gnbl_glsl_ext_FFT_twiddleInverse(in uint threadId, in uint iteration, in uint logTwoN, in uint N)
{
    return nbl_glsl_ext_FFT_twiddleInverse(threadId, iteration, logTwoN, N);
}
122
102
123
// Channel handled by this workgroup: the component of gl_WorkGroupID selected
// by pc.direction. pc.direction is used directly as a vector index here.
uint nbl_glsl_ext_FFT_getChannel()
{
    uvec3 workGroup = gl_WorkGroupID;
    return workGroup[pc.direction];
}
135
107
136
// Build a 3D coordinate from the workgroup ID, with the component along
// pc.direction replaced by tidx. The other two components are left as
// gl_WorkGroupID provides them.
uvec3 nbl_glsl_ext_FFT_getCoordinates(in uint tidx)
{
    uvec3 coordinate = gl_WorkGroupID;
    coordinate[pc.direction] = tidx;
    return coordinate;
}
148
114
149
// Return coords with the component along pc.direction replaced by its
// bit-reversed value. The component is reversed over all 32 bits and then
// shifted right by leadingZeroes; the other two components are unchanged.
uvec3 nbl_glsl_ext_FFT_getBitReversedCoordinates(in uvec3 coords, in uint leadingZeroes)
{
    uvec3 result = coords;
    uint fullyReversed = nbl_glsl_ext_FFT_reverseBits(coords[pc.direction]);
    result[pc.direction] = fullyReversed >> leadingZeroes;
    return result;
}
164
122
165
- uint getDimLength (uvec3 dimension)
123
+ uint nbl_glsl_ext_FFT_getDimLength (uvec3 dimension)
166
124
{
167
125
uint dataLength = 0 ;
168
126
@@ -235,14 +193,14 @@ vec2 nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint channel) {
235
193
void nbl_glsl_ext_FFT()
236
194
{
237
195
// Virtual Threads Calculation
238
- uint dataLength = getDimLength (pc.padded_dimension);
196
+ uint dataLength = nbl_glsl_ext_FFT_getDimLength (pc.padded_dimension);
239
197
uint num_virtual_threads = uint (ceil (float (dataLength) / float (_NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_)));
240
198
uint thread_offset = gl_LocalInvocationIndex;
241
199
242
- uint channel = getChannel ();
200
+ uint channel = nbl_glsl_ext_FFT_getChannel ();
243
201
244
202
// Pass 0: Bit Reversal
245
- uint leadingZeroes = clz (dataLength) + 1 ;
203
+ uint leadingZeroes = nbl_glsl_ext_FFT_clz (dataLength) + 1 ;
246
204
uint logTwo = 32 - leadingZeroes;
247
205
248
206
vec2 current_values[_NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD];
@@ -252,8 +210,8 @@ void nbl_glsl_ext_FFT()
252
210
for (uint t = 0 ; t < num_virtual_threads; t++ )
253
211
{
254
212
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
255
- uvec3 coords = getCoordinates (tid);
256
- uvec3 bitReversedCoords = getBitReversedCoordinates (coords, leadingZeroes);
213
+ uvec3 coords = nbl_glsl_ext_FFT_getCoordinates (tid);
214
+ uvec3 bitReversedCoords = nbl_glsl_ext_FFT_getBitReversedCoordinates (coords, leadingZeroes);
257
215
258
216
current_values[t] = nbl_glsl_ext_FFT_getPaddedData(bitReversedCoords, channel);
259
217
}
@@ -299,24 +257,24 @@ void nbl_glsl_ext_FFT()
299
257
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
300
258
vec2 shuffled_value = shuffled_values[t];
301
259
302
- vec2 twiddle = (0 == pc.is_inverse)
303
- ? twiddle (tid, i, logTwo, dataLength)
304
- : twiddle_inv (tid, i, logTwo, dataLength);
260
+ vec2 nbl_glsl_ext_FFT_twiddle = (0 == pc.is_inverse)
261
+ ? nbl_glsl_ext_FFT_twiddle (tid, i, logTwo, dataLength)
262
+ : nbl_gnbl_glsl_ext_FFT_twiddleInverse (tid, i, logTwo, dataLength);
305
263
306
264
vec2 this_value = current_values[t];
307
265
308
266
if (0 < uint (tid & mask)) {
309
- current_values[t] = shuffled_value + nbl_glsl_complex_mul(twiddle , this_value);
267
+ current_values[t] = shuffled_value + nbl_glsl_complex_mul(nbl_glsl_ext_FFT_twiddle , this_value);
310
268
} else {
311
- current_values[t] = this_value + nbl_glsl_complex_mul(twiddle , shuffled_value);
269
+ current_values[t] = this_value + nbl_glsl_complex_mul(nbl_glsl_ext_FFT_twiddle , shuffled_value);
312
270
}
313
271
}
314
272
}
315
273
316
274
for (uint t = 0 ; t < num_virtual_threads; t++ )
317
275
{
318
276
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
319
- uvec3 coords = getCoordinates (tid);
277
+ uvec3 coords = nbl_glsl_ext_FFT_getCoordinates (tid);
320
278
vec2 complex_value = (0 == pc.is_inverse)
321
279
? current_values[t]
322
280
: current_values[t] / dataLength;
0 commit comments