@@ -76,7 +76,7 @@ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in vec2 comp
76
76
// Count Leading Zeroes.
// findMSB(0u) returns -1, so the unsigned wraparound of the subtraction
// yields 32u for x == 0u, which is the conventional clz(0) result.
uint nbl_glsl_ext_FFT_clz(in uint x)
{
	return 31u - findMSB(x);
}
81
81
82
82
uint nbl_glsl_ext_FFT_reverseBits(in uint x)
@@ -92,7 +92,7 @@ uint nbl_glsl_ext_FFT_calculateTwiddlePower(in uint threadId, in uint iteration,
92
92
// Forward-FFT twiddle factor for this thread's butterfly:
// e^(-i * 2*pi * k / N), where k is the twiddle power computed from the
// thread id and the current FFT stage.
vec2 nbl_glsl_ext_FFT_twiddle(in uint threadId, in uint iteration, in uint logTwoN, in uint N)
{
	const uint k = nbl_glsl_ext_FFT_calculateTwiddlePower(threadId, iteration, logTwoN, N);
	const float angle = -2.0 * nbl_glsl_PI * float(k) / float(N);
	return nbl_glsl_expImaginary(angle);
}
97
97
98
98
vec2 nbl_gnbl_glsl_ext_FFT_twiddleInverse(in uint threadId, in uint iteration, in uint logTwoN, in uint N)
@@ -122,53 +122,43 @@ uvec3 nbl_glsl_ext_FFT_getBitReversedCoordinates(in uvec3 coords, in uint leadin
122
122
123
123
// Extent of the axis currently being transformed, selected by the
// push-constant FFT direction.
// NOTE(review): indexing the uvec3 directly assumes
// _NBL_GLSL_EXT_FFT_DIRECTION_{X,Y,Z}_ are defined as 0, 1, 2 — confirm
// against the header that declares them.
uint nbl_glsl_ext_FFT_getDimLength(uvec3 dimension)
{
	const uint axis = pc.direction;
	return dimension[axis];
}
137
127
138
128
vec2 nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint channel) {
139
- uint min_x = 0 ;
140
- uint max_x = pc.dimension.x + min_x - 1 ;
129
+ uint min_x = 0u ;
130
+ uint max_x = pc.dimension.x + min_x - 1u ;
141
131
142
- uint min_y = 0 ;
143
- uint max_y = pc.dimension.y + min_y - 1 ;
132
+ uint min_y = 0u ;
133
+ uint max_y = pc.dimension.y + min_y - 1u ;
144
134
145
- uint min_z = 0 ;
146
- uint max_z = pc.dimension.z + min_z - 1 ;
135
+ uint min_z = 0u ;
136
+ uint max_z = pc.dimension.z + min_z - 1u ;
147
137
148
138
149
- uvec3 actual_coord = uvec3 (0 , 0 , 0 );
139
+ uvec3 actual_coord = uvec3 (0u, 0u, 0u );
150
140
151
141
if (_NBL_GLSL_EXT_FFT_CLAMP_TO_EDGE_ == pc.padding_type) {
152
142
if (coordinate.x < min_x) {
153
- actual_coord.x = 0 ;
143
+ actual_coord.x = 0u ;
154
144
} else if (coordinate.x > max_x) {
155
- actual_coord.x = pc.dimension.x - 1 ;
145
+ actual_coord.x = pc.dimension.x - 1u ;
156
146
} else {
157
147
actual_coord.x = coordinate.x - min_x;
158
148
}
159
149
160
150
if (coordinate.y < min_y) {
161
- actual_coord.y = 0 ;
151
+ actual_coord.y = 0u ;
162
152
} else if (coordinate.y > max_y) {
163
- actual_coord.y = pc.dimension.y - 1 ;
153
+ actual_coord.y = pc.dimension.y - 1u ;
164
154
} else {
165
155
actual_coord.y = coordinate.y - min_y;
166
156
}
167
157
168
158
if (coordinate.z < min_z) {
169
- actual_coord.z = 0 ;
159
+ actual_coord.z = 0u ;
170
160
} else if (coordinate.z > max_z) {
171
- actual_coord.z = pc.dimension.z - 1 ;
161
+ actual_coord.z = pc.dimension.z - 1u ;
172
162
} else {
173
163
actual_coord.z = coordinate.z - min_z;
174
164
}
@@ -194,20 +184,20 @@ void nbl_glsl_ext_FFT()
194
184
{
195
185
// Virtual Threads Calculation
196
186
uint dataLength = nbl_glsl_ext_FFT_getDimLength(pc.padded_dimension);
197
- uint num_virtual_threads = uint ( ceil ( float ( dataLength) / float (_NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_))) ;
187
+ uint num_virtual_threads = ( dataLength- 1u) / (_NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_)+ 1u ;
198
188
uint thread_offset = gl_LocalInvocationIndex;
199
189
200
190
uint channel = nbl_glsl_ext_FFT_getChannel();
201
191
202
192
// Pass 0: Bit Reversal
203
- uint leadingZeroes = nbl_glsl_ext_FFT_clz(dataLength) + 1 ;
204
- uint logTwo = 32 - leadingZeroes;
193
+ uint leadingZeroes = nbl_glsl_ext_FFT_clz(dataLength) + 1u ;
194
+ uint logTwo = 32u - leadingZeroes;
205
195
206
196
vec2 current_values[_NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD];
207
197
vec2 shuffled_values[_NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD];
208
198
209
199
// Load Initial Values into Local Mem (bit reversed indices)
210
- for (uint t = 0 ; t < num_virtual_threads; t++ )
200
+ for (uint t = 0u ; t < num_virtual_threads; t++ )
211
201
{
212
202
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
213
203
uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
@@ -217,65 +207,65 @@ void nbl_glsl_ext_FFT()
217
207
}
218
208
219
209
// For loop for each stage of the FFT (each virtual thread computes 1 buttefly wing)
220
- for (uint i = 0 ; i < logTwo; ++ i)
210
+ for (uint i = 0u ; i < logTwo; ++ i)
221
211
{
222
- uint mask = 1 << i;
212
+ uint mask = 1u << i;
223
213
224
214
// Data Exchange for virtual threads :
225
215
// X and Y are seperate to use less shared memory for complex numbers
226
216
// Get Shuffled Values X for virtual threads
227
- for (uint t = 0 ; t < num_virtual_threads; t++ )
217
+ for (uint t = 0u ; t < num_virtual_threads; t++ )
228
218
{
229
219
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
230
220
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid] = floatBitsToUint(current_values[t].x);
231
221
}
232
222
barrier();
233
223
memoryBarrierShared();
234
- for (uint t = 0 ; t < num_virtual_threads; t++ )
224
+ for (uint t = 0u ; t < num_virtual_threads; t++ )
235
225
{
236
226
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
237
227
shuffled_values[t].x = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid ^ mask]);
238
228
}
239
229
240
230
// Get Shuffled Values Y for virtual threads
241
- for (uint t = 0 ; t < num_virtual_threads; t++ )
231
+ for (uint t = 0u ; t < num_virtual_threads; t++ )
242
232
{
243
233
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
244
234
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid] = floatBitsToUint(current_values[t].y);
245
235
}
246
236
barrier();
247
237
memoryBarrierShared();
248
- for (uint t = 0 ; t < num_virtual_threads; t++ )
238
+ for (uint t = 0u ; t < num_virtual_threads; t++ )
249
239
{
250
240
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
251
241
shuffled_values[t].y = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid ^ mask]);
252
242
}
253
243
254
244
// Computation of each virtual thread
255
- for (uint t = 0 ; t < num_virtual_threads; t++ )
245
+ for (uint t = 0u ; t < num_virtual_threads; t++ )
256
246
{
257
247
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
258
248
vec2 shuffled_value = shuffled_values[t];
259
249
260
- vec2 nbl_glsl_ext_FFT_twiddle = (0 == pc.is_inverse)
250
+ vec2 nbl_glsl_ext_FFT_twiddle = (0u == pc.is_inverse)
261
251
? nbl_glsl_ext_FFT_twiddle(tid, i, logTwo, dataLength)
262
252
: nbl_gnbl_glsl_ext_FFT_twiddleInverse(tid, i, logTwo, dataLength);
263
253
264
254
vec2 this_value = current_values[t];
265
255
266
- if (0 < uint (tid & mask)) {
256
+ if (0u < uint (tid & mask)) {
267
257
current_values[t] = shuffled_value + nbl_glsl_complex_mul(nbl_glsl_ext_FFT_twiddle, this_value);
268
258
} else {
269
259
current_values[t] = this_value + nbl_glsl_complex_mul(nbl_glsl_ext_FFT_twiddle, shuffled_value);
270
260
}
271
261
}
272
262
}
273
263
274
- for (uint t = 0 ; t < num_virtual_threads; t++ )
264
+ for (uint t = 0u ; t < num_virtual_threads; t++ )
275
265
{
276
266
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
277
267
uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
278
- vec2 complex_value = (0 == pc.is_inverse)
268
+ vec2 complex_value = (0u == pc.is_inverse)
279
269
? current_values[t]
280
270
: current_values[t] / dataLength;
281
271
0 commit comments