Skip to content

Commit ae94f67

Browse files
committed
u's everywhere :D
1 parent b1b231f commit ae94f67

File tree

3 files changed

+41
-47
lines changed

3 files changed

+41
-47
lines changed

include/nbl/builtin/glsl/ext/FFT/fft.glsl

Lines changed: 31 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in vec2 comp
7676
// Count Leading Zeroes (naive?)
7777
uint nbl_glsl_ext_FFT_clz(in uint x)
7878
{
79-
return 31 - findMSB(x);
79+
return 31u - findMSB(x);
8080
}
8181

8282
uint nbl_glsl_ext_FFT_reverseBits(in uint x)
@@ -92,7 +92,7 @@ uint nbl_glsl_ext_FFT_calculateTwiddlePower(in uint threadId, in uint iteration,
9292
vec2 nbl_glsl_ext_FFT_twiddle(in uint threadId, in uint iteration, in uint logTwoN, in uint N)
9393
{
9494
uint k = nbl_glsl_ext_FFT_calculateTwiddlePower(threadId, iteration, logTwoN, N);
95-
return nbl_glsl_eITheta(-1 * 2 * nbl_glsl_PI * k / N);
95+
return nbl_glsl_expImaginary(-1 * 2 * nbl_glsl_PI * k / N);
9696
}
9797

9898
vec2 nbl_gnbl_glsl_ext_FFT_twiddleInverse(in uint threadId, in uint iteration, in uint logTwoN, in uint N)
@@ -122,53 +122,43 @@ uvec3 nbl_glsl_ext_FFT_getBitReversedCoordinates(in uvec3 coords, in uint leadin
122122

123123
uint nbl_glsl_ext_FFT_getDimLength(uvec3 dimension)
124124
{
125-
uint dataLength = 0;
126-
127-
if(pc.direction == _NBL_GLSL_EXT_FFT_DIRECTION_X_) {
128-
return dimension.x;
129-
} else if (pc.direction == _NBL_GLSL_EXT_FFT_DIRECTION_Y_) {
130-
return dimension.y;
131-
} else if (pc.direction == _NBL_GLSL_EXT_FFT_DIRECTION_Z_) {
132-
return dimension.z;
133-
}
134-
135-
return dataLength;
125+
return dimension[pc.direction];
136126
}
137127

138128
vec2 nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint channel) {
139-
uint min_x = 0;
140-
uint max_x = pc.dimension.x + min_x - 1;
129+
uint min_x = 0u;
130+
uint max_x = pc.dimension.x + min_x - 1u;
141131

142-
uint min_y = 0;
143-
uint max_y = pc.dimension.y + min_y - 1;
132+
uint min_y = 0u;
133+
uint max_y = pc.dimension.y + min_y - 1u;
144134

145-
uint min_z = 0;
146-
uint max_z = pc.dimension.z + min_z - 1;
135+
uint min_z = 0u;
136+
uint max_z = pc.dimension.z + min_z - 1u;
147137

148138

149-
uvec3 actual_coord = uvec3(0, 0, 0);
139+
uvec3 actual_coord = uvec3(0u, 0u, 0u);
150140

151141
if(_NBL_GLSL_EXT_FFT_CLAMP_TO_EDGE_ == pc.padding_type) {
152142
if (coordinate.x < min_x) {
153-
actual_coord.x = 0;
143+
actual_coord.x = 0u;
154144
} else if(coordinate.x > max_x) {
155-
actual_coord.x = pc.dimension.x - 1;
145+
actual_coord.x = pc.dimension.x - 1u;
156146
} else {
157147
actual_coord.x = coordinate.x - min_x;
158148
}
159149

160150
if (coordinate.y < min_y) {
161-
actual_coord.y = 0;
151+
actual_coord.y = 0u;
162152
} else if (coordinate.y > max_y) {
163-
actual_coord.y = pc.dimension.y - 1;
153+
actual_coord.y = pc.dimension.y - 1u;
164154
} else {
165155
actual_coord.y = coordinate.y - min_y;
166156
}
167157

168158
if (coordinate.z < min_z) {
169-
actual_coord.z = 0;
159+
actual_coord.z = 0u;
170160
} else if (coordinate.z > max_z) {
171-
actual_coord.z = pc.dimension.z - 1;
161+
actual_coord.z = pc.dimension.z - 1u;
172162
} else {
173163
actual_coord.z = coordinate.z - min_z;
174164
}
@@ -194,20 +184,20 @@ void nbl_glsl_ext_FFT()
194184
{
195185
// Virtual Threads Calculation
196186
uint dataLength = nbl_glsl_ext_FFT_getDimLength(pc.padded_dimension);
197-
uint num_virtual_threads = uint(ceil(float(dataLength) / float(_NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_)));
187+
uint num_virtual_threads = (dataLength-1u)/(_NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_)+1u;
198188
uint thread_offset = gl_LocalInvocationIndex;
199189

200190
uint channel = nbl_glsl_ext_FFT_getChannel();
201191

202192
// Pass 0: Bit Reversal
203-
uint leadingZeroes = nbl_glsl_ext_FFT_clz(dataLength) + 1;
204-
uint logTwo = 32 - leadingZeroes;
193+
uint leadingZeroes = nbl_glsl_ext_FFT_clz(dataLength) + 1u;
194+
uint logTwo = 32u - leadingZeroes;
205195

206196
vec2 current_values[_NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD];
207197
vec2 shuffled_values[_NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD];
208198

209199
// Load Initial Values into Local Mem (bit reversed indices)
210-
for(uint t = 0; t < num_virtual_threads; t++)
200+
for(uint t = 0u; t < num_virtual_threads; t++)
211201
{
212202
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
213203
uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
@@ -217,65 +207,65 @@ void nbl_glsl_ext_FFT()
217207
}
218208

219209
// For loop for each stage of the FFT (each virtual thread computes 1 butterfly wing)
220-
for(uint i = 0; i < logTwo; ++i)
210+
for(uint i = 0u; i < logTwo; ++i)
221211
{
222-
uint mask = 1 << i;
212+
uint mask = 1u << i;
223213

224214
// Data Exchange for virtual threads :
225215
// X and Y are separate to use less shared memory for complex numbers
226216
// Get Shuffled Values X for virtual threads
227-
for(uint t = 0; t < num_virtual_threads; t++)
217+
for(uint t = 0u; t < num_virtual_threads; t++)
228218
{
229219
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
230220
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid] = floatBitsToUint(current_values[t].x);
231221
}
232222
barrier();
233223
memoryBarrierShared();
234-
for(uint t = 0; t < num_virtual_threads; t++)
224+
for(uint t = 0u; t < num_virtual_threads; t++)
235225
{
236226
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
237227
shuffled_values[t].x = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid ^ mask]);
238228
}
239229

240230
// Get Shuffled Values Y for virtual threads
241-
for(uint t = 0; t < num_virtual_threads; t++)
231+
for(uint t = 0u; t < num_virtual_threads; t++)
242232
{
243233
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
244234
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid] = floatBitsToUint(current_values[t].y);
245235
}
246236
barrier();
247237
memoryBarrierShared();
248-
for(uint t = 0; t < num_virtual_threads; t++)
238+
for(uint t = 0u; t < num_virtual_threads; t++)
249239
{
250240
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
251241
shuffled_values[t].y = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[tid ^ mask]);
252242
}
253243

254244
// Computation of each virtual thread
255-
for(uint t = 0; t < num_virtual_threads; t++)
245+
for(uint t = 0u; t < num_virtual_threads; t++)
256246
{
257247
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
258248
vec2 shuffled_value = shuffled_values[t];
259249

260-
vec2 nbl_glsl_ext_FFT_twiddle = (0 == pc.is_inverse)
250+
vec2 nbl_glsl_ext_FFT_twiddle = (0u == pc.is_inverse)
261251
? nbl_glsl_ext_FFT_twiddle(tid, i, logTwo, dataLength)
262252
: nbl_gnbl_glsl_ext_FFT_twiddleInverse(tid, i, logTwo, dataLength);
263253

264254
vec2 this_value = current_values[t];
265255

266-
if(0 < uint(tid & mask)) {
256+
if(0u < uint(tid & mask)) {
267257
current_values[t] = shuffled_value + nbl_glsl_complex_mul(nbl_glsl_ext_FFT_twiddle, this_value);
268258
} else {
269259
current_values[t] = this_value + nbl_glsl_complex_mul(nbl_glsl_ext_FFT_twiddle, shuffled_value);
270260
}
271261
}
272262
}
273263

274-
for(uint t = 0; t < num_virtual_threads; t++)
264+
for(uint t = 0u; t < num_virtual_threads; t++)
275265
{
276266
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
277267
uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
278-
vec2 complex_value = (0 == pc.is_inverse)
268+
vec2 complex_value = (0u == pc.is_inverse)
279269
? current_values[t]
280270
: current_values[t] / dataLength;
281271

include/nbl/builtin/glsl/math/complex.glsl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,31 @@
88
#include <nbl/builtin/glsl/math/constants.glsl>
99
#include <nbl/builtin/glsl/math/functions.glsl>
1010

11-
vec2 nbl_glsl_eITheta(in float _theta)
11+
#define nbl_glsl_complex vec2
12+
#define nbl_glsl_cvec2 mat2
13+
#define nbl_glsl_cvec3 mat3x2
14+
#define nbl_glsl_cvec4 mat4x2
15+
16+
nbl_glsl_complex nbl_glsl_expImaginary(in float _theta)
1217
{
13-
// Use sincos from math/functions.glsl?
1418
float r = cos(_theta);
1519
float i = sin(_theta);
1620
return vec2(r, i);
1721
}
1822

19-
vec2 nbl_glsl_complex_mul(in vec2 rhs, in vec2 lhs)
23+
nbl_glsl_complex nbl_glsl_complex_mul(in nbl_glsl_complex rhs, in nbl_glsl_complex lhs)
2024
{
2125
float r = rhs.x * lhs.x - rhs.y * lhs.y;
2226
float i = rhs.x * lhs.y + rhs.y * lhs.x;
2327
return vec2(r, i);
2428
}
2529

26-
vec2 nbl_glsl_complex_add(in vec2 rhs, in vec2 lhs)
30+
nbl_glsl_complex nbl_glsl_complex_add(in nbl_glsl_complex rhs, in nbl_glsl_complex lhs)
2731
{
2832
return rhs + lhs;
2933
}
3034

31-
vec2 nbl_glsl_complex_conjugate(in vec2 complex) {
35+
nbl_glsl_complex nbl_glsl_complex_conjugate(in nbl_glsl_complex complex) {
3236
return complex * vec2(1, -1);
3337
}
3438

src/nbl/ext/FFT/FFT.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ void main()
217217

218218
const size_t extraSize = 32 + 32 + 32 + 32;
219219

220-
const uint32_t maxItemsPerThread = core::ceil(float(maxPaddedDimensionSize) / DEFAULT_WORK_GROUP_X_DIM);
220+
const uint32_t maxItemsPerThread = (maxPaddedDimensionSize - 1u) / (DEFAULT_WORK_GROUP_X_DIM) + 1u;
221221
const uint32_t useSSBO = (DataType::SSBO == inputType) ? 1 : 0;
222222
auto shader = core::make_smart_refctd_ptr<ICPUBuffer>(strlen(sourceFmt)+extraSize+1u);
223223
snprintf(

0 commit comments

Comments
 (0)