Skip to content

Commit f0c6554

Browse files
committed
packing 3 enums into single uint glsl
1 parent 8393729 commit f0c6554

File tree

5 files changed

+52
-41
lines changed

5 files changed

+52
-41
lines changed

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -471,15 +471,15 @@ int main()
471471
// Convolution
472472
auto convolveDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(convolvePipelineLayout->getDescriptorSetLayout(0u)));
473473
updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_1, fftOutputBuffer_KernelNormalized, fftOutputBuffer_0);
474-
475-
// IFFT X
476-
auto fftDescriptorSet_IFFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
477-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_X.get(), fftOutputBuffer_0, fftOutputBuffer_1);
478-
474+
479475
// IFFT Y
480476
auto fftDescriptorSet_IFFT_Y = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
481-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_Y.get(), fftOutputBuffer_1, fftOutputBuffer_0);
477+
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
482478

479+
// IFFT X
480+
auto fftDescriptorSet_IFFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
481+
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_X.get(), fftOutputBuffer_1, fftOutputBuffer_0);
482+
483483
auto removePaddingShader = createShader_RemovePadding(driver, am);
484484
auto removePaddingPipelineLayout = getPipelineLayout_RemovePadding(driver);
485485
auto removePaddingPipeline = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(removePaddingPipelineLayout), std::move(removePaddingShader));
@@ -519,18 +519,18 @@ int main()
519519
driver->pushConstants(convolvePipelineLayout.get(), nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 4, sizeof(uint32_t), &srcNumChannels); // numSrcChannels
520520
dispatchHelper_Convolution(driver, convolveDispatchInfo);
521521

522-
// Convolved IFFT X
523-
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
524-
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_IFFT_X.get(), nullptr);
525-
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::X, true);
526-
FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
527-
528522
// Convolved IFFT Y
529523
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
530524
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_IFFT_Y.get(), nullptr);
531525
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, true);
532526
FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
533527

528+
// Convolved IFFT X
529+
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
530+
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_IFFT_X.get(), nullptr);
531+
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::X, true);
532+
FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
533+
534534
// Remove Padding and Copy to GPU Image
535535
driver->bindComputePipeline(removePaddingPipeline.get());
536536
driver->bindDescriptorSets(EPBP_COMPUTE, removePaddingPipelineLayout.get(), 0u, 1u, &removePaddingDescriptorSet.get(), nullptr);

include/nbl/builtin/glsl/ext/FFT/default_compute_fft.comp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint cha
100100

101101
bool is_out_of_range = any(bvec3(coordinate!=clamped_coord));
102102

103-
if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == pc.padding_type && is_out_of_range) {
103+
uint paddingType = nbl_glsl_ext_FFT_getPaddingType();
104+
105+
if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == paddingType && is_out_of_range) {
104106
return nbl_glsl_complex(0, 0);
105107
}
106108

include/nbl/builtin/glsl/ext/FFT/fft.glsl

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,14 @@
4545
#define _NBL_GLSL_EXT_FFT_CLAMP_TO_EDGE_ 0
4646
#define _NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ 1
4747

48+
// TODO: investigate why placing this uint between the two uvec3 members doesn't work
4849
#ifndef _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
4950
#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
5051
layout(push_constant) uniform PushConstants
5152
{
5253
layout (offset = 0) uvec3 dimension;
5354
layout (offset = 16) uvec3 padded_dimension;
54-
layout (offset = 32) uint direction;
55-
layout (offset = 36) uint is_inverse;
56-
layout (offset = 40) uint padding_type; // clamp_to_edge or fill_with_zero
55+
layout (offset = 32) uint direction_isInverse_paddingType; // packed into a uint
5756
} pc;
5857
#endif
5958

@@ -100,29 +99,43 @@ nbl_glsl_complex nbl_glsl_ext_FFT_twiddleInverse(in uint threadId, in uint itera
10099
return nbl_glsl_complex_conjugate(nbl_glsl_ext_FFT_twiddle(threadId, iteration, logTwoN));
101100
}
102101

102+
// Returns the FFT direction stored in bits 16..23 of the packed push constant
// `pc.direction_isInverse_paddingType`. Callers use the result as a component
// index into uvec3 values (e.g. gl_WorkGroupID).
uint nbl_glsl_ext_FFT_getDirection() {
103+
return (pc.direction_isInverse_paddingType >> 16) & 0x000000ff;
104+
}
105+
// Returns the inverse-transform flag stored in bits 8..15 of the packed push
// constant `pc.direction_isInverse_paddingType`; any non-zero byte yields true.
bool nbl_glsl_ext_FFT_getIsInverse() {
106+
return bool((pc.direction_isInverse_paddingType >> 8) & 0x000000ff);
107+
}
108+
// Returns the padding type stored in bits 0..7 of the packed push constant
// `pc.direction_isInverse_paddingType`. The value is compared against
// _NBL_GLSL_EXT_FFT_CLAMP_TO_EDGE_ (0) and _NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ (1).
uint nbl_glsl_ext_FFT_getPaddingType() {
109+
return (pc.direction_isInverse_paddingType) & 0x000000ff;
110+
}
111+
103112
uint nbl_glsl_ext_FFT_getChannel()
104113
{
105-
return gl_WorkGroupID[pc.direction];
114+
uint direction = nbl_glsl_ext_FFT_getDirection();
115+
return gl_WorkGroupID[direction];
106116
}
107117

108118
uvec3 nbl_glsl_ext_FFT_getCoordinates(in uint tidx)
109119
{
120+
uint direction = nbl_glsl_ext_FFT_getDirection();
110121
uvec3 tmp = gl_WorkGroupID;
111-
tmp[pc.direction] = tidx;
122+
tmp[direction] = tidx;
112123
return tmp;
113124
}
114125

115126
uvec3 nbl_glsl_ext_FFT_getBitReversedCoordinates(in uvec3 coords, in uint leadingZeroes)
116127
{
117-
uint bitReversedIndex = bitfieldReverse(coords[pc.direction]) >> leadingZeroes;
128+
uint direction = nbl_glsl_ext_FFT_getDirection();
129+
uint bitReversedIndex = bitfieldReverse(coords[direction]) >> leadingZeroes;
118130
uvec3 tmp = coords;
119-
tmp[pc.direction] = bitReversedIndex;
131+
tmp[direction] = bitReversedIndex;
120132
return tmp;
121133
}
122134

123135
uint nbl_glsl_ext_FFT_getDimLength(uvec3 dimension)
124136
{
125-
return dimension[pc.direction];
137+
uint direction = nbl_glsl_ext_FFT_getDirection();
138+
return dimension[direction];
126139
}
127140

128141
void nbl_glsl_ext_FFT()
@@ -134,6 +147,8 @@ void nbl_glsl_ext_FFT()
134147

135148
uint channel = nbl_glsl_ext_FFT_getChannel();
136149

150+
bool is_inverse = nbl_glsl_ext_FFT_getIsInverse();
151+
137152
// Pass 0: Bit Reversal
138153
uint leadingZeroes = nbl_glsl_clz(dataLength) + 1u;
139154
uint logTwo = 32u - leadingZeroes;
@@ -192,7 +207,7 @@ void nbl_glsl_ext_FFT()
192207
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
193208
nbl_glsl_complex shuffled_value = shuffled_values[t];
194209

195-
nbl_glsl_complex twiddle = (0u == pc.is_inverse)
210+
// Forward pass uses the plain twiddle factor; the inverse pass uses its conjugate.
// The condition is negated so it matches the previous `0u == pc.is_inverse` test.
nbl_glsl_complex twiddle = (!is_inverse)
196211
? nbl_glsl_ext_FFT_twiddle(tid, i, logTwo)
197212
: nbl_glsl_ext_FFT_twiddleInverse(tid, i, logTwo);
198213

@@ -210,7 +225,7 @@ void nbl_glsl_ext_FFT()
210225
{
211226
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
212227
uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
213-
nbl_glsl_complex complex_value = (0u == pc.is_inverse)
228+
// Only the inverse transform is normalized by dataLength; the forward result
// is written unscaled (same behavior as the previous `0u == pc.is_inverse` test).
nbl_glsl_complex complex_value = (!is_inverse)
214229
? current_values[t]
215230
: current_values[t] / dataLength;
216231

include/nbl/ext/FFT/FFT.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ class FFT : public core::TotalInterface
2020
{
2121
public:
2222

23-
enum class Direction : uint32_t {
23+
enum class Direction : uint8_t {
2424
X = 0,
2525
Y = 1,
2626
Z = 2,
2727
};
2828

29-
enum class PaddingType : uint32_t {
29+
enum class PaddingType : uint8_t {
3030
CLAMP_TO_EDGE = 0,
3131
FILL_WITH_ZERO = 1,
3232
};
@@ -201,12 +201,16 @@ class FFT : public core::TotalInterface
201201
bool isInverse,
202202
PaddingType paddingType = PaddingType::CLAMP_TO_EDGE)
203203
{
204-
uint32_t is_inverse_u = isInverse;
204+
205+
uint8_t isInverse_u8 = isInverse;
206+
uint8_t direction_u8 = static_cast<uint8_t>(direction);
207+
uint8_t paddingType_u8 = static_cast<uint8_t>(paddingType);
208+
209+
uint32_t packed = (direction_u8 << 16u) | (isInverse_u8 << 8u) | paddingType_u8;
210+
205211
driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, 0u, sizeof(uint32_t) * 3, &inputDimension);
206212
driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 4, sizeof(uint32_t) * 3, &paddedInputDimension);
207-
driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 8, sizeof(uint32_t), &direction);
208-
driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 9, sizeof(uint32_t), &is_inverse_u);
209-
driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 10, sizeof(uint32_t), &paddingType);
213+
driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 8, sizeof(uint32_t), &packed);
210214
}
211215

212216
// Kernel Normalization

src/nbl/ext/FFT/FFT.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ using namespace ext::FFT;
1414

1515
core::SRange<const asset::SPushConstantRange> FFT::getDefaultPushConstantRanges()
1616
{
17-
static const asset::SPushConstantRange ranges[5] =
17+
static const asset::SPushConstantRange ranges[3] =
1818
{
1919
{
2020
ISpecializedShader::ESS_COMPUTE,
@@ -31,18 +31,8 @@ core::SRange<const asset::SPushConstantRange> FFT::getDefaultPushConstantRanges(
3131
sizeof(uint32_t) * 8,
3232
sizeof(uint32_t)
3333
},
34-
{
35-
ISpecializedShader::ESS_COMPUTE,
36-
sizeof(uint32_t) * 9,
37-
sizeof(uint32_t)
38-
},
39-
{
40-
ISpecializedShader::ESS_COMPUTE,
41-
sizeof(uint32_t) * 10,
42-
sizeof(uint32_t)
43-
},
4434
};
45-
return {ranges, ranges+5};
35+
return {ranges, ranges+3};
4636
}
4737

4838
core::SRange<const video::IGPUDescriptorSetLayout::SBinding> FFT::getDefaultBindings(video::IVideoDriver* driver, DataType inputType)

0 commit comments

Comments
 (0)