Skip to content

Commit 52c2cdd

Browse files
get rid of normalization from the FFT extension
1 parent 32323bc commit 52c2cdd

File tree

7 files changed

+103
-177
lines changed

7 files changed

+103
-177
lines changed

examples_tests/49.ComputeFFT/last_fft.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getData(in uvec3 coordinate, in uint channel)
4646
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
4747
{
4848
ivec2 coords = ivec2(coordinate.xy) - ivec2(pc.kernel_dimension.xy / 2);
49+
//const ivec2 coords = ivec2(coordinate.xy);
4950
vec4 color_value = imageLoad(outImage, coords);
5051
color_value[channel] = complex_value.x;
5152
imageStore(outImage, coords, color_value);

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -358,11 +358,40 @@ int main()
358358

359359
auto fftGPUSpecializedShader_SSBOInput = FFTClass::createShader(driver, FFTClass::DataType::SSBO, maxPaddedDimensionSize);
360360
auto fftGPUSpecializedShader_ImageInput = FFTClass::createShader(driver, FFTClass::DataType::TEXTURE2D, maxPaddedDimensionSize);
361-
auto fftGPUSpecializedShader_KernelNormalization = FFTClass::createKernelNormalizationShader(driver, am);
361+
// Builds the GPU specialized shader for the kernel-normalization pass locally in the example.
// It replaces the removed FFTClass::createKernelNormalizationShader helper: the shader source is
// loaded from "../normalization.comp" through the asset manager, converted to GPU objects by the
// driver, and the first resulting object is returned.
auto fftGPUSpecializedShader_KernelNormalization = [&]() -> auto
362+
{
363+
// Default-constructed load parameters; no overrides are set here.
IAssetLoader::SAssetLoadParams lp;
364+
// Path is relative (presumably to the process working directory — TODO confirm).
auto shaderAsset = am->getAsset("../normalization.comp", lp);
365+
auto stuff = driver->getGPUObjectsFromAssets<asset::ICPUSpecializedShader>(shaderAsset.getContents(),nullptr);
366+
// NOTE(review): nothing checks that the asset was found or that the converted range is
// non-empty before dereferencing its first element — confirm behaviour when the file is missing.
return *stuff->begin();
367+
}();
362368

363369
auto fftPipelineLayout_SSBOInput = FFTClass::getDefaultPipelineLayout(driver, FFTClass::DataType::SSBO);
364370
auto fftPipelineLayout_ImageInput = FFTClass::getDefaultPipelineLayout(driver, FFTClass::DataType::TEXTURE2D);
365-
auto fftPipelineLayout_KernelNormalization = FFTClass::getPipelineLayout_KernelNormalization(driver);
371+
// Builds the pipeline layout for the kernel-normalization pass locally in the example
// (replaces the removed FFTClass::getPipelineLayout_KernelNormalization helper).
// The layout has a single descriptor set layout with two storage-buffer bindings (0 and 1),
// matching the `set=0, binding=0` / `set=0, binding=1` buffers declared in normalization.comp.
auto fftPipelineLayout_KernelNormalization = [&]() -> auto
372+
{
373+
// Each entry is presumably {binding index, descriptor type, count, shader stage, samplers}
// — TODO confirm the field order against IGPUDescriptorSetLayout::SBinding.
static IGPUDescriptorSetLayout::SBinding bnd[] =
374+
{
375+
// Binding 0: one storage buffer visible to the compute stage.
{
376+
0u,
377+
EDT_STORAGE_BUFFER,
378+
1u,
379+
ISpecializedShader::ESS_COMPUTE,
380+
nullptr,
381+
},
382+
// Binding 1: one storage buffer visible to the compute stage.
{
383+
1u,
384+
EDT_STORAGE_BUFFER,
385+
1u,
386+
ISpecializedShader::ESS_COMPUTE,
387+
nullptr,
388+
},
389+
};
390+
// Only one descriptor set layout is supplied; every other argument is nullptr.
// `bnd+2` hard-codes the number of entries in `bnd` and must be kept in sync with the array.
return driver->createGPUPipelineLayout(
391+
nullptr,nullptr,
392+
driver->createGPUDescriptorSetLayout(bnd,bnd+2),nullptr,nullptr,nullptr
393+
);
394+
}();
366395

367396
auto fftPipeline_SSBOInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_SSBOInput), std::move(fftGPUSpecializedShader_SSBOInput));
368397
auto fftPipeline_ImageInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_ImageInput), std::move(fftGPUSpecializedShader_ImageInput));
@@ -396,8 +425,40 @@ int main()
396425
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Ker_FFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
397426

398427
// Normalization of FFT Y result
399-
auto fftDescriptorSet_KernelNormalization = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_KernelNormalization->getDescriptorSetLayout(0u)));
400-
FFTClass::updateDescriptorSet_KernelNormalization(driver, fftDescriptorSet_KernelNormalization.get(), fftOutputBuffer_1, fftOutputBuffer_KernelNormalized);
428+
// Creates and fills the descriptor set for the kernel-normalization pass locally in the example
// (replaces the removed FFTClass::updateDescriptorSet_KernelNormalization helper).
// Binding 0 is pointed at fftOutputBuffer_1 (input) and binding 1 at
// fftOutputBuffer_KernelNormalized (output); the populated set is returned.
auto fftDescriptorSet_KernelNormalization = [&]() -> auto
429+
{
430+
// The set is allocated against descriptor set layout 0 of the normalization pipeline layout.
auto dset = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_KernelNormalization->getDescriptorSetLayout(0u)));
431+
432+
video::IGPUDescriptorSet::SDescriptorInfo pInfos[2];
433+
video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[2];
434+
435+
// Fields common to both writes: same destination set, array element 0, one descriptor,
// and write i takes its descriptor info from pInfos[i].
for (auto i = 0; i < 2; i++)
436+
{
437+
pWrites[i].dstSet = dset.get();
438+
pWrites[i].arrayElement = 0u;
439+
pWrites[i].count = 1u;
440+
pWrites[i].info = pInfos + i;
441+
}
442+
443+
// In Buffer: binding 0 covers the whole of fftOutputBuffer_1 (offset 0, size = getSize()).
pWrites[0].binding = 0;
444+
pWrites[0].descriptorType = asset::EDT_STORAGE_BUFFER;
445+
// Redundant: count was already set to 1 in the loop above.
pWrites[0].count = 1;
446+
pInfos[0].desc = fftOutputBuffer_1;
447+
pInfos[0].buffer.size = fftOutputBuffer_1->getSize();
448+
pInfos[0].buffer.offset = 0u;
449+
450+
451+
// Out Buffer: binding 1 covers the whole of fftOutputBuffer_KernelNormalized.
pWrites[1].binding = 1;
452+
pWrites[1].descriptorType = asset::EDT_STORAGE_BUFFER;
453+
// Redundant: count was already set to 1 in the loop above.
pWrites[1].count = 1;
454+
pInfos[1].desc = fftOutputBuffer_KernelNormalized;
455+
pInfos[1].buffer.size = fftOutputBuffer_KernelNormalized->getSize();
456+
pInfos[1].buffer.offset = 0u;
457+
458+
459+
// Two writes, zero copies.
driver->updateDescriptorSets(2u, pWrites, 0u, nullptr);
460+
return dset;
461+
}();
401462

402463
// Ker Image FFT X
403464
driver->bindComputePipeline(fftPipeline_ImageInput.get());
@@ -420,12 +481,16 @@ int main()
420481
// Ker Normalization
421482
driver->bindComputePipeline(fftPipeline_KernelNormalization.get());
422483
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_KernelNormalization.get(), 0u, 1u, &fftDescriptorSet_KernelNormalization.get(), nullptr);
423-
FFTClass::dispatchKernelNormalization(driver, paddedDim, srcNumChannels);
484+
{
485+
// Inlined replacement for the removed FFTClass::dispatchKernelNormalization.
// (N-1)/W+1 is an integer round-up of N/W, where N is the total number of elements
// (width*height*depth*channels) and W is the workgroup size; one invocation handles one element.
// NOTE(review): if N were 0 the `-1u` would wrap in unsigned arithmetic and yield a huge
// dispatch — presumably paddedDim and srcNumChannels are always non-zero here; confirm.
const uint32_t dispatchSizeX = (paddedDim.width*paddedDim.height*paddedDim.depth*srcNumChannels-1u)/FFTClass::DEFAULT_WORK_GROUP_SIZE+1u;
486+
driver->dispatch(dispatchSizeX,1,1);
487+
// Barrier after the dispatch so later passes see the normalized data
// (FFT::defaultBarrier issues a GL_SHADER_STORAGE_BARRIER_BIT memory barrier).
FFTClass::defaultBarrier();
488+
}
424489
}
425490

426491
// Src FFT X
427492
auto fftDescriptorSet_Src_FFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_ImageInput->getDescriptorSetLayout(0u)));
428-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Src_FFT_X.get(), srcImageView, fftOutputBuffer_0, ISampler::ETC_CLAMP_TO_EDGE);
493+
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Src_FFT_X.get(), srcImageView, fftOutputBuffer_0, ISampler::ETC_MIRROR);
429494

430495
// Convolution
431496
auto convolveDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(convolvePipelineLayout->getDescriptorSetLayout(0u)));
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#version 430 core
// Kernel normalization pass: each invocation divides one complex element of `in_data` by the
// magnitude of element 0 and stores the quotient at the same index in `out_data`.
2+
// 256 invocations per workgroup along X. The host sizes its dispatch with
// FFTClass::DEFAULT_WORK_GROUP_SIZE (256u), so the two values must stay in sync.
layout(local_size_x=256, local_size_y=1, local_size_z=1) in;
3+
4+
// Provides nbl_glsl_complex and the complex helpers used below
// (presumably a vec2 with x = real, y = imaginary — TODO confirm).
#include "nbl/builtin/glsl/math/complex.glsl"
5+
6+
// Input values (read-only), bound at set 0 / binding 0.
layout(set=0, binding=0) restrict readonly buffer InBuffer
7+
{
8+
nbl_glsl_complex in_data[];
9+
};
10+
11+
// Normalized output values, bound at set 0 / binding 1.
layout(set=0, binding=1) restrict buffer OutBuffer
12+
{
13+
nbl_glsl_complex out_data[];
14+
};
15+
16+
// NOTE(review): there is no bounds guard on gl_GlobalInvocationID.x. The host rounds the
// dispatch up to whole workgroups, so when the element count is not a multiple of 256 the
// trailing invocations index past the intended range — confirm buffer sizes make this safe.
void main()
17+
{
18+
// Normalization factor: magnitude of the first element, re-read by every invocation.
// NOTE(review): a zero magnitude would make the division below produce Inf/NaN.
const float power = length(in_data[0]);
19+
// Disabled experiment: additionally multiplies each element by a phase factor derived from
// bit-reversed row/column indices. The 2048u and >>21u constants hard-code an 11-bit
// (2048-wide) index, so this branch is only meaningful for that size.
#if 0
20+
const uint k = bitfieldReverse(gl_GlobalInvocationID.x%2048u)>>21u;
21+
const uint l = bitfieldReverse(gl_GlobalInvocationID.x/2048u)>>21u;
22+
nbl_glsl_complex shift = nbl_glsl_expImaginary(-nbl_glsl_PI*float(k+l));
23+
//shift.x = 1.f;
24+
//shift.y = 0.f;
25+
out_data[gl_GlobalInvocationID.x] = nbl_glsl_complex_mul(in_data[gl_GlobalInvocationID.x],shift)/power;
26+
#endif
27+
// Active path: plain element-wise division by the normalization factor.
out_data[gl_GlobalInvocationID.x] = in_data[gl_GlobalInvocationID.x]/power;
28+
}

include/nbl/builtin/glsl/ext/FFT/normalization.comp

Lines changed: 0 additions & 21 deletions
This file was deleted.

include/nbl/ext/FFT/FFT.h

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ class FFT : public core::TotalInterface
6060
uint32_t dims[3];
6161
};
6262

63+
// Workgroup size used as the divisor when computing compute dispatch sizes. It is referenced from
// outside the class (FFTClass::DEFAULT_WORK_GROUP_SIZE in the ComputeFFT example), and
// must match the `local_size_x` declared by shaders dispatched with it (normalization.comp uses 256).
_NBL_STATIC_INLINE_CONSTEXPR uint32_t DEFAULT_WORK_GROUP_SIZE = 256u;
64+
6365
// returns dispatch size and fills the uniform data
6466
static inline DispatchInfo_t buildParameters(
6567
asset::VkExtent3D const & paddedInputDimensions,
@@ -280,27 +282,11 @@ class FFT : public core::TotalInterface
280282
driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, 0u, sizeof(Parameters_t), &params);
281283
}
282284

283-
// Kernel Normalization
284-
285-
static core::smart_refctd_ptr<video::IGPUSpecializedShader> createKernelNormalizationShader(video::IVideoDriver* driver, asset::IAssetManager* am);
286-
287-
static core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_KernelNormalization(video::IVideoDriver* driver);
288-
289-
static void updateDescriptorSet_KernelNormalization(
290-
video::IVideoDriver * driver,
291-
video::IGPUDescriptorSet * set,
292-
core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor,
293-
core::smart_refctd_ptr<video::IGPUBuffer> normalizedKernelBufferDescriptor);
294-
295-
static void dispatchKernelNormalization(video::IVideoDriver* driver, asset::VkExtent3D const & paddedDimension, uint32_t numChannels);
285+
static void defaultBarrier();
296286

297287
private:
298288
FFT() = delete;
299289
//~FFT() = delete;
300-
301-
_NBL_STATIC_INLINE_CONSTEXPR uint32_t DEFAULT_WORK_GROUP_SIZE = 256u;
302-
303-
static void defaultBarrier();
304290
};
305291

306292

src/nbl/builtin/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ set(nbl_resources_to_embed
139139
"nbl/builtin/glsl/ext/FFT/fft.glsl"
140140
"nbl/builtin/glsl/ext/FFT/parameters_struct.glsl"
141141
"nbl/builtin/glsl/ext/FFT/parameters.glsl"
142-
"nbl/builtin/glsl/ext/FFT/normalization.comp"
143142
"nbl/builtin/glsl/ext/LumaMeter/common.glsl"
144143
"nbl/builtin/glsl/ext/LumaMeter/impl.glsl"
145144
"nbl/builtin/glsl/ext/ToneMapper/operators.glsl"

src/nbl/ext/FFT/FFT.cpp

Lines changed: 0 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -120,135 +120,3 @@ void FFT::defaultBarrier()
120120
{
121121
COpenGLExtensionHandler::pGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
122122
}
123-
124-
// Kernel Normalization
125-
126-
// (Removed by this commit.) Creates the GPU specialized compute shader for kernel normalization.
// @param driver  video driver used to create the GPU shader objects
// @param am      asset manager used to load the shader source (only used by the `#if 1` branch)
// @return        the specialized GPU shader with entry point "main" for the compute stage
// The active `#if 1` branch loads normalization.comp from a hard-coded relative path; the `#else`
// branch is dead code that builds the same shader from an embedded GLSL string.
core::smart_refctd_ptr<video::IGPUSpecializedShader> FFT::createKernelNormalizationShader(video::IVideoDriver* driver, IAssetManager* am)
127-
{
128-
#if 1
129-
IAssetLoader::SAssetLoadParams lp;
130-
// Path is relative (presumably to the example's working directory — TODO confirm), which
// ties this library function to a particular directory layout.
auto file_path = "../../../include/nbl/builtin/glsl/ext/FFT/normalization.comp";
131-
auto shaderAsset = am->getAsset(file_path, lp);
132-
// NOTE(review): the first content element is used without checking that the load succeeded.
auto cpucs = IAsset::castDown<ICPUSpecializedShader>(shaderAsset.getContents().begin()[0]);
133-
// Only the unspecialized shader is taken from the loaded asset; specialization info is rebuilt below.
auto cs = driver->createGPUShader(nbl::core::smart_refctd_ptr<const ICPUShader>((cpucs->getUnspecialized())));
134-
asset::ISpecializedShader::SInfo csinfo(nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE, file_path);
135-
auto cs_spec = driver->createGPUSpecializedShader(cs.get(), csinfo);
136-
return cs_spec;
137-
#else
138-
// Embedded GLSL: divides every element by the magnitude of element 0.
const char* sourceFmt =
139-
R"===(#version 430 core
140-
141-
layout(local_size_x=256, local_size_y=1, local_size_z=1) in;
142-
143-
struct nbl_glsl_ext_FFT_output_t
144-
{
145-
vec2 complex_value;
146-
};
147-
148-
layout(set=0, binding=0) restrict readonly buffer InBuffer
149-
{
150-
nbl_glsl_ext_FFT_output_t in_data[];
151-
};
152-
153-
layout(set=0, binding=1) restrict buffer OutBuffer
154-
{
155-
nbl_glsl_ext_FFT_output_t out_data[];
156-
};
157-
158-
void main()
159-
{
160-
float power = length(in_data[0].complex_value);
161-
vec2 normalized_data = in_data[gl_GlobalInvocationID.x].complex_value / power;
162-
out_data[gl_GlobalInvocationID.x].complex_value = normalized_data;
163-
}
164-
)===";
165-
166-
const size_t extraSize = 0;
167-
168-
// Buffer holds the source text plus the terminating NUL.
auto shader = core::make_smart_refctd_ptr<ICPUBuffer>(strlen(sourceFmt)+extraSize+1u);
169-
// NOTE(review): the source is passed as the format string with no arguments, so any '%'
// in the GLSL text would be interpreted as a conversion specifier.
snprintf(
170-
reinterpret_cast<char*>(shader->getPointer()),shader->getSize(), sourceFmt
171-
);
172-
173-
auto cpuSpecializedShader = core::make_smart_refctd_ptr<ICPUSpecializedShader>(
174-
core::make_smart_refctd_ptr<ICPUShader>(std::move(shader),ICPUShader::buffer_contains_glsl),
175-
ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE}
176-
);
177-
178-
auto gpuShader = driver->createGPUShader(nbl::core::smart_refctd_ptr<const ICPUShader>(cpuSpecializedShader->getUnspecialized()));
179-
180-
auto gpuSpecializedShader = driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo());
181-
182-
return gpuSpecializedShader;
183-
#endif
184-
}
185-
186-
// (Removed by this commit.) Creates the pipeline layout for the kernel-normalization pass.
// @param driver  video driver used to create the descriptor set layout and the pipeline layout
// @return        a pipeline layout with one descriptor set layout holding two compute-stage
//                storage-buffer bindings (0 and 1)
core::smart_refctd_ptr<video::IGPUPipelineLayout> FFT::getPipelineLayout_KernelNormalization(video::IVideoDriver* driver)
187-
{
188-
// Each entry is presumably {binding index, descriptor type, count, shader stage, samplers}
// — TODO confirm the field order against IGPUDescriptorSetLayout::SBinding.
static IGPUDescriptorSetLayout::SBinding bnd[] =
189-
{
190-
// Binding 0: one storage buffer for the compute stage.
{
191-
0u,
192-
EDT_STORAGE_BUFFER,
193-
1u,
194-
ISpecializedShader::ESS_COMPUTE,
195-
nullptr,
196-
},
197-
// Binding 1: one storage buffer for the compute stage.
{
198-
1u,
199-
EDT_STORAGE_BUFFER,
200-
1u,
201-
ISpecializedShader::ESS_COMPUTE,
202-
nullptr,
203-
},
204-
};
205-
206-
// Range over the whole array; the element count is derived with sizeof rather than hard-coded.
core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd, bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)};
207-
208-
// Only one descriptor set layout is supplied; every other argument is nullptr.
return driver->createGPUPipelineLayout(
209-
nullptr,nullptr,
210-
driver->createGPUDescriptorSetLayout(bindings.begin(),bindings.end()),nullptr,nullptr,nullptr
211-
);
212-
}
213-
214-
// (Removed by this commit.) Writes the two storage-buffer descriptors of the kernel-normalization set.
// @param driver                            video driver that performs the descriptor update
// @param set                               descriptor set to update
// @param kernelBufferDescriptor            buffer written to binding 0 (input)
// @param normalizedKernelBufferDescriptor  buffer written to binding 1 (output)
// Both buffers are bound in full (offset 0, size = getSize()). The smart pointers are taken by value.
void FFT::updateDescriptorSet_KernelNormalization(
215-
video::IVideoDriver * driver,
216-
video::IGPUDescriptorSet * set,
217-
core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor,
218-
core::smart_refctd_ptr<video::IGPUBuffer> normalizedKernelBufferDescriptor)
219-
{
220-
video::IGPUDescriptorSet::SDescriptorInfo pInfos[2];
221-
video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[2];
222-
223-
// Fields common to both writes; write i takes its descriptor info from pInfos[i].
for (auto i=0; i < 2; i++)
224-
{
225-
pWrites[i].dstSet = set;
226-
pWrites[i].arrayElement = 0u;
227-
pWrites[i].count = 1u;
228-
pWrites[i].info = pInfos+i;
229-
}
230-
231-
// In Buffer: binding 0, whole buffer.
pWrites[0].binding = 0;
232-
pWrites[0].descriptorType = asset::EDT_STORAGE_BUFFER;
233-
// Redundant: count was already set to 1 in the loop above.
pWrites[0].count = 1;
234-
pInfos[0].desc = kernelBufferDescriptor;
235-
pInfos[0].buffer.size = kernelBufferDescriptor->getSize();
236-
pInfos[0].buffer.offset = 0u;
237-
238-
239-
// Out Buffer: binding 1, whole buffer.
pWrites[1].binding = 1;
240-
pWrites[1].descriptorType = asset::EDT_STORAGE_BUFFER;
241-
// Redundant: count was already set to 1 in the loop above.
pWrites[1].count = 1;
242-
pInfos[1].desc = normalizedKernelBufferDescriptor;
243-
pInfos[1].buffer.size = normalizedKernelBufferDescriptor->getSize();
244-
pInfos[1].buffer.offset = 0u;
245-
246-
247-
// Two writes, zero copies.
driver->updateDescriptorSets(2u, pWrites, 0u, nullptr);
248-
}
249-
250-
// (Removed by this commit.) Dispatches the kernel-normalization compute pass and issues a barrier.
// @param driver           video driver used to dispatch
// @param paddedDimension  padded extent; width*height*depth*numChannels is the element count
// @param numChannels      number of channels stored per texel
// One X workgroup is launched per DEFAULT_WORK_GROUP_SIZE elements, rounded up.
void FFT::dispatchKernelNormalization(video::IVideoDriver* driver, asset::VkExtent3D const & paddedDimension, uint32_t numChannels) {
251-
// NOTE(review): the element count is converted to float before the round-up. A float has a
// 24-bit significand, so counts above 2^24 may be rounded and the result can be one workgroup
// short; the inlined replacement in the example uses integer arithmetic instead.
const uint32_t dispatchSizeX = core::ceil(float(paddedDimension.width * paddedDimension.height * paddedDimension.depth * numChannels) / DEFAULT_WORK_GROUP_SIZE);
252-
driver->dispatch(dispatchSizeX, 1, 1);
253-
// Issues a GL_SHADER_STORAGE_BARRIER_BIT memory barrier after the dispatch.
defaultBarrier();
254-
}

0 commit comments

Comments
 (0)