Skip to content

Commit f54226f

Browse files
committed
Kernel FFT PreComputation
1 parent fc3479e commit f54226f

File tree

4 files changed

+190
-37
lines changed

4 files changed

+190
-37
lines changed

examples_tests/49.ComputeFFT/convolve.comp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,9 @@ layout(push_constant) uniform PushConstants
3636

3737
void main()
3838
{
39-
float power = length(ker_data[0].complex_value);
40-
vec2 kerData = ker_data[gl_GlobalInvocationID.x].complex_value / power;
39+
// if not already normalized -> divide by power
40+
// float power = length(ker_data[0].complex_value);
41+
vec2 kerData = ker_data[gl_GlobalInvocationID.x].complex_value;
4142
out_data[gl_GlobalInvocationID.x].complex_value =
4243
nbl_glsl_complex_mul(src_data[gl_GlobalInvocationID.x].complex_value, kerData);
4344
}

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 48 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -399,12 +399,15 @@ int main()
399399

400400
auto fftGPUSpecializedShader_SSBOInput = FFTClass::createShader(driver, FFTClass::DataType::SSBO, maxPaddedDimensionSize);
401401
auto fftGPUSpecializedShader_ImageInput = FFTClass::createShader(driver, FFTClass::DataType::TEXTURE2D, maxPaddedDimensionSize);
402+
auto fftGPUSpecializedShader_KernelNormalization = FFTClass::createKernelNormalizationShader(driver);
402403

403404
auto fftPipelineLayout_SSBOInput = FFTClass::getDefaultPipelineLayout(driver, FFTClass::DataType::SSBO);
404405
auto fftPipelineLayout_ImageInput = FFTClass::getDefaultPipelineLayout(driver, FFTClass::DataType::TEXTURE2D);
406+
auto fftPipelineLayout_KernelNormalization = FFTClass::getPipelineLayout_KernelNormalization(driver);
405407

406408
auto fftPipeline_SSBOInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_SSBOInput), std::move(fftGPUSpecializedShader_SSBOInput));
407409
auto fftPipeline_ImageInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_ImageInput), std::move(fftGPUSpecializedShader_ImageInput));
410+
auto fftPipeline_KernelNormalization = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_KernelNormalization), std::move(fftGPUSpecializedShader_KernelNormalization));
408411

409412
auto fftDispatchInfo_Horizontal = FFTClass::buildParameters(paddedDim, FFTClass::Direction::X, srcNumChannels);
410413
auto fftDispatchInfo_Vertical = FFTClass::buildParameters(paddedDim, FFTClass::Direction::Y, srcNumChannels);
@@ -413,11 +416,51 @@ int main()
413416
auto convolvePipelineLayout = getPipelineLayout_Convolution(driver);
414417
auto convolvePipeline = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(convolvePipelineLayout), std::move(convolveShader));
415418
auto convolveDispatchInfo = getDispatchInfo_Convolution(paddedDim, srcNumChannels);
419+
416420

417421
// Allocate Output Buffer
418-
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(paddedDim, srcNumChannels)); // result of: srcFFTX and kerFFTX and Convolution
422+
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(paddedDim, srcNumChannels)); // result of: srcFFTX and kerFFTX and Convolution and IFFTY
419423
auto fftOutputBuffer_1 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(paddedDim, srcNumChannels)); // result of: srcFFTY and IFFTX
420-
auto fftOutputBuffer_2 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(paddedDim, srcNumChannels)); // result of: kerFFTY and IFFTY
424+
auto fftOutputBuffer_KernelNormalized = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(paddedDim, srcNumChannels)); // result of: kerFFTY
425+
426+
427+
// Precompute Kernel FFT
428+
{
429+
// Ker FFT X
430+
auto fftDescriptorSet_Ker_FFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_ImageInput->getDescriptorSetLayout(0u)));
431+
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Ker_FFT_X.get(), kerImageView, fftOutputBuffer_0);
432+
433+
// Ker FFT Y
434+
auto fftDescriptorSet_Ker_FFT_Y = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
435+
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Ker_FFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
436+
437+
// Normalization of FFT Y result
438+
auto fftDescriptorSet_KernelNormalization = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_KernelNormalization->getDescriptorSetLayout(0u)));
439+
FFTClass::updateDescriptorSet_KernelNormalization(driver, fftDescriptorSet_KernelNormalization.get(), fftOutputBuffer_1, fftOutputBuffer_KernelNormalized);
440+
441+
// Ker Image FFT X
442+
driver->bindComputePipeline(fftPipeline_ImageInput.get());
443+
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_ImageInput.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_X.get(), nullptr);
444+
FFTClass::pushConstants(driver, fftPipelineLayout_ImageInput.get(), kerDim, paddedDim, FFTClass::Direction::X, false, FFTClass::PaddingType::FILL_WITH_ZERO);
445+
FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
446+
447+
// Ker Image FFT Y
448+
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
449+
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_Y.get(), nullptr);
450+
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, false);
451+
FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
452+
453+
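// Ker Image FFT Y (NOTE(review): identical to the FFT Y bind/push/dispatch sequence directly above — appears to be an accidental duplicate; confirm and remove)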
454+
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
455+
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_Y.get(), nullptr);
456+
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, false);
457+
FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
458+
459+
// Ker Normalization
460+
driver->bindComputePipeline(fftPipeline_KernelNormalization.get());
461+
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_KernelNormalization.get(), 0u, 1u, &fftDescriptorSet_KernelNormalization.get(), nullptr);
462+
FFTClass::dispatchKernelNormalization(driver, paddedDim, srcNumChannels);
463+
}
421464

422465
// Src FFT X
423466
auto fftDescriptorSet_Src_FFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_ImageInput->getDescriptorSetLayout(0u)));
@@ -426,35 +469,24 @@ int main()
426469
// Src FFT Y
427470
auto fftDescriptorSet_Src_FFT_Y = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
428471
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Src_FFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
429-
430-
431-
// Ker FFT X
432-
auto fftDescriptorSet_Ker_FFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_ImageInput->getDescriptorSetLayout(0u)));
433-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Ker_FFT_X.get(), kerImageView, fftOutputBuffer_0);
434472

435-
// Ker FFT Y
436-
auto fftDescriptorSet_Ker_FFT_Y = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
437-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Ker_FFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_2);
438-
439473
// Convolution
440474
auto convolveDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(convolvePipelineLayout->getDescriptorSetLayout(0u)));
441-
updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_1, fftOutputBuffer_2, fftOutputBuffer_0);
475+
updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_1, fftOutputBuffer_KernelNormalized, fftOutputBuffer_0);
442476

443477
// IFFT X
444478
auto fftDescriptorSet_IFFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
445479
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_X.get(), fftOutputBuffer_0, fftOutputBuffer_1);
446480

447481
// IFFT Y
448482
auto fftDescriptorSet_IFFT_Y = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
449-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_Y.get(), fftOutputBuffer_1, fftOutputBuffer_2);
483+
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_Y.get(), fftOutputBuffer_1, fftOutputBuffer_0);
450484

451-
452-
453485
auto removePaddingShader = createShader_RemovePadding(driver, glslc, fs);
454486
auto removePaddingPipelineLayout = getPipelineLayout_RemovePadding(driver);
455487
auto removePaddingPipeline = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(removePaddingPipelineLayout), std::move(removePaddingShader));
456488
auto removePaddingDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(removePaddingPipelineLayout->getDescriptorSetLayout(0u)));
457-
updateDescriptorSet_RemovePadding(driver, removePaddingDescriptorSet.get(), fftOutputBuffer_2, outImgView);
489+
updateDescriptorSet_RemovePadding(driver, removePaddingDescriptorSet.get(), fftOutputBuffer_0, outImgView);
458490
auto removePaddingDispatchInfo = getDispatchInfo_RemovePadding(outImageDim);
459491

460492
uint32_t outBufferIx = 0u;
@@ -479,21 +511,6 @@ int main()
479511
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, false);
480512
FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
481513

482-
483-
// Ker Image FFT X
484-
driver->bindComputePipeline(fftPipeline_ImageInput.get());
485-
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_ImageInput.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_X.get(), nullptr);
486-
FFTClass::pushConstants(driver, fftPipelineLayout_ImageInput.get(), kerDim, paddedDim, FFTClass::Direction::X, false, FFTClass::PaddingType::FILL_WITH_ZERO);
487-
FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
488-
489-
// Ker Image FFT Y
490-
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
491-
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_Y.get(), nullptr);
492-
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, false);
493-
FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
494-
495-
496-
497514
// Convolution
498515
driver->bindComputePipeline(convolvePipeline.get());
499516
driver->bindDescriptorSets(EPBP_COMPUTE, convolvePipelineLayout.get(), 0u, 1u, &convolveDescriptorSet.get(), nullptr);

include/nbl/ext/FFT/FFT.h

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,8 @@ class FFT : public core::TotalInterface
106106
return (paddedInputDimensions.width * paddedInputDimensions.height * paddedInputDimensions.depth * numChannels) * (sizeof(float) * 2);
107107
}
108108

109-
110109
static core::smart_refctd_ptr<video::IGPUSpecializedShader> createShader(video::IVideoDriver* driver, DataType inputType, uint32_t maxPaddedDimensionSize);
111-
110+
112111
_NBL_STATIC_INLINE_CONSTEXPR uint32_t MAX_DESCRIPTOR_COUNT = 2u;
113112
static inline void updateDescriptorSet(
114113
video::IVideoDriver * driver,
@@ -210,6 +209,20 @@ class FFT : public core::TotalInterface
210209
driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 10, sizeof(uint32_t), &paddingType);
211210
}
212211

212+
// Kernel Normalization
213+
214+
static core::smart_refctd_ptr<video::IGPUSpecializedShader> createKernelNormalizationShader(video::IVideoDriver* driver);
215+
216+
static core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_KernelNormalization(video::IVideoDriver* driver);
217+
218+
static void updateDescriptorSet_KernelNormalization(
219+
video::IVideoDriver * driver,
220+
video::IGPUDescriptorSet * set,
221+
core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor,
222+
core::smart_refctd_ptr<video::IGPUBuffer> normalizedKernelBufferDescriptor);
223+
224+
static void dispatchKernelNormalization(video::IVideoDriver* driver, asset::VkExtent3D const & paddedDimension, uint32_t numChannels);
225+
213226
private:
214227
FFT() = delete;
215228
//~FFT() = delete;

src/nbl/ext/FFT/FFT.cpp

Lines changed: 124 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -242,5 +242,127 @@ void main()
242242

243243
void FFT::defaultBarrier()
244244
{
245-
COpenGLExtensionHandler::pGlMemoryBarrier(GL_UNIFORM_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT|GL_BUFFER_UPDATE_BARRIER_BIT);
246-
}
245+
COpenGLExtensionHandler::pGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
246+
}
247+
248+
// Kernel Normalization
249+
250+
/**
 * Builds the compute shader used to normalize a kernel spectrum.
 *
 * Every invocation divides one complex element of the input buffer (binding 0) by the
 * magnitude of the input's first element and stores the quotient at the same index of
 * the output buffer (binding 1). The work-group width is substituted into the GLSL
 * source from DEFAULT_WORK_GROUP_X_DIM.
 *
 * @param driver Video driver used to create the GPU shader objects.
 * @return The specialized compute shader, or nullptr when the unspecialized GPU shader
 *         could not be created.
 */
core::smart_refctd_ptr<video::IGPUSpecializedShader> FFT::createKernelNormalizationShader(video::IVideoDriver* driver)
{
	const char* sourceFmt =
R"===(#version 430 core

layout(local_size_x=%u, local_size_y=1, local_size_z=1) in;

struct nbl_glsl_ext_FFT_output_t
{
	vec2 complex_value;
};

layout(set=0, binding=0) restrict readonly buffer InBuffer
{
	nbl_glsl_ext_FFT_output_t in_data[];
};

layout(set=0, binding=1) restrict buffer OutBuffer
{
	nbl_glsl_ext_FFT_output_t out_data[];
};

void main()
{
	// The dispatch is rounded up to whole work groups, so trailing invocations
	// can land past the end of the buffers and must not read or write them.
	uint index = gl_GlobalInvocationID.x;
	if (index >= uint(in_data.length()) || index >= uint(out_data.length()))
		return;

	float power = length(in_data[0].complex_value);
	vec2 normalized_data = in_data[index].complex_value / power;
	out_data[index].complex_value = normalized_data;
}
)===";

	// Head-room for the expanded "%u" (a 32-bit value prints as at most 10 digits).
	const size_t extraSize = 32;

	auto shader = core::make_smart_refctd_ptr<ICPUBuffer>(strlen(sourceFmt)+extraSize+1u);
	snprintf(
		reinterpret_cast<char*>(shader->getPointer()),shader->getSize(), sourceFmt,
		DEFAULT_WORK_GROUP_X_DIM
	);

	auto cpuSpecializedShader = core::make_smart_refctd_ptr<ICPUSpecializedShader>(
		core::make_smart_refctd_ptr<ICPUShader>(std::move(shader),ICPUShader::buffer_contains_glsl),
		ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE}
	);

	auto gpuShader = driver->createGPUShader(nbl::core::smart_refctd_ptr<const ICPUShader>(cpuSpecializedShader->getUnspecialized()));
	// Do not hand a null unspecialized shader to the specialization step.
	if (!gpuShader.get())
		return nullptr;

	auto gpuSpecializedShader = driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo());

	return gpuSpecializedShader;
}
299+
300+
/**
 * Creates the pipeline layout for the kernel normalization pass.
 *
 * The layout carries a single descriptor set layout made of two storage-buffer
 * bindings (0 and 1), each with one descriptor and flagged for the compute stage.
 * Every other argument of the pipeline layout is left null.
 *
 * @param driver Video driver used to create the layout objects.
 * @return The newly created pipeline layout.
 */
core::smart_refctd_ptr<video::IGPUPipelineLayout> FFT::getPipelineLayout_KernelNormalization(video::IVideoDriver* driver)
{
	static IGPUDescriptorSetLayout::SBinding storageBufferBindings[] =
	{
		{ 0u, EDT_STORAGE_BUFFER, 1u, ISpecializedShader::ESS_COMPUTE, nullptr, },
		{ 1u, EDT_STORAGE_BUFFER, 1u, ISpecializedShader::ESS_COMPUTE, nullptr, },
	};
	const size_t bindingCount = sizeof(storageBufferBindings)/sizeof(IGPUDescriptorSetLayout::SBinding);

	// Half-open [first,last) range over the binding table.
	const video::IGPUDescriptorSetLayout::SBinding* const first = storageBufferBindings;
	const video::IGPUDescriptorSetLayout::SBinding* const last = first+bindingCount;

	auto setLayout = driver->createGPUDescriptorSetLayout(first,last);

	return driver->createGPUPipelineLayout(
		nullptr,nullptr,
		std::move(setLayout),nullptr,nullptr,nullptr
	);
}
327+
328+
/**
 * Points the kernel-normalization descriptor set at its two storage buffers.
 *
 * Binding 0 receives `kernelBufferDescriptor` and binding 1 receives
 * `normalizedKernelBufferDescriptor`; each is bound from offset 0 over the
 * buffer's full size.
 *
 * @param driver                           Video driver that performs the descriptor update.
 * @param set                              Descriptor set to write into.
 * @param kernelBufferDescriptor           Buffer written to binding 0.
 * @param normalizedKernelBufferDescriptor Buffer written to binding 1.
 */
void FFT::updateDescriptorSet_KernelNormalization(
	video::IVideoDriver * driver,
	video::IGPUDescriptorSet * set,
	core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor,
	core::smart_refctd_ptr<video::IGPUBuffer> normalizedKernelBufferDescriptor)
{
	const uint32_t descriptorCount = 2u;
	video::IGPUDescriptorSet::SDescriptorInfo infos[descriptorCount];
	video::IGPUDescriptorSet::SWriteDescriptorSet writes[descriptorCount];

	// Index in this table == shader binding number.
	const core::smart_refctd_ptr<video::IGPUBuffer>* const buffers[descriptorCount] =
	{
		&kernelBufferDescriptor,
		&normalizedKernelBufferDescriptor
	};

	for (uint32_t slot=0u; slot<descriptorCount; ++slot)
	{
		const auto& buffer = *buffers[slot];

		infos[slot].desc = buffer;
		infos[slot].buffer.size = buffer->getSize();
		infos[slot].buffer.offset = 0u;

		writes[slot].dstSet = set;
		writes[slot].binding = slot;
		writes[slot].arrayElement = 0u;
		writes[slot].count = 1u;
		writes[slot].descriptorType = asset::EDT_STORAGE_BUFFER;
		writes[slot].info = infos+slot;
	}

	driver->updateDescriptorSets(descriptorCount, writes, 0u, nullptr);
}
363+
364+
/**
 * Dispatches the kernel normalization shader over every element of the padded kernel
 * and then issues the default memory barrier.
 *
 * The element count is width * height * depth * numChannels; the number of work groups
 * is that count divided by DEFAULT_WORK_GROUP_X_DIM, rounded up. The computation is done
 * in 64-bit integers so the product cannot wrap and the rounding does not depend on
 * float precision.
 *
 * @param driver          Video driver used to issue the dispatch.
 * @param paddedDimension Padded extent of the kernel.
 * @param numChannels     Number of channels stored per texel.
 */
void FFT::dispatchKernelNormalization(video::IVideoDriver* driver, asset::VkExtent3D const & paddedDimension, uint32_t numChannels)
{
	const uint64_t elementCount = uint64_t(paddedDimension.width) * paddedDimension.height * paddedDimension.depth * numChannels;
	const uint64_t workGroupSize = DEFAULT_WORK_GROUP_X_DIM;
	// Integer ceil-division: exact for any count, unlike the float path.
	const uint32_t dispatchSizeX = static_cast<uint32_t>((elementCount + workGroupSize - 1u) / workGroupSize);
	driver->dispatch(dispatchSizeX, 1, 1);
	defaultBarrier();
}

0 commit comments

Comments
 (0)