Skip to content

Commit 5f9aa40

Browse files
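Save memory and computation for the bloom kernel: allocate the kernel spectrum at the kernel's own padded size, and apply the phase shift once during kernel normalization instead of on every convolution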
1 parent 9e059c7 commit 5f9aa40

File tree

3 files changed: 85 additions and 74 deletions.

examples_tests/49.ComputeFFT/fft_convolve_ifft.comp

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -73,13 +73,14 @@ void convolve(in uint item_per_thread_count, in uint ch)
7373
for(uint t=0u; t<item_per_thread_count; t++)
7474
{
7575
uint tid = gl_LocalInvocationIndex + t * _NBL_GLSL_WORKGROUP_SIZE_;
76+
// TODO: refactor
7677
uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
77-
//coords &= uvec3(0xffeu);
78-
coords = bitfieldReverse(coords)>>uvec3(21u,22u,0u);
79-
const nbl_glsl_complex shift = nbl_glsl_expImaginary(-dot(vec2(coords.xy),vec2(512.f)/vec2(2048.f,1024.f))*nbl_glsl_PI); // TODO: does this shift go away later?
78+
const uvec3 log2_size = uvec3(11u, 10u, 0u);
79+
coords = bitfieldReverse(coords)>>(uvec3(32u)-log2_size);
80+
81+
vec2 uv = (vec2(coords.xy))/vec2(uvec2(1u)<<log2_size.xy)+vec2(0.5f)/vec2(textureSize(NormalizedKernel[ch],0));
8082
//
81-
nbl_glsl_complex value = nbl_glsl_complex_mul(nbl_glsl_ext_FFT_impl_values[t],shift);
82-
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(texelFetch(NormalizedKernel[ch],ivec2(coords.xy),0).xy,value);
83+
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(nbl_glsl_ext_FFT_impl_values[t],textureLod(NormalizedKernel[ch],uv,0).xy);
8384
}
8485
}
8586

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 76 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,9 @@ inline smart_refctd_ptr<IGPUPipelineLayout> getPipelineLayout_Convolution(IVideo
3434
IGPUSampler::SParams params =
3535
{
3636
{
37-
ISampler::ETC_CLAMP_TO_BORDER,
38-
ISampler::ETC_CLAMP_TO_BORDER,
39-
ISampler::ETC_CLAMP_TO_BORDER,
37+
ISampler::ETC_REPEAT,
38+
ISampler::ETC_REPEAT,
39+
ISampler::ETC_REPEAT,
4040
ISampler::ETBC_FLOAT_OPAQUE_BLACK,
4141
ISampler::ETF_LINEAR, // is it needed?
4242
ISampler::ETF_LINEAR,
@@ -350,45 +350,9 @@ int main()
350350
// TODO: re-examine
351351
const VkExtent3D paddedDim = FFTClass::padDimensionToNextPOT(srcDim);
352352
auto fftGPUSpecializedShader_ImageInput = FFTClass::createShader(driver, FFTClass::DataType::TEXTURE2D, paddedDim.width);
353-
auto fftGPUSpecializedShader_KernelNormalization = [&]() -> auto
354-
{
355-
IAssetLoader::SAssetLoadParams lp;
356-
auto shaderAsset = am->getAsset("../normalization.comp", lp);
357-
auto stuff = driver->getGPUObjectsFromAssets<asset::ICPUSpecializedShader>(shaderAsset.getContents(),nullptr);
358-
return *stuff->begin();
359-
}();
360353

361354
auto fftPipelineLayout_ImageInput = FFTClass::getDefaultPipelineLayout(driver, FFTClass::DataType::TEXTURE2D);
362-
auto fftPipelineLayout_KernelNormalization = [&]() -> auto
363-
{
364-
IGPUDescriptorSetLayout::SBinding bnd[] =
365-
{
366-
{
367-
0u,
368-
EDT_STORAGE_BUFFER,
369-
1u,
370-
ISpecializedShader::ESS_COMPUTE,
371-
nullptr
372-
},
373-
{
374-
1u,
375-
EDT_STORAGE_IMAGE,
376-
channelCountOverride,
377-
ISpecializedShader::ESS_COMPUTE,
378-
nullptr
379-
},
380-
};
381-
return driver->createGPUPipelineLayout(
382-
nullptr,nullptr,
383-
driver->createGPUDescriptorSetLayout(bnd,bnd+2),nullptr,nullptr,nullptr
384-
);
385-
}();
386-
387355
auto fftPipeline_ImageInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_ImageInput), std::move(fftGPUSpecializedShader_ImageInput));
388-
auto fftPipeline_KernelNormalization = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_KernelNormalization), std::move(fftGPUSpecializedShader_KernelNormalization));
389-
390-
auto fftDispatchInfo_Horizontal = FFTClass::buildParameters(paddedDim, FFTClass::Direction::X);
391-
auto fftDispatchInfo_Vertical = FFTClass::buildParameters(paddedDim, FFTClass::Direction::Y);
392356

393357
auto convolveShader = createShader_Convolution(driver, am, paddedDim.height);
394358
auto convolvePipelineLayout = getPipelineLayout_Convolution(driver);
@@ -401,35 +365,39 @@ int main()
401365
// Allocate Output Buffer
402366
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(paddedDim, srcNumChannels)); // result of: srcFFTX and kerFFTX and Convolution and IFFTY
403367
auto fftOutputBuffer_1 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(paddedDim, srcNumChannels)); // result of: srcFFTY and IFFTX
404-
auto createKernelSpectrum = [&]() -> auto
405-
{
406-
video::IGPUImage::SCreationParams imageParams;
407-
imageParams.flags = static_cast<asset::IImage::E_CREATE_FLAGS>(0u);
408-
imageParams.type = asset::IImage::ET_2D;
409-
imageParams.format = asset::EF_R16G16_SFLOAT;
410-
imageParams.extent = {paddedDim.width,paddedDim.height,1u};
411-
imageParams.mipLevels = 1u;
412-
imageParams.arrayLayers = 1u;
413-
imageParams.samples = asset::IImage::ESCF_1_BIT;
414-
415-
video::IGPUImageView::SCreationParams viewParams;
416-
viewParams.flags = static_cast<video::IGPUImageView::E_CREATE_FLAGS>(0u);
417-
viewParams.image = driver->createGPUImageOnDedMem(std::move(imageParams),driver->getDeviceLocalGPUMemoryReqs());
418-
viewParams.viewType = video::IGPUImageView::ET_2D;
419-
viewParams.format = asset::EF_R16G16_SFLOAT;
420-
viewParams.components = {};
421-
viewParams.subresourceRange = {};
422-
viewParams.subresourceRange.levelCount = 1u;
423-
viewParams.subresourceRange.layerCount = 1u;
424-
return driver->createGPUImageView(std::move(viewParams));
425-
};
426368
core::smart_refctd_ptr<IGPUImageView> kernelNormalizedSpectrums[channelCountOverride];
427-
for (uint32_t i=0u; i<channelCountOverride; i++)
428-
kernelNormalizedSpectrums[i] = createKernelSpectrum();
429369

430370
// Precompute Kernel FFT
431-
const auto kerDim = kerImageView->getCreationParameters().image->getCreationParameters().extent;
432371
{
372+
const auto kerDim = kerImageView->getCreationParameters().image->getCreationParameters().extent;
373+
const VkExtent3D paddedKerDim = FFTClass::padDimensionToNextPOT(kerDim);
374+
375+
// create kernel spectrums
376+
auto createKernelSpectrum = [&]() -> auto
377+
{
378+
video::IGPUImage::SCreationParams imageParams;
379+
imageParams.flags = static_cast<asset::IImage::E_CREATE_FLAGS>(0u);
380+
imageParams.type = asset::IImage::ET_2D;
381+
imageParams.format = asset::EF_R16G16_SFLOAT;
382+
imageParams.extent = { paddedKerDim.width,paddedKerDim.height,1u};
383+
imageParams.mipLevels = 1u;
384+
imageParams.arrayLayers = 1u;
385+
imageParams.samples = asset::IImage::ESCF_1_BIT;
386+
387+
video::IGPUImageView::SCreationParams viewParams;
388+
viewParams.flags = static_cast<video::IGPUImageView::E_CREATE_FLAGS>(0u);
389+
viewParams.image = driver->createGPUImageOnDedMem(std::move(imageParams),driver->getDeviceLocalGPUMemoryReqs());
390+
viewParams.viewType = video::IGPUImageView::ET_2D;
391+
viewParams.format = asset::EF_R16G16_SFLOAT;
392+
viewParams.components = {};
393+
viewParams.subresourceRange = {};
394+
viewParams.subresourceRange.levelCount = 1u;
395+
viewParams.subresourceRange.layerCount = 1u;
396+
return driver->createGPUImageView(std::move(viewParams));
397+
};
398+
for (uint32_t i=0u; i<channelCountOverride; i++)
399+
kernelNormalizedSpectrums[i] = createKernelSpectrum();
400+
433401
// Ker FFT X
434402
auto fftDescriptorSet_Ker_FFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_ImageInput->getDescriptorSetLayout(0u)));
435403
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Ker_FFT_X.get(), kerImageView, fftOutputBuffer_0, ISampler::ETC_CLAMP_TO_BORDER);
@@ -440,6 +408,30 @@ int main()
440408
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Ker_FFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
441409

442410
// Normalization of FFT Y result
411+
auto fftPipelineLayout_KernelNormalization = [&]() -> auto
412+
{
413+
IGPUDescriptorSetLayout::SBinding bnd[] =
414+
{
415+
{
416+
0u,
417+
EDT_STORAGE_BUFFER,
418+
1u,
419+
ISpecializedShader::ESS_COMPUTE,
420+
nullptr
421+
},
422+
{
423+
1u,
424+
EDT_STORAGE_IMAGE,
425+
channelCountOverride,
426+
ISpecializedShader::ESS_COMPUTE,
427+
nullptr
428+
},
429+
};
430+
return driver->createGPUPipelineLayout(
431+
nullptr,nullptr,
432+
driver->createGPUDescriptorSetLayout(bnd,bnd+2),nullptr,nullptr,nullptr
433+
);
434+
}();
443435
auto fftDescriptorSet_KernelNormalization = [&]() -> auto
444436
{
445437
auto dset = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_KernelNormalization->getDescriptorSetLayout(0u)));
@@ -479,25 +471,37 @@ int main()
479471
return dset;
480472
}();
481473

474+
auto fftDispatchInfo_Horizontal = FFTClass::buildParameters(paddedKerDim, FFTClass::Direction::X);
475+
auto fftDispatchInfo_Vertical = FFTClass::buildParameters(paddedKerDim, FFTClass::Direction::Y);
476+
482477
// Ker Image FFT X
483478
driver->bindComputePipeline(fftPipeline_ImageInput.get());
484479
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_ImageInput.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_X.get(), nullptr);
485-
FFTClass::pushConstants(driver, fftPipelineLayout_ImageInput.get(), kerDim, paddedDim, FFTClass::Direction::X, false, srcNumChannels, FFTClass::PaddingType::FILL_WITH_ZERO);
480+
FFTClass::pushConstants(driver, fftPipelineLayout_ImageInput.get(), kerDim, paddedKerDim, FFTClass::Direction::X, false, srcNumChannels, FFTClass::PaddingType::FILL_WITH_ZERO);
486481
FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
487482

488483
// Ker Image FFT Y
489-
auto fftPipeline_SSBOInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_SSBOInput), FFTClass::createShader(driver,FFTClass::DataType::SSBO,paddedDim.height));
484+
auto fftPipeline_SSBOInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_SSBOInput), FFTClass::createShader(driver,FFTClass::DataType::SSBO,paddedKerDim.height));
490485
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
491486
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_Y.get(), nullptr);
492-
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, false, srcNumChannels);
487+
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedKerDim, paddedKerDim, FFTClass::Direction::Y, false, srcNumChannels);
493488
FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
494489

495490
// Ker Normalization
491+
auto fftPipeline_KernelNormalization = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_KernelNormalization),
492+
[&]() -> auto
493+
{
494+
IAssetLoader::SAssetLoadParams lp;
495+
auto shaderAsset = am->getAsset("../normalization.comp", lp);
496+
auto stuff = driver->getGPUObjectsFromAssets<asset::ICPUSpecializedShader>(shaderAsset.getContents(),nullptr);
497+
return *stuff->begin();
498+
}()
499+
);
496500
driver->bindComputePipeline(fftPipeline_KernelNormalization.get());
497501
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_KernelNormalization.get(), 0u, 1u, &fftDescriptorSet_KernelNormalization.get(), nullptr);
498502
{
499-
const uint32_t dispatchSizeX = (paddedDim.width-1u)/16u+1u;
500-
const uint32_t dispatchSizeY = (paddedDim.height-1u)/16u+1u;
503+
const uint32_t dispatchSizeX = (paddedKerDim.width-1u)/16u+1u;
504+
const uint32_t dispatchSizeY = (paddedKerDim.height-1u)/16u+1u;
501505
driver->dispatch(dispatchSizeX,dispatchSizeY,kerNumChannels);
502506
FFTClass::defaultBarrier();
503507
}
@@ -524,6 +528,9 @@ int main()
524528
auto blitFBO = driver->addFrameBuffer();
525529
blitFBO->attach(video::EFAP_COLOR_ATTACHMENT0, std::move(outImgView));
526530

531+
532+
auto fftDispatchInfo_Horizontal = FFTClass::buildParameters(paddedDim, FFTClass::Direction::X);
533+
auto fftDispatchInfo_Vertical = FFTClass::buildParameters(paddedDim, FFTClass::Direction::Y);
527534
while (device->run() && receiver.keepOpen())
528535
{
529536
driver->beginScene(false, false);

examples_tests/49.ComputeFFT/normalization.comp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,10 @@ void main()
1818

1919
const float power = length(in_data[0]);
2020
nbl_glsl_complex value = in_data[gl_GlobalInvocationID.x*strides.x+gl_GlobalInvocationID.y*strides.y]/power;
21+
2122

2223
uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>(uvec2(32u)-log2_sizes);
24+
const nbl_glsl_complex shift = nbl_glsl_expImaginary(-float(coord.x+coord.y)*nbl_glsl_PI); // TODO: does this shift go away later?
25+
value = nbl_glsl_complex_mul(value,shift);
2326
imageStore(NormalizedKernel[gl_WorkGroupID.z],ivec2(coord),vec4(value,0.0,0.0));
2427
}

0 commit comments

Comments
 (0)