Skip to content

Commit e22d91d

Browse files
Got rudimentary kernel scaling to work.
TODO: strides, padding-aware FFT, push constant padding removal
1 parent 2fadaa4 commit e22d91d

File tree

2 files changed

+16
-10
lines changed

2 files changed

+16
-10
lines changed

examples_tests/49.ComputeFFT/image_first_fft.comp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ layout(set=0, binding=0) uniform sampler2D inputImage;
88
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
99
{
1010
ivec2 inputImageSize = textureSize(inputImage, 0);
11-
vec2 normalizedCoords = (vec2(coordinate.xy) + vec2(0.5f)) / vec2(inputImageSize);
12-
vec4 texelValue= textureLod(inputImage, normalizedCoords, 0);
11+
vec2 normalizedCoords = (vec2(coordinate.xy)+vec2(0.5f))/(vec2(inputImageSize)*KERNEL_SCALE);
12+
vec4 texelValue = textureLod(inputImage, normalizedCoords+vec2(0.5-0.5/KERNEL_SCALE), -log2(KERNEL_SCALE));
1313
return nbl_glsl_complex(texelValue[channel], 0.0f);
1414
}
1515
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,23 @@ constexpr uint32_t channelCountOverride = 3u;
2525
inline core::smart_refctd_ptr<video::IGPUSpecializedShader> createShader(
2626
video::IVideoDriver* driver,
2727
const FFTClass* fft,
28-
const char* includeMainName)
28+
const char* includeMainName,
29+
float kernelScale = 1.f)
2930
{
3031
const char* sourceFmt =
3132
R"===(#version 430 core
3233
3334
#define _NBL_GLSL_WORKGROUP_SIZE_ %u
3435
#define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ %u
3536
#define _NBL_GLSL_EXT_FFT_HALF_STORAGE_ %u
37+
38+
#define KERNEL_SCALE %f
3639
3740
#include "%s"
3841
3942
)===";
4043

41-
const size_t extraSize = 4u+8u+128u;
44+
const size_t extraSize = 4u+8u+8u+128u;
4245

4346
constexpr uint32_t DEFAULT_WORK_GROUP_SIZE = 256u;
4447
auto shader = core::make_smart_refctd_ptr<ICPUBuffer>(strlen(sourceFmt)+extraSize+1u);
@@ -47,6 +50,7 @@ R"===(#version 430 core
4750
DEFAULT_WORK_GROUP_SIZE,
4851
fft->getMaxFFTLength(),
4952
fft->usesHalfFloatStorage() ? 1u:0u,
53+
kernelScale,
5054
includeMainName
5155
);
5256

@@ -235,7 +239,7 @@ int main()
235239
kerImgViewInfo.format = kerImgViewInfo.image->getCreationParameters().format;
236240
kerImgViewInfo.subresourceRange.aspectMask = static_cast<IImage::E_ASPECT_FLAGS>(0u);
237241
kerImgViewInfo.subresourceRange.baseMipLevel = 0;
238-
kerImgViewInfo.subresourceRange.levelCount = 1;
242+
kerImgViewInfo.subresourceRange.levelCount = kerImgViewInfo.image->getCreationParameters().mipLevels;
239243
kerImgViewInfo.subresourceRange.baseArrayLayer = 0;
240244
kerImgViewInfo.subresourceRange.layerCount = 1;
241245
kerImageView = driver->createGPUImageView(std::move(kerImgViewInfo));
@@ -342,15 +346,17 @@ int main()
342346
);
343347
}();
344348

349+
float bloomScale = 1.f;
345350
const auto kerDim = kerImageView->getCreationParameters().image->getCreationParameters().extent;
346-
const auto paddedSrcDim = [srcDim,kerDim]() -> auto
351+
const auto paddedSrcDim = [srcDim,kerDim,bloomScale]() -> auto
347352
{
348353
auto tmp = srcDim;
349-
tmp.width += kerDim.width-1u;
350-
tmp.height += kerDim.height-1u;
351-
tmp.depth += kerDim.depth-1u;
354+
tmp.width += kerDim.width*bloomScale-1u;
355+
tmp.height += kerDim.height*bloomScale-1u;
356+
tmp.depth += kerDim.depth*bloomScale-1u;
352357
return tmp;
353358
}();
359+
bloomScale = 0.5;
354360
constexpr bool useHalfFloats = true;
355361
// Allocate Output Buffer
356362
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(useHalfFloats,paddedSrcDim,srcNumChannels));
@@ -529,7 +535,7 @@ int main()
529535
// Ker Image FFT X
530536
auto fft_x = core::make_smart_refctd_ptr<FFTClass>(driver, kerDim.height, useHalfFloats);
531537
{
532-
auto fftPipeline_ImageInput = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(imageFirstFFTPipelineLayout),createShader(driver,fft_x.get(),"../image_first_fft.comp"));
538+
auto fftPipeline_ImageInput = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(imageFirstFFTPipelineLayout),createShader(driver,fft_x.get(),"../image_first_fft.comp",bloomScale));
533539
driver->bindComputePipeline(fftPipeline_ImageInput.get());
534540
driver->bindDescriptorSets(EPBP_COMPUTE, imageFirstFFTPipelineLayout.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_X.get(), nullptr);
535541
FFTClass::dispatchHelper(driver, imageFirstFFTPipelineLayout.get(), fftPushConstants[0], fftDispatchInfo[0]);

0 commit comments

Comments
 (0)