Skip to content

Commit 24386d7

Browse files
move some more stuff to push constants
1 parent f416c8d commit 24386d7

File tree

4 files changed

+72
-44
lines changed

4 files changed

+72
-44
lines changed

examples_tests/49.ComputeFFT/convolve_parameters.glsl renamed to examples_tests/49.ComputeFFT/extra_parameters.glsl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,10 @@ struct convolve_parameters_t
77
{
88
nbl_glsl_ext_FFT_Parameters_t fft;
99
vec2 kernel_half_pixel_size;
10+
};
11+
12+
struct image_store_parameters_t // push-constant payload of last_fft.comp; main.cpp includes this same file to get the matching C++ struct
13+
{
14+
nbl_glsl_ext_FFT_Parameters_t fft; // returned by nbl_glsl_ext_FFT_getParameters() in last_fft.comp
15+
ivec2 unpad_offset; // subtracted from the FFT output coordinate before the bounds check in last_fft.comp
1016
};

examples_tests/49.ComputeFFT/fft_convolve_ifft.comp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) i
44

55
layout(set=0, binding=2) uniform sampler2D NormalizedKernel[3];
66

7-
#include "convolve_parameters.glsl"
7+
#include "extra_parameters.glsl"
88
layout(push_constant) uniform PushConstants
99
{
1010
convolve_parameters_t params;

examples_tests/49.ComputeFFT/last_fft.comp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,25 @@ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) i
44
layout(set=0, binding=1, rgba16f) uniform image2D outImage;
55
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
66

7+
8+
#include "extra_parameters.glsl"
9+
layout(push_constant) uniform PushConstants // shader-local push constant block, declared here instead of a library default (presumably what the define below signals - TODO confirm against the FFT extension headers)
10+
{
11+
image_store_parameters_t params; // FFT parameters plus the unpad offset, see extra_parameters.glsl
12+
} pc;
13+
#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
14+
15+
nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters() // accessor for the FFT parameters stored in this shader's push constants
16+
{
17+
return pc.params.fft; // only the `fft` member; `unpad_offset` is read directly by nbl_glsl_ext_FFT_setData
18+
}
19+
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
20+
21+
722
#include <nbl/builtin/glsl/math/complex.glsl>
823
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
924
{
10-
// TODO PC
11-
const ivec2 padding = imageSize(outImage).x!=512u ? ivec2(384,0):ivec2(0);
12-
const ivec2 coords = ivec2(coordinate.xy)-padding;
25+
const ivec2 coords = ivec2(coordinate.xy)-pc.params.unpad_offset;
1326

1427
if (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageSize(outImage),coords)))
1528
{

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -114,36 +114,6 @@ inline void updateDescriptorSet_Convolution (
114114

115115
driver->updateDescriptorSets(descCount, pWrites, 0u, nullptr);
116116
}
117-
118-
static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_LastFFT(video::IVideoDriver* driver)
119-
{
120-
static IGPUDescriptorSetLayout::SBinding bnd[] =
121-
{
122-
{
123-
0u,
124-
EDT_STORAGE_BUFFER,
125-
1u,
126-
ISpecializedShader::ESS_COMPUTE,
127-
nullptr
128-
},
129-
{
130-
1u,
131-
EDT_STORAGE_IMAGE,
132-
1u,
133-
ISpecializedShader::ESS_COMPUTE,
134-
nullptr
135-
},
136-
};
137-
138-
using FFTClass = ext::FFT::FFT;
139-
core::SRange<const asset::SPushConstantRange> pcRange = FFTClass::getDefaultPushConstantRanges();
140-
core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd, bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)};;
141-
142-
return driver->createGPUPipelineLayout(
143-
pcRange.begin(),pcRange.end(),
144-
driver->createGPUDescriptorSetLayout(bindings.begin(),bindings.end()),nullptr,nullptr,nullptr
145-
);
146-
}
147117
inline void updateDescriptorSet_LastFFT (
148118
video::IVideoDriver * driver,
149119
video::IGPUDescriptorSet * set,
@@ -183,10 +153,13 @@ inline void updateDescriptorSet_LastFFT (
183153
using nbl_glsl_ext_FFT_Parameters_t = ext::FFT::FFT::Parameters_t;
184154
struct vec2
185155
{
186-
float x;
187-
float y;
156+
float x,y;
188157
};
189-
#include "convolve_parameters.glsl"
158+
struct ivec2 // C++ stand-in for the GLSL type so that "extra_parameters.glsl" (which declares an ivec2 member) can be #included below
159+
{
160+
int32_t x,y; // two 32-bit signed components, addressed as (&v.x)[i] when the unpad offset is filled in
161+
};
162+
#include "extra_parameters.glsl"
190163

191164

192165
int main()
@@ -359,10 +332,38 @@ int main()
359332
driver->createGPUDescriptorSetLayout(bindings.begin(),bindings.end()),nullptr,nullptr,nullptr
360333
);
361334
}();
335+
auto lastFFTPipelineLayout = [driver]() -> auto // immediately-invoked lambda: builds the pipeline layout later moved into the last_fft.comp compute pipeline
336+
{
337+
IGPUDescriptorSetLayout::SBinding bnd[] =
338+
{
339+
{
340+
0u, // binding 0: presumably the FFT input buffer - its declaration is not visible in this view, verify against the shader includes
341+
EDT_STORAGE_BUFFER,
342+
1u,
343+
ISpecializedShader::ESS_COMPUTE,
344+
nullptr
345+
},
346+
{
347+
1u, // binding 1: matches `layout(set=0, binding=1, rgba16f) uniform image2D outImage` in last_fft.comp
348+
EDT_STORAGE_IMAGE,
349+
1u,
350+
ISpecializedShader::ESS_COMPUTE,
351+
nullptr
352+
},
353+
};
354+
355+
const asset::SPushConstantRange pcRange = {ISpecializedShader::ESS_COMPUTE,0u,sizeof(image_store_parameters_t)}; // one compute range sized to the whole image_store_parameters_t (fft parameters + unpad_offset)
356+
core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd, bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)};;
357+
358+
return driver->createGPUPipelineLayout(
359+
&pcRange,&pcRange+1, // [begin,end) pointer pair over the single push constant range
360+
driver->createGPUDescriptorSetLayout(bindings.begin(),bindings.end()),nullptr,nullptr,nullptr // only the first of the four descriptor set layout arguments is populated
361+
);
362+
}();
362363

363364
float bloomScale = 1.f;
364365
const auto kerDim = kerImageView->getCreationParameters().image->getCreationParameters().extent;
365-
const auto paddedSrcDim = [srcDim,kerDim,bloomScale]() -> auto
366+
const auto marginSrcDim = [srcDim,kerDim,bloomScale]() -> auto
366367
{
367368
auto tmp = srcDim;
368369
tmp.width += kerDim.width*bloomScale-1u;
@@ -373,8 +374,8 @@ int main()
373374
bloomScale = 0.5;
374375
constexpr bool useHalfFloats = true;
375376
// Allocate Output Buffer
376-
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(useHalfFloats,paddedSrcDim,srcNumChannels));
377-
auto fftOutputBuffer_1 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(useHalfFloats,paddedSrcDim,srcNumChannels));
377+
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(useHalfFloats,marginSrcDim,srcNumChannels));
378+
auto fftOutputBuffer_1 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(useHalfFloats,marginSrcDim,srcNumChannels));
378379
core::smart_refctd_ptr<IGPUImageView> kernelNormalizedSpectrums[channelCountOverride];
379380

380381
auto updateDescriptorSet = [driver](video::IGPUDescriptorSet* set, core::smart_refctd_ptr<IGPUImageView> inputImageDescriptor, asset::ISampler::E_TEXTURE_CLAMP textureWrap, core::smart_refctd_ptr<IGPUBuffer> outputBufferDescriptor) -> void
@@ -581,11 +582,11 @@ int main()
581582
}
582583

583584
// pipelines
584-
auto fft_x = core::make_smart_refctd_ptr<FFTClass>(driver,paddedSrcDim.width,useHalfFloats);
585-
auto fft_y = core::make_smart_refctd_ptr<FFTClass>(driver,paddedSrcDim.height,useHalfFloats);
585+
auto fft_x = core::make_smart_refctd_ptr<FFTClass>(driver,marginSrcDim.width,useHalfFloats);
586+
auto fft_y = core::make_smart_refctd_ptr<FFTClass>(driver,marginSrcDim.height,useHalfFloats);
586587
auto fftPipeline_ImageInput = driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(imageFirstFFTPipelineLayout),createShader(driver,fft_x.get(), "../image_first_fft.comp"));
587588
auto convolvePipeline = driver->createGPUComputePipeline(nullptr, std::move(convolvePipelineLayout), createShader(driver,fft_y.get(), "../fft_convolve_ifft.comp"));
588-
auto lastFFTPipeline = driver->createGPUComputePipeline(nullptr, getPipelineLayout_LastFFT(driver), createShader(driver,fft_x.get(), "../last_fft.comp"));
589+
auto lastFFTPipeline = driver->createGPUComputePipeline(nullptr, std::move(lastFFTPipelineLayout), createShader(driver,fft_x.get(), "../last_fft.comp"));
589590

590591
// Src FFT X
591592
auto fftDescriptorSet_Src_FFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(imageFirstFFTPipelineLayout->getDescriptorSetLayout(0u)));
@@ -612,7 +613,7 @@ int main()
612613
FFTClass::Parameters_t fftPushConstants[3];
613614
FFTClass::DispatchInfo_t fftDispatchInfo[3];
614615
const ISampler::E_TEXTURE_CLAMP fftPadding[2] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
615-
const auto passes = FFTClass::buildParameters(false,srcNumChannels,srcDim,fftPushConstants,fftDispatchInfo,fftPadding,paddedSrcDim);
616+
const auto passes = FFTClass::buildParameters(false,srcNumChannels,srcDim,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
616617
{
617618
fftPushConstants[1].output_strides = fftPushConstants[1].input_strides; // override for less work and storage (dont need to store the extra Y-slices after iFFT)
618619
fftPushConstants[2].input_dimensions = fftPushConstants[1].input_dimensions;
@@ -649,6 +650,14 @@ int main()
649650
// Last FFT Padding and Copy to GPU Image
650651
driver->bindComputePipeline(lastFFTPipeline.get());
651652
driver->bindDescriptorSets(EPBP_COMPUTE, lastFFTPipeline->getLayout(), 0u, 1u, &lastFFTDescriptorSet.get(), nullptr);
653+
{ // upload the unpad offset that last_fft.comp subtracts from every output coordinate
654+
const auto paddedSrcDim = FFTClass::padDimensions(marginSrcDim);
655+
ivec2 unpad_offset = { 0,0 }; // stays 0 along any axis whose dispatch uses a single workgroup
656+
for (auto i=0u; i<2u; i++)
657+
if (fftDispatchInfo[2].workGroupCount[i]>1u) // fftDispatchInfo has exactly 3 entries, so [3] was an out-of-bounds read; [2] is the pass dispatched right after this scope
658+
(&unpad_offset.x)[i] = ((&paddedSrcDim.width)[i]-(&srcDim.width)[i])>>1u; // half of the extent added by padding along axis i
659+
driver->pushConstants(lastFFTPipeline->getLayout(),ISpecializedShader::ESS_COMPUTE,offsetof(image_store_parameters_t,unpad_offset),sizeof(image_store_parameters_t::unpad_offset),&unpad_offset); // writes only the unpad_offset member; the fft member is not touched here
660+
}
652661
FFTClass::dispatchHelper(driver, lastFFTPipeline->getLayout(), fftPushConstants[2], fftDispatchInfo[2]);
653662

654663
if(!savedToFile)

0 commit comments

Comments
 (0)