@@ -114,36 +114,6 @@ inline void updateDescriptorSet_Convolution (
114
114
115
115
driver->updateDescriptorSets (descCount, pWrites, 0u , nullptr );
116
116
}
117
-
118
- static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_LastFFT (video::IVideoDriver* driver) // Creates the pipeline layout for the last FFT pass; this diff removes the helper in favour of the lastFFTPipelineLayout lambda inside main().
119
- {
120
- static IGPUDescriptorSetLayout::SBinding bnd[] = // two bindings, both restricted to ESS_COMPUTE
121
- {
122
- { // first entry: EDT_STORAGE_BUFFER
123
- 0u ,
124
- EDT_STORAGE_BUFFER,
125
- 1u ,
126
- ISpecializedShader::ESS_COMPUTE,
127
- nullptr
128
- },
129
- { // second entry: EDT_STORAGE_IMAGE
130
- 1u ,
131
- EDT_STORAGE_IMAGE,
132
- 1u ,
133
- ISpecializedShader::ESS_COMPUTE,
134
- nullptr
135
- },
136
- };
137
-
138
- using FFTClass = ext::FFT::FFT;
139
- core::SRange<const asset::SPushConstantRange> pcRange = FFTClass::getDefaultPushConstantRanges (); // push constant ranges come straight from the FFT extension
140
- core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd, bnd+sizeof (bnd)/sizeof (IGPUDescriptorSetLayout::SBinding)};; // [begin,end) over every element of bnd
141
-
142
- return driver->createGPUPipelineLayout (
143
- pcRange.begin (),pcRange.end (),
144
- driver->createGPUDescriptorSetLayout (bindings.begin (),bindings.end ()),nullptr ,nullptr ,nullptr // only the first descriptor set layout argument is non-null
145
- );
146
- }
147
117
inline void updateDescriptorSet_LastFFT (
148
118
video::IVideoDriver * driver,
149
119
video::IGPUDescriptorSet * set,
@@ -183,10 +153,13 @@ inline void updateDescriptorSet_LastFFT (
183
153
using nbl_glsl_ext_FFT_Parameters_t = ext::FFT::FFT::Parameters_t;
184
154
struct vec2 // Two-float aggregate declared ahead of the .glsl #include below; NOTE(review): presumably stands in for the GLSL vec2 type so the shared header parses as C++ — confirm.
185
155
{
186
- float x;
187
- float y;
156
+ float x,y;
188
157
};
189
- #include " convolve_parameters.glsl"
158
+ struct ivec2 // Two-int32 aggregate; main() stores the last-FFT unpad_offset in one of these.
159
+ {
160
+ int32_t x,y; // keep x and y as adjacent members of the same type: main() addresses them as (&unpad_offset.x)[i]
161
+ };
162
+ #include " extra_parameters.glsl"
190
163
191
164
192
165
int main ()
@@ -359,10 +332,38 @@ int main()
359
332
driver->createGPUDescriptorSetLayout (bindings.begin (),bindings.end ()),nullptr ,nullptr ,nullptr
360
333
);
361
334
}();
335
+ auto lastFFTPipelineLayout = [driver]() -> auto // Builds the pipeline layout later moved into lastFFTPipeline (../last_fft.comp): one descriptor set layout with two compute-stage bindings plus one push constant range.
336
+ {
337
+ IGPUDescriptorSetLayout::SBinding bnd[] = // both bindings are restricted to ESS_COMPUTE
338
+ {
339
+ { // first entry: EDT_STORAGE_BUFFER
340
+ 0u ,
341
+ EDT_STORAGE_BUFFER,
342
+ 1u ,
343
+ ISpecializedShader::ESS_COMPUTE,
344
+ nullptr
345
+ },
346
+ { // second entry: EDT_STORAGE_IMAGE
347
+ 1u ,
348
+ EDT_STORAGE_IMAGE,
349
+ 1u ,
350
+ ISpecializedShader::ESS_COMPUTE,
351
+ nullptr
352
+ },
353
+ };
354
+
355
+ const asset::SPushConstantRange pcRange = {ISpecializedShader::ESS_COMPUTE,0u ,sizeof (image_store_parameters_t )}; // sized to the whole image_store_parameters_t so the unpad_offset member pushed before the last FFT dispatch fits; NOTE(review): the struct is declared outside this view, presumably in extra_parameters.glsl — confirm
356
+ core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd, bnd+sizeof (bnd)/sizeof (IGPUDescriptorSetLayout::SBinding)}; // [begin,end) over every element of bnd
357
+
358
+ return driver->createGPUPipelineLayout (
359
+ &pcRange,&pcRange+1 , // single-element [begin,end) range
360
+ driver->createGPUDescriptorSetLayout (bindings.begin (),bindings.end ()),nullptr ,nullptr ,nullptr // only the first descriptor set layout argument is non-null
361
+ );
362
+ }();
362
363
363
364
float bloomScale = 1 .f ;
364
365
const auto kerDim = kerImageView->getCreationParameters ().image ->getCreationParameters ().extent ;
365
- const auto paddedSrcDim = [srcDim,kerDim,bloomScale]() -> auto
366
+ const auto marginSrcDim = [srcDim,kerDim,bloomScale]() -> auto
366
367
{
367
368
auto tmp = srcDim;
368
369
tmp.width += kerDim.width *bloomScale-1u ;
@@ -373,8 +374,8 @@ int main()
373
374
bloomScale = 0.5 ;
374
375
constexpr bool useHalfFloats = true ;
375
376
// Allocate Output Buffer
376
- auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem (FFTClass::getOutputBufferSize (useHalfFloats,paddedSrcDim ,srcNumChannels));
377
- auto fftOutputBuffer_1 = driver->createDeviceLocalGPUBufferOnDedMem (FFTClass::getOutputBufferSize (useHalfFloats,paddedSrcDim ,srcNumChannels));
377
+ auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem (FFTClass::getOutputBufferSize (useHalfFloats,marginSrcDim ,srcNumChannels));
378
+ auto fftOutputBuffer_1 = driver->createDeviceLocalGPUBufferOnDedMem (FFTClass::getOutputBufferSize (useHalfFloats,marginSrcDim ,srcNumChannels));
378
379
core::smart_refctd_ptr<IGPUImageView> kernelNormalizedSpectrums[channelCountOverride];
379
380
380
381
auto updateDescriptorSet = [driver](video::IGPUDescriptorSet* set, core::smart_refctd_ptr<IGPUImageView> inputImageDescriptor, asset::ISampler::E_TEXTURE_CLAMP textureWrap, core::smart_refctd_ptr<IGPUBuffer> outputBufferDescriptor) -> void
@@ -581,11 +582,11 @@ int main()
581
582
}
582
583
583
584
// pipelines
584
- auto fft_x = core::make_smart_refctd_ptr<FFTClass>(driver,paddedSrcDim .width ,useHalfFloats);
585
- auto fft_y = core::make_smart_refctd_ptr<FFTClass>(driver,paddedSrcDim .height ,useHalfFloats);
585
+ auto fft_x = core::make_smart_refctd_ptr<FFTClass>(driver,marginSrcDim .width ,useHalfFloats);
586
+ auto fft_y = core::make_smart_refctd_ptr<FFTClass>(driver,marginSrcDim .height ,useHalfFloats);
586
587
auto fftPipeline_ImageInput = driver->createGPUComputePipeline (nullptr ,core::smart_refctd_ptr (imageFirstFFTPipelineLayout),createShader (driver,fft_x.get (), " ../image_first_fft.comp" ));
587
588
auto convolvePipeline = driver->createGPUComputePipeline (nullptr , std::move (convolvePipelineLayout), createShader (driver,fft_y.get (), " ../fft_convolve_ifft.comp" ));
588
- auto lastFFTPipeline = driver->createGPUComputePipeline (nullptr , getPipelineLayout_LastFFT (driver ), createShader (driver,fft_x.get (), " ../last_fft.comp" ));
589
+ auto lastFFTPipeline = driver->createGPUComputePipeline (nullptr , std::move (lastFFTPipelineLayout ), createShader (driver,fft_x.get (), " ../last_fft.comp" ));
589
590
590
591
// Src FFT X
591
592
auto fftDescriptorSet_Src_FFT_X = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(imageFirstFFTPipelineLayout->getDescriptorSetLayout (0u )));
@@ -612,7 +613,7 @@ int main()
612
613
FFTClass::Parameters_t fftPushConstants[3 ];
613
614
FFTClass::DispatchInfo_t fftDispatchInfo[3 ];
614
615
const ISampler::E_TEXTURE_CLAMP fftPadding[2 ] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
615
- const auto passes = FFTClass::buildParameters (false ,srcNumChannels,srcDim,fftPushConstants,fftDispatchInfo,fftPadding,paddedSrcDim );
616
+ const auto passes = FFTClass::buildParameters (false ,srcNumChannels,srcDim,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim );
616
617
{
617
618
fftPushConstants[1 ].output_strides = fftPushConstants[1 ].input_strides ; // override for less work and storage (dont need to store the extra Y-slices after iFFT)
618
619
fftPushConstants[2 ].input_dimensions = fftPushConstants[1 ].input_dimensions ;
@@ -649,6 +650,14 @@ int main()
649
650
// Last FFT Padding and Copy to GPU Image
650
651
driver->bindComputePipeline (lastFFTPipeline.get ());
651
652
driver->bindDescriptorSets (EPBP_COMPUTE, lastFFTPipeline->getLayout (), 0u , 1u , &lastFFTDescriptorSet.get (), nullptr );
653
+ { // Push the unpad_offset member of image_store_parameters_t before the last FFT pass is dispatched.
654
+ const auto paddedSrcDim = FFTClass::padDimensions (marginSrcDim);
655
+ ivec2 unpad_offset = { 0 ,0 }; // stays zero on any axis that fails the workgroup-count test below
656
+ for (auto i=0u ; i<2u ; i++) // i==0 addresses .x/.width; i==1 the member that follows it (.y; presumably .height — confirm extent layout)
657
+ if (fftDispatchInfo[2 ].workGroupCount [i]>1u ) // index 2 is the dispatch info used for this pass; fftDispatchInfo has only 3 elements, so the former [3] read past the end of the array
658
+ (&unpad_offset.x )[i] = ((&paddedSrcDim.width )[i]-(&srcDim.width )[i])>>1u ; // half of the difference between the padded and the source extent
659
+ driver->pushConstants (lastFFTPipeline->getLayout (),ISpecializedShader::ESS_COMPUTE,offsetof (image_store_parameters_t ,unpad_offset),sizeof (image_store_parameters_t ::unpad_offset),&unpad_offset); // writes only the unpad_offset bytes of the push constant block
660
+ }
652
661
FFTClass::dispatchHelper (driver, lastFFTPipeline->getLayout (), fftPushConstants[2 ], fftDispatchInfo[2 ]);
653
662
654
663
if (!savedToFile)
0 commit comments