@@ -27,47 +27,63 @@ struct DispatchInfo_t
27
27
uint32_t workGroupCount[3 ];
28
28
};
29
29
30
- static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_Convolution (video::IVideoDriver* driver) {
31
- static IGPUDescriptorSetLayout::SBinding bnd[] =
30
+ constexpr uint32_t channelCountOverride = 3u ;
31
+
32
+ inline smart_refctd_ptr<IGPUPipelineLayout> getPipelineLayout_Convolution (IVideoDriver* driver)
33
+ {
34
+ IGPUSampler::SParams params =
32
35
{
33
36
{
37
+ ISampler::ETC_CLAMP_TO_BORDER,
38
+ ISampler::ETC_CLAMP_TO_BORDER,
39
+ ISampler::ETC_CLAMP_TO_BORDER,
40
+ ISampler::ETBC_FLOAT_OPAQUE_BLACK,
41
+ ISampler::ETF_LINEAR, // is it needed?
42
+ ISampler::ETF_LINEAR,
43
+ ISampler::ESMM_NEAREST,
34
44
0u ,
35
- EDT_STORAGE_BUFFER,
36
- 1u ,
37
- ISpecializedShader::ESS_COMPUTE,
38
- nullptr
39
- },
45
+ 0u ,
46
+ ISampler::ECO_ALWAYS
47
+ }
48
+ };
49
+ auto sampler = driver->createGPUSampler (std::move (params));
50
+ smart_refctd_ptr<IGPUSampler> samplers[channelCountOverride];
51
+ std::fill_n (samplers,channelCountOverride,sampler);
52
+
53
+ IGPUDescriptorSetLayout::SBinding bnd[] =
54
+ {
40
55
{
41
- 1u ,
56
+ 0u ,
42
57
EDT_STORAGE_BUFFER,
43
58
1u ,
44
59
ISpecializedShader::ESS_COMPUTE,
45
60
nullptr
46
61
},
47
62
{
48
- 2u ,
49
- EDT_STORAGE_BUFFER,
50
63
1u ,
64
+ EDT_COMBINED_IMAGE_SAMPLER,
65
+ channelCountOverride,
51
66
ISpecializedShader::ESS_COMPUTE,
52
- nullptr
53
- },
67
+ samplers
68
+ }
54
69
};
55
70
56
71
using FFTClass = ext::FFT::FFT;
57
72
core::SRange<const asset::SPushConstantRange> pcRange = FFTClass::getDefaultPushConstantRanges ();
58
- core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd, bnd+sizeof (bnd)/sizeof (IGPUDescriptorSetLayout::SBinding)}; ;
73
+ core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd,bnd+sizeof (bnd)/sizeof (IGPUDescriptorSetLayout::SBinding)};
59
74
60
75
return driver->createGPUPipelineLayout (
61
76
pcRange.begin (),pcRange.end (),
62
77
driver->createGPUDescriptorSetLayout (bindings.begin (),bindings.end ()),nullptr ,nullptr ,nullptr
63
78
);
64
79
}
65
- static inline core::smart_refctd_ptr<video::IGPUSpecializedShader> createShader_Convolution (
80
+
81
+ inline core::smart_refctd_ptr<video::IGPUSpecializedShader> createShader_Convolution (
66
82
video::IVideoDriver* driver,
67
83
IAssetManager* am,
68
84
uint32_t maxDimensionSize)
69
85
{
70
- uint32_t const maxPaddedDimensionSize = core::roundUpToPoT (maxDimensionSize);
86
+ const uint32_t maxPaddedDimensionSize = core::roundUpToPoT (maxDimensionSize);
71
87
72
88
const char * sourceFmt =
73
89
R"===( #version 430 core
@@ -100,21 +116,20 @@ R"===(#version 430 core
100
116
101
117
return gpuSpecializedShader;
102
118
}
103
- static inline void updateDescriptorSet_Convolution (
119
+ inline void updateDescriptorSet_Convolution (
104
120
video::IVideoDriver * driver,
105
121
video::IGPUDescriptorSet * set,
106
122
core::smart_refctd_ptr<video::IGPUBuffer> inputOutputBufferDescriptor,
107
- core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor )
123
+ const core::smart_refctd_ptr<video::IGPUImageView>* kernelNormalizedSpectrumImageDescriptors )
108
124
{
109
125
constexpr uint32_t descCount = 2u ;
110
- video::IGPUDescriptorSet::SDescriptorInfo pInfos[descCount ];
126
+ video::IGPUDescriptorSet::SDescriptorInfo pInfos[1u +channelCountOverride ];
111
127
video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[descCount];
112
128
113
129
for (auto i = 0 ; i < descCount; i++)
114
130
{
115
131
pWrites[i].dstSet = set;
116
132
pWrites[i].arrayElement = 0u ;
117
- pWrites[i].count = 1u ;
118
133
pWrites[i].info = pInfos+i;
119
134
}
120
135
@@ -128,11 +143,15 @@ static inline void updateDescriptorSet_Convolution (
128
143
129
144
// Kernel Buffer
130
145
pWrites[1 ].binding = 1 ;
131
- pWrites[1 ].descriptorType = asset::EDT_STORAGE_BUFFER;
132
- pWrites[1 ].count = 1 ;
133
- pInfos[1 ].desc = kernelBufferDescriptor;
134
- pInfos[1 ].buffer .size = kernelBufferDescriptor->getSize ();
135
- pInfos[1 ].buffer .offset = 0u ;
146
+ pWrites[1 ].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER;
147
+ pWrites[1 ].count = channelCountOverride;
148
+ for (uint32_t i=0u ; i<channelCountOverride; i++)
149
+ {
150
+ auto & info = pInfos[1u +i];
151
+ info.desc = kernelNormalizedSpectrumImageDescriptors[i];
152
+ // info.image.imageLayout = ;
153
+ info.image .sampler = nullptr ;
154
+ }
136
155
137
156
driver->updateDescriptorSets (descCount, pWrites, 0u , nullptr );
138
157
}
@@ -339,6 +358,9 @@ int main()
339
358
VkExtent3D kerDim = kerImgInfo.extent ;
340
359
uint32_t srcNumChannels = getFormatChannelCount (srcFormat);
341
360
uint32_t kerNumChannels = getFormatChannelCount (kerFormat);
361
+ // ! OVERRIDE (we dont need alpha)
362
+ srcNumChannels = channelCountOverride;
363
+ kerNumChannels = channelCountOverride;
342
364
assert (srcNumChannels == kerNumChannels); // Just to make sure, because the other case is not handled in this example
343
365
344
366
VkExtent3D paddedDim = FFTClass::padDimensionToNextPOT (srcDim, kerDim);
@@ -370,21 +392,21 @@ int main()
370
392
auto fftPipelineLayout_ImageInput = FFTClass::getDefaultPipelineLayout (driver, FFTClass::DataType::TEXTURE2D);
371
393
auto fftPipelineLayout_KernelNormalization = [&]() -> auto
372
394
{
373
- static IGPUDescriptorSetLayout::SBinding bnd[] =
395
+ IGPUDescriptorSetLayout::SBinding bnd[] =
374
396
{
375
397
{
376
398
0u ,
377
399
EDT_STORAGE_BUFFER,
378
400
1u ,
379
401
ISpecializedShader::ESS_COMPUTE,
380
- nullptr ,
402
+ nullptr
381
403
},
382
404
{
383
405
1u ,
384
- EDT_STORAGE_BUFFER ,
385
- 1u ,
406
+ EDT_STORAGE_IMAGE ,
407
+ channelCountOverride ,
386
408
ISpecializedShader::ESS_COMPUTE,
387
- nullptr ,
409
+ nullptr
388
410
},
389
411
};
390
412
return driver->createGPUPipelineLayout (
@@ -411,8 +433,31 @@ int main()
411
433
// Allocate Output Buffer
412
434
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem (FFTClass::getOutputBufferSize (paddedDim, srcNumChannels)); // result of: srcFFTX and kerFFTX and Convolution and IFFTY
413
435
auto fftOutputBuffer_1 = driver->createDeviceLocalGPUBufferOnDedMem (FFTClass::getOutputBufferSize (paddedDim, srcNumChannels)); // result of: srcFFTY and IFFTX
414
- auto fftOutputBuffer_KernelNormalized = driver->createDeviceLocalGPUBufferOnDedMem (FFTClass::getOutputBufferSize (paddedDim, srcNumChannels)); // result of: kerFFTY
415
-
436
+ auto createKernelSpectrum = [&]() -> auto
437
+ {
438
+ video::IGPUImage::SCreationParams imageParams;
439
+ imageParams.flags = static_cast <asset::IImage::E_CREATE_FLAGS>(0u );
440
+ imageParams.type = asset::IImage::ET_2D;
441
+ imageParams.format = asset::EF_R16G16_SFLOAT;
442
+ imageParams.extent = {paddedDim.width ,paddedDim.height ,1u };
443
+ imageParams.mipLevels = 1u ;
444
+ imageParams.arrayLayers = 1u ;
445
+ imageParams.samples = asset::IImage::ESCF_1_BIT;
446
+
447
+ video::IGPUImageView::SCreationParams viewParams;
448
+ viewParams.flags = static_cast <video::IGPUImageView::E_CREATE_FLAGS>(0u );
449
+ viewParams.image = driver->createGPUImageOnDedMem (std::move (imageParams),driver->getDeviceLocalGPUMemoryReqs ());
450
+ viewParams.viewType = video::IGPUImageView::ET_2D;
451
+ viewParams.format = asset::EF_R16G16_SFLOAT;
452
+ viewParams.components = {};
453
+ viewParams.subresourceRange = {};
454
+ viewParams.subresourceRange .levelCount = 1u ;
455
+ viewParams.subresourceRange .layerCount = 1u ;
456
+ return driver->createGPUImageView (std::move (viewParams));
457
+ };
458
+ core::smart_refctd_ptr<IGPUImageView> kernelNormalizedSpectrums[channelCountOverride];
459
+ for (uint32_t i=0u ; i<channelCountOverride; i++)
460
+ kernelNormalizedSpectrums[i] = createKernelSpectrum ();
416
461
417
462
// Precompute Kernel FFT
418
463
{
@@ -429,7 +474,7 @@ int main()
429
474
{
430
475
auto dset = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_KernelNormalization->getDescriptorSetLayout (0u )));
431
476
432
- video::IGPUDescriptorSet::SDescriptorInfo pInfos[2 ];
477
+ video::IGPUDescriptorSet::SDescriptorInfo pInfos[1 +channelCountOverride ];
433
478
video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[2 ];
434
479
435
480
for (auto i = 0 ; i < 2 ; i++)
@@ -450,11 +495,15 @@ int main()
450
495
451
496
// Out Buffer
452
497
pWrites[1 ].binding = 1 ;
453
- pWrites[1 ].descriptorType = asset::EDT_STORAGE_BUFFER;
454
- pWrites[1 ].count = 1 ;
455
- pInfos[1 ].desc = fftOutputBuffer_KernelNormalized;
456
- pInfos[1 ].buffer .size = fftOutputBuffer_KernelNormalized->getSize ();
457
- pInfos[1 ].buffer .offset = 0u ;
498
+ pWrites[1 ].descriptorType = asset::EDT_STORAGE_IMAGE;
499
+ pWrites[1 ].count = channelCountOverride;
500
+ for (uint32_t i=0u ; i<channelCountOverride; i++)
501
+ {
502
+ auto & info = pInfos[1u +i];
503
+ info.desc = kernelNormalizedSpectrums[i];
504
+ // info.image.imageLayout = ;
505
+ info.image .sampler = nullptr ;
506
+ }
458
507
459
508
driver->updateDescriptorSets (2u , pWrites, 0u , nullptr );
460
509
return dset;
@@ -472,18 +521,13 @@ int main()
472
521
FFTClass::pushConstants (driver, fftPipelineLayout_SSBOInput.get (), paddedDim, paddedDim, FFTClass::Direction::Y, false , srcNumChannels);
473
522
FFTClass::dispatchHelper (driver, fftDispatchInfo_Vertical);
474
523
475
- // Ker Image FFT Y
476
- driver->bindComputePipeline (fftPipeline_SSBOInput.get ());
477
- driver->bindDescriptorSets (EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get (), 0u , 1u , &fftDescriptorSet_Ker_FFT_Y.get (), nullptr );
478
- FFTClass::pushConstants (driver, fftPipelineLayout_SSBOInput.get (), paddedDim, paddedDim, FFTClass::Direction::Y, false , srcNumChannels);
479
- FFTClass::dispatchHelper (driver, fftDispatchInfo_Vertical);
480
-
481
524
// Ker Normalization
482
525
driver->bindComputePipeline (fftPipeline_KernelNormalization.get ());
483
526
driver->bindDescriptorSets (EPBP_COMPUTE, fftPipelineLayout_KernelNormalization.get (), 0u , 1u , &fftDescriptorSet_KernelNormalization.get (), nullptr );
484
527
{
485
- const uint32_t dispatchSizeX = (paddedDim.width *paddedDim.height *paddedDim.depth *srcNumChannels-1u )/FFTClass::DEFAULT_WORK_GROUP_SIZE+1u ;
486
- driver->dispatch (dispatchSizeX,1 ,1 );
528
+ const uint32_t dispatchSizeX = (paddedDim.width -1u )/16u +1u ;
529
+ const uint32_t dispatchSizeY = (paddedDim.height -1u )/16u +1u ;
530
+ driver->dispatch (dispatchSizeX,dispatchSizeY,kerNumChannels);
487
531
FFTClass::defaultBarrier ();
488
532
}
489
533
}
@@ -494,7 +538,7 @@ int main()
494
538
495
539
// Convolution
496
540
auto convolveDescriptorSet = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(convolvePipelineLayout->getDescriptorSetLayout (0u )));
497
- updateDescriptorSet_Convolution (driver, convolveDescriptorSet.get (), fftOutputBuffer_0, fftOutputBuffer_KernelNormalized );
541
+ updateDescriptorSet_Convolution (driver, convolveDescriptorSet.get (), fftOutputBuffer_0, kernelNormalizedSpectrums );
498
542
499
543
// Last IFFTX
500
544
auto lastFFTDescriptorSet = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(lastFFTPipelineLayout->getDescriptorSetLayout (0u )));
0 commit comments