@@ -49,20 +49,6 @@ struct DispatchInfo_t
49
49
};
50
50
51
51
static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_Convolution (video::IVideoDriver* driver) {
52
- static const asset::SPushConstantRange ranges[2 ] =
53
- {
54
- {
55
- ISpecializedShader::ESS_COMPUTE,
56
- 0u ,
57
- sizeof (uint32_t ) * 3
58
- },
59
- {
60
- ISpecializedShader::ESS_COMPUTE,
61
- sizeof (uint32_t ) * 4 ,
62
- sizeof (uint32_t )
63
- },
64
- };
65
-
66
52
static IGPUDescriptorSetLayout::SBinding bnd[] =
67
53
{
68
54
{
@@ -88,7 +74,8 @@ static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayou
88
74
},
89
75
};
90
76
91
- core::SRange<const asset::SPushConstantRange> pcRange = {ranges, ranges+2 };
77
+ using FFTClass = ext::FFT::FFT;
78
+ core::SRange<const asset::SPushConstantRange> pcRange = FFTClass::getDefaultPushConstantRanges ();
92
79
core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd, bnd+sizeof (bnd)/sizeof (IGPUDescriptorSetLayout::SBinding)};;
93
80
94
81
return driver->createGPUPipelineLayout (
@@ -98,40 +85,68 @@ static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayou
98
85
}
99
86
static inline core::smart_refctd_ptr<video::IGPUSpecializedShader> createShader_Convolution (
100
87
video::IVideoDriver* driver,
101
- IAssetManager* am) {
102
- IAssetLoader::SAssetLoadParams lp;
103
- auto file_path = " ../convolve.comp" ;
104
- auto shaderAsset = am->getAsset (file_path, lp);
105
- auto cpucs = IAsset::castDown<ICPUSpecializedShader>(shaderAsset.getContents ().begin ()[0 ]);
106
- auto cs = driver->createGPUShader (nbl::core::smart_refctd_ptr<const ICPUShader>((cpucs->getUnspecialized ())));
107
- asset::ISpecializedShader::SInfo csinfo (nullptr , nullptr , " main" , asset::ISpecializedShader::ESS_COMPUTE, file_path);
108
- auto cs_spec = driver->createGPUSpecializedShader (cs.get (), csinfo);
109
- return cs_spec;
88
+ IAssetManager* am,
89
+ uint32_t maxDimensionSize) {
90
+ uint32_t const maxPaddedDimensionSize = core::roundUpToPoT (maxDimensionSize);
91
+
92
+ const char * sourceFmt =
93
+ R"===( #version 430 core
94
+
95
+ #define _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_ %u
96
+ #define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ %u
97
+ #define _NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD %u
98
+
99
+ #include "../fft_convolve_ifft.comp"
100
+
101
+ )===" ;
102
+
103
+ const size_t extraSize = 32 + 32 + 32 + 32 ;
104
+
105
+ constexpr uint32_t DEFAULT_WORK_GROUP_SIZE = 256u ;
106
+ const uint32_t maxItemsPerThread = (maxPaddedDimensionSize - 1u ) / (DEFAULT_WORK_GROUP_SIZE) + 1u ;
107
+ auto shader = core::make_smart_refctd_ptr<ICPUBuffer>(strlen (sourceFmt)+extraSize+1u );
108
+ snprintf (
109
+ reinterpret_cast <char *>(shader->getPointer ()),shader->getSize (), sourceFmt,
110
+ DEFAULT_WORK_GROUP_SIZE,
111
+ maxPaddedDimensionSize,
112
+ maxItemsPerThread
113
+ );
114
+
115
+ auto cpuSpecializedShader = core::make_smart_refctd_ptr<ICPUSpecializedShader>(
116
+ core::make_smart_refctd_ptr<ICPUShader>(std::move (shader),ICPUShader::buffer_contains_glsl),
117
+ ISpecializedShader::SInfo{nullptr , nullptr , " main" , asset::ISpecializedShader::ESS_COMPUTE}
118
+ );
119
+
120
+ auto gpuShader = driver->createGPUShader (nbl::core::smart_refctd_ptr<const ICPUShader>(cpuSpecializedShader->getUnspecialized ()));
121
+
122
+ auto gpuSpecializedShader = driver->createGPUSpecializedShader (gpuShader.get (), cpuSpecializedShader->getSpecializationInfo ());
123
+
124
+ return gpuSpecializedShader;
110
125
}
111
126
static inline void updateDescriptorSet_Convolution (
112
127
video::IVideoDriver * driver,
113
128
video::IGPUDescriptorSet * set,
114
- core::smart_refctd_ptr<video::IGPUBuffer> sourceBufferDescriptor,
115
- core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor,
116
- core::smart_refctd_ptr<video::IGPUBuffer> outputBufferDescriptor)
129
+ core::smart_refctd_ptr<video::IGPUBuffer> inputOutputBufferDescriptor,
130
+ core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor)
117
131
{
118
- video::IGPUDescriptorSet::SDescriptorInfo pInfos[3 ];
119
- video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[3 ];
132
+ constexpr uint32_t descCount = 2u ;
133
+ video::IGPUDescriptorSet::SDescriptorInfo pInfos[descCount];
134
+ video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[descCount];
120
135
121
- for (auto i = 0 ; i < 3 ; i++)
136
+ for (auto i = 0 ; i < descCount ; i++)
122
137
{
123
138
pWrites[i].dstSet = set;
124
139
pWrites[i].arrayElement = 0u ;
125
140
pWrites[i].count = 1u ;
126
141
pWrites[i].info = pInfos+i;
127
142
}
128
143
129
- // Source Buffer
144
+ // InputOutput Buffer
130
145
pWrites[0 ].binding = 0 ;
131
146
pWrites[0 ].descriptorType = asset::EDT_STORAGE_BUFFER;
132
147
pWrites[0 ].count = 1 ;
133
- pInfos[0 ].desc = sourceBufferDescriptor ;
134
- pInfos[0 ].buffer .size = sourceBufferDescriptor ->getSize ();
148
+ pInfos[0 ].desc = inputOutputBufferDescriptor ;
149
+ pInfos[0 ].buffer .size = inputOutputBufferDescriptor ->getSize ();
135
150
pInfos[0 ].buffer .offset = 0u ;
136
151
137
152
// Kernel Buffer
@@ -141,42 +156,10 @@ static inline void updateDescriptorSet_Convolution (
141
156
pInfos[1 ].desc = kernelBufferDescriptor;
142
157
pInfos[1 ].buffer .size = kernelBufferDescriptor->getSize ();
143
158
pInfos[1 ].buffer .offset = 0u ;
144
-
145
- // Output Buffer
146
- pWrites[2 ].binding = 2 ;
147
- pWrites[2 ].descriptorType = asset::EDT_STORAGE_BUFFER;
148
- pWrites[2 ].count = 1 ;
149
- pInfos[2 ].desc = outputBufferDescriptor;
150
- pInfos[2 ].buffer .size = outputBufferDescriptor->getSize ();
151
- pInfos[2 ].buffer .offset = 0u ;
152
-
153
- driver->updateDescriptorSets (3u , pWrites, 0u , nullptr );
154
- }
155
- static inline void dispatchHelper_Convolution (
156
- video::IVideoDriver* driver,
157
- const DispatchInfo_t& dispatchInfo)
158
- {
159
- driver->dispatch (dispatchInfo.workGroupCount [0 ], dispatchInfo.workGroupCount [1 ], dispatchInfo.workGroupCount [2 ]);
160
- COpenGLExtensionHandler::pGlMemoryBarrier (GL_SHADER_STORAGE_BARRIER_BIT);
161
- }
162
- static inline DispatchInfo_t getDispatchInfo_Convolution (
163
- asset::VkExtent3D const & paddedDimension,
164
- uint32_t numChannels)
165
- {
166
- DispatchInfo_t ret = {};
167
-
168
- ret.workGroupDims [0 ] = 256 ;
169
- ret.workGroupDims [1 ] = 1 ;
170
- ret.workGroupDims [2 ] = 1 ;
171
-
172
- ret.workGroupCount [0 ] = core::ceil (float (paddedDimension.width * paddedDimension.height * paddedDimension.depth * numChannels) / ret.workGroupDims [0 ]);
173
- ret.workGroupCount [1 ] = 1 ;
174
- ret.workGroupCount [2 ] = 1 ;
175
159
176
- return ret ;
160
+ driver-> updateDescriptorSets (descCount, pWrites, 0u , nullptr ) ;
177
161
}
178
162
179
-
180
163
static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_RemovePadding (video::IVideoDriver* driver) {
181
164
static const asset::SPushConstantRange ranges[3 ] =
182
165
{
@@ -410,11 +393,9 @@ int main()
410
393
auto fftDispatchInfo_Horizontal = FFTClass::buildParameters (paddedDim, FFTClass::Direction::X, srcNumChannels);
411
394
auto fftDispatchInfo_Vertical = FFTClass::buildParameters (paddedDim, FFTClass::Direction::Y, srcNumChannels);
412
395
413
- auto convolveShader = createShader_Convolution (driver, am);
396
+ auto convolveShader = createShader_Convolution (driver, am, maxPaddedDimensionSize );
414
397
auto convolvePipelineLayout = getPipelineLayout_Convolution (driver);
415
398
auto convolvePipeline = driver->createGPUComputePipeline (nullptr , core::smart_refctd_ptr (convolvePipelineLayout), std::move (convolveShader));
416
- auto convolveDispatchInfo = getDispatchInfo_Convolution (paddedDim, srcNumChannels);
417
-
418
399
419
400
// Allocate Output Buffer
420
401
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem (FFTClass::getOutputBufferSize (paddedDim, srcNumChannels)); // result of: srcFFTX and kerFFTX and Convolution and IFFTY
@@ -464,27 +445,19 @@ int main()
464
445
auto fftDescriptorSet_Src_FFT_X = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_ImageInput->getDescriptorSetLayout (0u )));
465
446
FFTClass::updateDescriptorSet (driver, fftDescriptorSet_Src_FFT_X.get (), srcImageView, fftOutputBuffer_0);
466
447
467
- // Src FFT Y
468
- auto fftDescriptorSet_Src_FFT_Y = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout (0u )));
469
- FFTClass::updateDescriptorSet (driver, fftDescriptorSet_Src_FFT_Y.get (), fftOutputBuffer_0, fftOutputBuffer_1);
470
-
471
448
// Convolution
472
449
auto convolveDescriptorSet = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(convolvePipelineLayout->getDescriptorSetLayout (0u )));
473
- updateDescriptorSet_Convolution (driver, convolveDescriptorSet.get (), fftOutputBuffer_1, fftOutputBuffer_KernelNormalized, fftOutputBuffer_0);
474
-
475
- // IFFT Y
476
- auto fftDescriptorSet_IFFT_Y = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout (0u )));
477
- FFTClass::updateDescriptorSet (driver, fftDescriptorSet_IFFT_Y.get (), fftOutputBuffer_0, fftOutputBuffer_1);
478
-
450
+ updateDescriptorSet_Convolution (driver, convolveDescriptorSet.get (), fftOutputBuffer_0, fftOutputBuffer_KernelNormalized);
451
+
479
452
// IFFT X
480
453
auto fftDescriptorSet_IFFT_X = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout (0u )));
481
- FFTClass::updateDescriptorSet (driver, fftDescriptorSet_IFFT_X.get (), fftOutputBuffer_1, fftOutputBuffer_0 );
454
+ FFTClass::updateDescriptorSet (driver, fftDescriptorSet_IFFT_X.get (), fftOutputBuffer_0, fftOutputBuffer_1 );
482
455
483
456
auto removePaddingShader = createShader_RemovePadding (driver, am);
484
457
auto removePaddingPipelineLayout = getPipelineLayout_RemovePadding (driver);
485
458
auto removePaddingPipeline = driver->createGPUComputePipeline (nullptr , core::smart_refctd_ptr (removePaddingPipelineLayout), std::move (removePaddingShader));
486
459
auto removePaddingDescriptorSet = driver->createGPUDescriptorSet (core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(removePaddingPipelineLayout->getDescriptorSetLayout (0u )));
487
- updateDescriptorSet_RemovePadding (driver, removePaddingDescriptorSet.get (), fftOutputBuffer_0 , outImgView);
460
+ updateDescriptorSet_RemovePadding (driver, removePaddingDescriptorSet.get (), fftOutputBuffer_1 , outImgView);
488
461
auto removePaddingDispatchInfo = getDispatchInfo_RemovePadding (outImageDim);
489
462
490
463
uint32_t outBufferIx = 0u ;
@@ -506,23 +479,10 @@ int main()
506
479
FFTClass::pushConstants (driver, fftPipelineLayout_ImageInput.get (), srcDim, paddedDim, FFTClass::Direction::X, false , FFTClass::PaddingType::CLAMP_TO_EDGE);
507
480
FFTClass::dispatchHelper (driver, fftDispatchInfo_Horizontal);
508
481
509
- // Src Image FFT Y
510
- driver->bindComputePipeline (fftPipeline_SSBOInput.get ());
511
- driver->bindDescriptorSets (EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get (), 0u , 1u , &fftDescriptorSet_Src_FFT_Y.get (), nullptr );
512
- FFTClass::pushConstants (driver, fftPipelineLayout_SSBOInput.get (), paddedDim, paddedDim, FFTClass::Direction::Y, false );
513
- FFTClass::dispatchHelper (driver, fftDispatchInfo_Vertical);
514
-
515
- // Convolution
482
+ // Src Image FFT Y + Convolution + Convolved IFFT Y
516
483
driver->bindComputePipeline (convolvePipeline.get ());
517
484
driver->bindDescriptorSets (EPBP_COMPUTE, convolvePipelineLayout.get (), 0u , 1u , &convolveDescriptorSet.get (), nullptr );
518
- driver->pushConstants (convolvePipelineLayout.get (), nbl::video::IGPUSpecializedShader::ESS_COMPUTE, 0u , sizeof (uint32_t ) * 3 , &paddedDim); // pc.numChannels
519
- driver->pushConstants (convolvePipelineLayout.get (), nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof (uint32_t ) * 4 , sizeof (uint32_t ), &srcNumChannels); // numSrcChannels
520
- dispatchHelper_Convolution (driver, convolveDispatchInfo);
521
-
522
- // Convolved IFFT Y
523
- driver->bindComputePipeline (fftPipeline_SSBOInput.get ());
524
- driver->bindDescriptorSets (EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get (), 0u , 1u , &fftDescriptorSet_IFFT_Y.get (), nullptr );
525
- FFTClass::pushConstants (driver, fftPipelineLayout_SSBOInput.get (), paddedDim, paddedDim, FFTClass::Direction::Y, true );
485
+ FFTClass::pushConstants (driver, convolvePipelineLayout.get (), paddedDim, paddedDim, FFTClass::Direction::Y, false );
526
486
FFTClass::dispatchHelper (driver, fftDispatchInfo_Vertical);
527
487
528
488
// Convolved IFFT X
0 commit comments