Skip to content

Commit f433107

Browse files
committed
3 stages merged!
1 parent e3f3fc5 commit f433107

File tree

8 files changed

+98
-166
lines changed

8 files changed

+98
-166
lines changed

examples_tests/49.ComputeFFT/convolve.comp

Lines changed: 0 additions & 39 deletions
This file was deleted.

examples_tests/49.ComputeFFT/fft_convolve_ifft.comp

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,16 @@ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) i
1313

1414
// Input Descriptor
1515

16-
layout(set=0, binding=0) readonly restrict buffer InputBuffer
16+
layout(set=0, binding=0) buffer InputOutputBuffer
1717
{
18-
nbl_glsl_complex inData[];
18+
nbl_glsl_complex inoutData[];
1919
};
2020

2121
layout(set=0, binding=1) restrict readonly buffer KernelBuffer
2222
{
2323
nbl_glsl_complex kerData[];
2424
};
2525

26-
// Output Descriptor
27-
28-
layout(set=0, binding=2) restrict buffer OutputBuffer
29-
{
30-
nbl_glsl_complex outData[];
31-
};
32-
3326
// Get/Set Data Function
3427
layout(push_constant) uniform PushConstants
3528
{
@@ -47,15 +40,15 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getData(in uvec3 coordinate, in uint channel)
4740
nbl_glsl_complex retValue = nbl_glsl_complex(0, 0);
4841
uvec3 dimension = nbl_glsl_ext_FFT_getDimensions();
4942
uint index = channel * (dimension.x * dimension.y * dimension.z) + coordinate.z * (dimension.x * dimension.y) + coordinate.y * (dimension.x) + coordinate.x;
50-
retValue = inData[index];
43+
retValue = inoutData[index];
5144
return retValue;
5245
}
5346

5447
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value) // Stores complex_value in the storage buffer at the element addressed by (channel, coordinate).
5548
{
5649
uvec3 dimension = nbl_glsl_ext_FFT_getPaddedDimensions(); // writes are addressed with the padded extents
5750
uint index = channel * (dimension.x * dimension.y * dimension.z) + coordinate.z * (dimension.x * dimension.y) + coordinate.y * (dimension.x) + coordinate.x; // linear index: channel-major, then z, y, x
58-
outData[index] = complex_value;
51+
inoutData[index] = complex_value; // NOTE(review): nbl_glsl_ext_FFT_getData indexes the same buffer with getDimensions() rather than getPaddedDimensions() - confirm the two layouts agree for the in-place pass
5952
}
6053

6154
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint channel) {
@@ -76,21 +69,34 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint cha
7669

7770
void convolve() // Multiplies elements of inoutData by the matching kerData elements in place; the removed version only touched index 0.
7871
{
79-
uint idx = 0;
80-
inData[idx] = nbl_glsl_complex_mul(outData[idx], kerData[idx]);
72+
uint channel = nbl_glsl_ext_FFT_getChannel();
73+
uvec3 dimension = nbl_glsl_ext_FFT_getPaddedDimensions();
74+
uint dataLength = nbl_glsl_ext_FFT_getDimLength(nbl_glsl_ext_FFT_getPaddedDimensions());
75+
76+
uint thread_offset = gl_LocalInvocationIndex;
77+
uint num_virtual_threads = (dataLength-1u)/(_NBL_GLSL_WORKGROUP_SIZE_)+1u; // ceil(dataLength / workgroup size): loop iterations per invocation
78+
79+
for(uint t = 0u; t < num_virtual_threads; t++)
80+
{
81+
uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_; // NOTE(review): stride uses _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_ while the iteration count uses _NBL_GLSL_WORKGROUP_SIZE_ - confirm both macros always expand to the same value
82+
uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid); // NOTE(review): tid is not compared against dataLength here; confirm dataLength is always a multiple of the workgroup size
83+
uint idx = channel * (dimension.x * dimension.y * dimension.z) + coords.z * (dimension.x * dimension.y) + coords.y * (dimension.x) + coords.x; // same linear layout as nbl_glsl_ext_FFT_setData; also used to index kerData
84+
nbl_glsl_complex temp = inoutData[idx]; // read before overwriting the same element
85+
inoutData[idx] = nbl_glsl_complex_mul(temp, kerData[idx]);
86+
}
8187
}
8288

8389
void main() // Runs three stages in one dispatch: FFT, element-wise multiply with kerData, then FFT with the inverse flag negated.
8490
{
85-
nbl_glsl_ext_FFT(nbl_glsl_ext_FFT_getIsInverse()); // inData->outData
86-
91+
nbl_glsl_ext_FFT(nbl_glsl_ext_FFT_getIsInverse()); // inoutData->inoutData (direction taken from nbl_glsl_ext_FFT_getIsInverse())
92+
8793
barrier(); // workgroup-wide sync before convolve() reads what the FFT stage wrote
8894
memoryBarrierShared(); // NOTE(review): convolve() reads and writes the inoutData storage buffer, not only shared memory - confirm whether memoryBarrierBuffer() is also needed here

90-
convolve(); // outData+kerData->inData
96+
convolve(); // inoutData+kerData->inoutData

9298
barrier(); // workgroup-wide sync before the second FFT reads the multiplied data
9399
memoryBarrierShared(); // NOTE(review): same question as above about buffer-memory visibility

95-
nbl_glsl_ext_FFT(!nbl_glsl_ext_FFT_getIsInverse()); // inData->outData
101+
nbl_glsl_ext_FFT(!nbl_glsl_ext_FFT_getIsInverse()); // inoutData->inoutData (opposite direction to the first stage)
96102
}

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 56 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -49,20 +49,6 @@ struct DispatchInfo_t
4949
};
5050

5151
static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_Convolution(video::IVideoDriver* driver) {
52-
static const asset::SPushConstantRange ranges[2] =
53-
{
54-
{
55-
ISpecializedShader::ESS_COMPUTE,
56-
0u,
57-
sizeof(uint32_t) * 3
58-
},
59-
{
60-
ISpecializedShader::ESS_COMPUTE,
61-
sizeof(uint32_t) * 4,
62-
sizeof(uint32_t)
63-
},
64-
};
65-
6652
static IGPUDescriptorSetLayout::SBinding bnd[] =
6753
{
6854
{
@@ -88,7 +74,8 @@ static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayou
8874
},
8975
};
9076

91-
core::SRange<const asset::SPushConstantRange> pcRange = {ranges, ranges+2};
77+
using FFTClass = ext::FFT::FFT;
78+
core::SRange<const asset::SPushConstantRange> pcRange = FFTClass::getDefaultPushConstantRanges();
9279
core::SRange<const video::IGPUDescriptorSetLayout::SBinding> bindings = {bnd, bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)};;
9380

9481
return driver->createGPUPipelineLayout(
@@ -98,40 +85,68 @@ static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayou
9885
}
9986
static inline core::smart_refctd_ptr<video::IGPUSpecializedShader> createShader_Convolution( // Formats a GLSL source template with snprintf and turns it into a GPU specialized compute shader; returns that shader.
10087
video::IVideoDriver* driver, // used to create the GPU shader and the GPU specialized shader
101-
IAssetManager* am) {
102-
IAssetLoader::SAssetLoadParams lp;
103-
auto file_path = "../convolve.comp";
104-
auto shaderAsset = am->getAsset(file_path, lp);
105-
auto cpucs = IAsset::castDown<ICPUSpecializedShader>(shaderAsset.getContents().begin()[0]);
106-
auto cs = driver->createGPUShader(nbl::core::smart_refctd_ptr<const ICPUShader>((cpucs->getUnspecialized())));
107-
asset::ISpecializedShader::SInfo csinfo(nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE, file_path);
108-
auto cs_spec = driver->createGPUSpecializedShader(cs.get(), csinfo);
109-
return cs_spec;
88+
IAssetManager* am, // not referenced by the added lines of this function
89+
uint32_t maxDimensionSize) { // maxDimensionSize feeds the MAX_DIM_SIZE and items-per-thread values substituted into the template
90+
uint32_t const maxPaddedDimensionSize = core::roundUpToPoT(maxDimensionSize); // presumably rounds up to a power of two, going by the helper name - confirm
91+
92+
const char* sourceFmt = // printf-style template with three %u fields. NOTE(review): _NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD lacks the trailing underscore the other two macro names carry - confirm it matches what the included shader expects
93+
R"===(#version 430 core
94+
95+
#define _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_ %u
96+
#define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ %u
97+
#define _NBL_GLSL_EXT_FFT_MAX_ITEMS_PER_THREAD %u
98+
99+
#include "../fft_convolve_ifft.comp"
100+
101+
)===";
102+
103+
const size_t extraSize = 32 + 32 + 32 + 32; // headroom for the expanded %u fields; four 32-byte slots are reserved although the template has three placeholders
104+
105+
constexpr uint32_t DEFAULT_WORK_GROUP_SIZE = 256u;
106+
const uint32_t maxItemsPerThread = (maxPaddedDimensionSize - 1u) / (DEFAULT_WORK_GROUP_SIZE) + 1u; // ceil(maxPaddedDimensionSize / DEFAULT_WORK_GROUP_SIZE); the unsigned subtraction would wrap if maxPaddedDimensionSize were 0
107+
auto shader = core::make_smart_refctd_ptr<ICPUBuffer>(strlen(sourceFmt)+extraSize+1u); // template length + headroom + 1 byte for the terminating NUL
108+
snprintf(
109+
reinterpret_cast<char*>(shader->getPointer()),shader->getSize(), sourceFmt, // output is bounded by the buffer size. NOTE(review): bytes past the formatted text's NUL are not written here - confirm the shader consumer stops at the NUL
110+
DEFAULT_WORK_GROUP_SIZE,
111+
maxPaddedDimensionSize,
112+
maxItemsPerThread
113+
);
114+
115+
auto cpuSpecializedShader = core::make_smart_refctd_ptr<ICPUSpecializedShader>(
116+
core::make_smart_refctd_ptr<ICPUShader>(std::move(shader),ICPUShader::buffer_contains_glsl), // the buffer is handed over as GLSL text
117+
ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE} // entry point "main", compute stage
118+
);
119+
120+
auto gpuShader = driver->createGPUShader(nbl::core::smart_refctd_ptr<const ICPUShader>(cpuSpecializedShader->getUnspecialized())); // GPU shader created from the unspecialized CPU shader
121+
122+
auto gpuSpecializedShader = driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo()); // reuses the specialization info built above
123+
124+
return gpuSpecializedShader;
110125
}
111126
static inline void updateDescriptorSet_Convolution (
112127
video::IVideoDriver * driver,
113128
video::IGPUDescriptorSet * set,
114-
core::smart_refctd_ptr<video::IGPUBuffer> sourceBufferDescriptor,
115-
core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor,
116-
core::smart_refctd_ptr<video::IGPUBuffer> outputBufferDescriptor)
129+
core::smart_refctd_ptr<video::IGPUBuffer> inputOutputBufferDescriptor,
130+
core::smart_refctd_ptr<video::IGPUBuffer> kernelBufferDescriptor)
117131
{
118-
video::IGPUDescriptorSet::SDescriptorInfo pInfos[3];
119-
video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[3];
132+
constexpr uint32_t descCount = 2u;
133+
video::IGPUDescriptorSet::SDescriptorInfo pInfos[descCount];
134+
video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[descCount];
120135

121-
for (auto i = 0; i < 3; i++)
136+
for (auto i = 0; i < descCount; i++)
122137
{
123138
pWrites[i].dstSet = set;
124139
pWrites[i].arrayElement = 0u;
125140
pWrites[i].count = 1u;
126141
pWrites[i].info = pInfos+i;
127142
}
128143

129-
// Source Buffer
144+
// InputOutput Buffer
130145
pWrites[0].binding = 0;
131146
pWrites[0].descriptorType = asset::EDT_STORAGE_BUFFER;
132147
pWrites[0].count = 1;
133-
pInfos[0].desc = sourceBufferDescriptor;
134-
pInfos[0].buffer.size = sourceBufferDescriptor->getSize();
148+
pInfos[0].desc = inputOutputBufferDescriptor;
149+
pInfos[0].buffer.size = inputOutputBufferDescriptor->getSize();
135150
pInfos[0].buffer.offset = 0u;
136151

137152
// Kernel Buffer
@@ -141,42 +156,10 @@ static inline void updateDescriptorSet_Convolution (
141156
pInfos[1].desc = kernelBufferDescriptor;
142157
pInfos[1].buffer.size = kernelBufferDescriptor->getSize();
143158
pInfos[1].buffer.offset = 0u;
144-
145-
// Output Buffer
146-
pWrites[2].binding = 2;
147-
pWrites[2].descriptorType = asset::EDT_STORAGE_BUFFER;
148-
pWrites[2].count = 1;
149-
pInfos[2].desc = outputBufferDescriptor;
150-
pInfos[2].buffer.size = outputBufferDescriptor->getSize();
151-
pInfos[2].buffer.offset = 0u;
152-
153-
driver->updateDescriptorSets(3u, pWrites, 0u, nullptr);
154-
}
155-
static inline void dispatchHelper_Convolution(
156-
video::IVideoDriver* driver,
157-
const DispatchInfo_t& dispatchInfo)
158-
{
159-
driver->dispatch(dispatchInfo.workGroupCount[0], dispatchInfo.workGroupCount[1], dispatchInfo.workGroupCount[2]);
160-
COpenGLExtensionHandler::pGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
161-
}
162-
static inline DispatchInfo_t getDispatchInfo_Convolution(
163-
asset::VkExtent3D const & paddedDimension,
164-
uint32_t numChannels)
165-
{
166-
DispatchInfo_t ret = {};
167-
168-
ret.workGroupDims[0] = 256;
169-
ret.workGroupDims[1] = 1;
170-
ret.workGroupDims[2] = 1;
171-
172-
ret.workGroupCount[0] = core::ceil(float(paddedDimension.width * paddedDimension.height * paddedDimension.depth * numChannels) / ret.workGroupDims[0]);
173-
ret.workGroupCount[1] = 1;
174-
ret.workGroupCount[2] = 1;
175159

176-
return ret;
160+
driver->updateDescriptorSets(descCount, pWrites, 0u, nullptr);
177161
}
178162

179-
180163
static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getPipelineLayout_RemovePadding(video::IVideoDriver* driver) {
181164
static const asset::SPushConstantRange ranges[3] =
182165
{
@@ -410,11 +393,9 @@ int main()
410393
auto fftDispatchInfo_Horizontal = FFTClass::buildParameters(paddedDim, FFTClass::Direction::X, srcNumChannels);
411394
auto fftDispatchInfo_Vertical = FFTClass::buildParameters(paddedDim, FFTClass::Direction::Y, srcNumChannels);
412395

413-
auto convolveShader = createShader_Convolution(driver, am);
396+
auto convolveShader = createShader_Convolution(driver, am, maxPaddedDimensionSize);
414397
auto convolvePipelineLayout = getPipelineLayout_Convolution(driver);
415398
auto convolvePipeline = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(convolvePipelineLayout), std::move(convolveShader));
416-
auto convolveDispatchInfo = getDispatchInfo_Convolution(paddedDim, srcNumChannels);
417-
418399

419400
// Allocate Output Buffer
420401
auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(paddedDim, srcNumChannels)); // result of: srcFFTX and kerFFTX and Convolution and IFFTY
@@ -464,27 +445,19 @@ int main()
464445
auto fftDescriptorSet_Src_FFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_ImageInput->getDescriptorSetLayout(0u)));
465446
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Src_FFT_X.get(), srcImageView, fftOutputBuffer_0);
466447

467-
// Src FFT Y
468-
auto fftDescriptorSet_Src_FFT_Y = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
469-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Src_FFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
470-
471448
// Convolution
472449
auto convolveDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(convolvePipelineLayout->getDescriptorSetLayout(0u)));
473-
updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_1, fftOutputBuffer_KernelNormalized, fftOutputBuffer_0);
474-
475-
// IFFT Y
476-
auto fftDescriptorSet_IFFT_Y = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
477-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
478-
450+
updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_0, fftOutputBuffer_KernelNormalized);
451+
479452
// IFFT X
480453
auto fftDescriptorSet_IFFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
481-
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_X.get(), fftOutputBuffer_1, fftOutputBuffer_0);
454+
FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_X.get(), fftOutputBuffer_0, fftOutputBuffer_1);
482455

483456
auto removePaddingShader = createShader_RemovePadding(driver, am);
484457
auto removePaddingPipelineLayout = getPipelineLayout_RemovePadding(driver);
485458
auto removePaddingPipeline = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(removePaddingPipelineLayout), std::move(removePaddingShader));
486459
auto removePaddingDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(removePaddingPipelineLayout->getDescriptorSetLayout(0u)));
487-
updateDescriptorSet_RemovePadding(driver, removePaddingDescriptorSet.get(), fftOutputBuffer_0, outImgView);
460+
updateDescriptorSet_RemovePadding(driver, removePaddingDescriptorSet.get(), fftOutputBuffer_1, outImgView);
488461
auto removePaddingDispatchInfo = getDispatchInfo_RemovePadding(outImageDim);
489462

490463
uint32_t outBufferIx = 0u;
@@ -506,23 +479,10 @@ int main()
506479
FFTClass::pushConstants(driver, fftPipelineLayout_ImageInput.get(), srcDim, paddedDim, FFTClass::Direction::X, false, FFTClass::PaddingType::CLAMP_TO_EDGE);
507480
FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
508481

509-
// Src Image FFT Y
510-
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
511-
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_Src_FFT_Y.get(), nullptr);
512-
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, false);
513-
FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
514-
515-
// Convolution
482+
// Src Image FFT Y + Convolution + Convolved IFFT Y
516483
driver->bindComputePipeline(convolvePipeline.get());
517484
driver->bindDescriptorSets(EPBP_COMPUTE, convolvePipelineLayout.get(), 0u, 1u, &convolveDescriptorSet.get(), nullptr);
518-
driver->pushConstants(convolvePipelineLayout.get(), nbl::video::IGPUSpecializedShader::ESS_COMPUTE, 0u, sizeof(uint32_t) * 3, &paddedDim); // pc.numChannels
519-
driver->pushConstants(convolvePipelineLayout.get(), nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 4, sizeof(uint32_t), &srcNumChannels); // numSrcChannels
520-
dispatchHelper_Convolution(driver, convolveDispatchInfo);
521-
522-
// Convolved IFFT Y
523-
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
524-
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_IFFT_Y.get(), nullptr);
525-
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, true);
485+
FFTClass::pushConstants(driver, convolvePipelineLayout.get(), paddedDim, paddedDim, FFTClass::Direction::Y, false);
526486
FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
527487

528488
// Convolved IFFT X

include/nbl/builtin/glsl/ext/FFT/default_compute_fft.comp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@
77
#error "USE_SSBO_FOR_INPUT should be defined."
88
#endif
99

10-
#define _NBL_GLSL_WORKGROUP_SIZE_ 256
11-
12-
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
10+
#ifndef _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_ // lets code that includes this file pre-define the workgroup size
11+
#define _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_ 256 // default when nothing was defined beforehand
12+
#endif
13+
#define _NBL_GLSL_WORKGROUP_SIZE_ _NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_ // the previously hard-coded macro now aliases the FFT-specific one
1314

15+
layout(local_size_x=_NBL_GLSL_EXT_FFT_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in; // 1D workgroup of the configured size
1416

1517
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
1618
#define _NBL_GLSL_EXT_FFT_GET_DATA_DEFINED_

0 commit comments

Comments
 (0)