Skip to content

Commit c39759b

Browse files
coalesced writes > coalesced loads
1 parent e22d91d commit c39759b

File tree

3 files changed

+31
-37
lines changed

3 files changed

+31
-37
lines changed

examples_tests/49.ComputeFFT/fft_convolve_ifft.comp

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,7 @@ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) i
22

33
#include <nbl/builtin/glsl/ext/FFT/types.glsl>
44

5-
// Input and Output Descriptor
6-
layout(set=0, binding=0) buffer restrict InputOutputBuffer
7-
{
8-
nbl_glsl_ext_FFT_storage_t inoutData[];
9-
};
10-
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
11-
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
12-
13-
layout(set=0, binding=1) uniform sampler2D NormalizedKernel[3];
5+
layout(set=0, binding=2) uniform sampler2D NormalizedKernel[3];
146

157
#include "convolve_parameters.glsl"
168
layout(push_constant) uniform PushConstants
@@ -25,9 +17,6 @@ nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
2517
}
2618
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
2719

28-
// do I need to unrestrict these buffers?
29-
#define inData inoutData
30-
#define outData inoutData
3120
#define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_
3221
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
3322

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -71,35 +71,42 @@ R"===(#version 430 core
7171
inline void updateDescriptorSet_Convolution (
7272
video::IVideoDriver * driver,
7373
video::IGPUDescriptorSet * set,
74-
core::smart_refctd_ptr<video::IGPUBuffer> inputOutputBufferDescriptor,
74+
core::smart_refctd_ptr<video::IGPUBuffer> inputBufferDescriptor,
75+
core::smart_refctd_ptr<video::IGPUBuffer> outputBufferDescriptor,
7576
const core::smart_refctd_ptr<video::IGPUImageView>* kernelNormalizedSpectrumImageDescriptors)
7677
{
77-
constexpr uint32_t descCount = 2u;
78-
video::IGPUDescriptorSet::SDescriptorInfo pInfos[1u+channelCountOverride];
78+
constexpr uint32_t descCount = 3u;
79+
video::IGPUDescriptorSet::SDescriptorInfo pInfos[descCount-1u+channelCountOverride];
7980
video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[descCount];
8081

8182
for (auto i = 0; i < descCount; i++)
8283
{
84+
pWrites[i].binding = i;
8385
pWrites[i].dstSet = set;
8486
pWrites[i].arrayElement = 0u;
8587
pWrites[i].info = pInfos+i;
8688
}
8789

88-
// InputOutput Buffer
89-
pWrites[0].binding = 0;
90+
// Input Buffer
9091
pWrites[0].descriptorType = asset::EDT_STORAGE_BUFFER;
9192
pWrites[0].count = 1;
92-
pInfos[0].desc = inputOutputBufferDescriptor;
93-
pInfos[0].buffer.size = inputOutputBufferDescriptor->getSize();
93+
pInfos[0].desc = inputBufferDescriptor;
94+
pInfos[0].buffer.size = inputBufferDescriptor->getSize();
9495
pInfos[0].buffer.offset = 0u;
96+
97+
// Output Buffer
98+
pWrites[1].descriptorType = asset::EDT_STORAGE_BUFFER;
99+
pWrites[1].count = 1;
100+
pInfos[1].desc = outputBufferDescriptor;
101+
pInfos[1].buffer.size = outputBufferDescriptor->getSize();
102+
pInfos[1].buffer.offset = 0u;
95103

96104
// Kernel Buffer
97-
pWrites[1].binding = 1;
98-
pWrites[1].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER;
99-
pWrites[1].count = channelCountOverride;
105+
pWrites[2].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER;
106+
pWrites[2].count = channelCountOverride;
100107
for (uint32_t i=0u; i<channelCountOverride; i++)
101108
{
102-
auto& info = pInfos[1u+i];
109+
auto& info = pInfos[2u+i];
103110
info.desc = kernelNormalizedSpectrumImageDescriptors[i];
104111
//info.image.imageLayout = ;
105112
info.image.sampler = nullptr;
@@ -330,6 +337,13 @@ int main()
330337
},
331338
{
332339
1u,
340+
EDT_STORAGE_BUFFER,
341+
1u,
342+
ISpecializedShader::ESS_COMPUTE,
343+
nullptr
344+
},
345+
{
346+
2u,
333347
EDT_COMBINED_IMAGE_SAMPLER,
334348
channelCountOverride,
335349
ISpecializedShader::ESS_COMPUTE,
@@ -579,11 +593,11 @@ int main()
579593

580594
// Convolution
581595
auto convolveDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(convolvePipeline->getLayout()->getDescriptorSetLayout(0u)));
582-
updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_0, kernelNormalizedSpectrums);
596+
updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_0, fftOutputBuffer_1, kernelNormalizedSpectrums);
583597

584598
// Last IFFTX
585599
auto lastFFTDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(lastFFTPipeline->getLayout()->getDescriptorSetLayout(0u)));
586-
updateDescriptorSet_LastFFT(driver, lastFFTDescriptorSet.get(), fftOutputBuffer_0, outImgView);
600+
updateDescriptorSet_LastFFT(driver, lastFFTDescriptorSet.get(), fftOutputBuffer_1, outImgView);
587601

588602
uint32_t outBufferIx = 0u;
589603
auto lastPresentStamp = std::chrono::high_resolution_clock::now();
@@ -601,7 +615,9 @@ int main()
601615
const auto passes = FFTClass::buildParameters(false,srcNumChannels,srcDim,fftPushConstants,fftDispatchInfo,fftPadding,paddedSrcDim);
602616
{
603617
fftPushConstants[1].input_dimensions.x = 2048u;
604-
fftPushConstants[1].output_strides = fftPushConstants[1].input_strides;
618+
fftPushConstants[1].input_strides = fftPushConstants[0].output_strides;
619+
fftPushConstants[1].output_strides.x = 2048u;
620+
fftPushConstants[1].output_strides.y = 1u;
605621
fftPushConstants[2] = fftPushConstants[0];
606622
fftPushConstants[2].input_dimensions.x = 2048u;
607623
fftPushConstants[2].input_dimensions.y = 2048u;

include/nbl/builtin/glsl/workgroup/fft.glsl

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -105,16 +105,5 @@ void nbl_glsl_workgroupFFT(in bool is_inverse, inout nbl_glsl_complex lo, inout
105105
}
106106
}
107107

108-
#if 0 // TODO
109-
// Computes Forward FFT of two real signals
110-
void nbl_glsl_workgroupRealFFT(in bool is_inverse, in float sequenceALo, in float sequenceAHi, in float sequenceBLo, in float sequenceBHi)
111-
{
112-
nbl_glsl_complex lo = nbl_glsl_complex(sequenceALo,sequenceBLo);
113-
nbl_glsl_complex hi = nbl_glsl_complex(sequenceAHi,sequenceBHi);
114-
nbl_glsl_workgroupFFT(false,lo,hi);
115-
// extract aDFT and bDFT by using sorensens method
116-
}
117-
#endif
118-
119108

120109
#endif

0 commit comments

Comments
 (0)