Skip to content

Commit 35f0046

Browse files
Give up on dynamic scaling; also fix the lack of color in the blur kernel and clean up a few ugly statics in FFT.cpp
1 parent df814d2 commit 35f0046

File tree

6 files changed

+65
-94
lines changed

6 files changed

+65
-94
lines changed

examples_tests/49.ComputeFFT/fft_convolve_ifft.comp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,9 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint cha
6565
return nbl_glsl_ext_FFT_getData(clamped_coord, channel);
6666
}
6767

68-
6968
void convolve(in uint item_per_thread_count, in uint ch)
7069
{
71-
// TODO: decouple kernel size from image size
70+
// TODO: decouple kernel size from image size (can't get the math to work in my head)
7271
uvec3 dimension = nbl_glsl_ext_FFT_Parameters_t_getDimensions();
7372

7473
for(uint t=0u; t<item_per_thread_count; t++)
@@ -79,12 +78,12 @@ void convolve(in uint item_per_thread_count, in uint ch)
7978
const uvec3 log2_size = uvec3(11u, 10u, 0u);
8079
coords = bitfieldReverse(coords)>>(uvec3(32u)-log2_size);
8180

81+
nbl_glsl_complex sourceSpectrum = nbl_glsl_ext_FFT_impl_values[t];
82+
8283
vec2 uv = (vec2(coords.xy))/vec2(uvec2(1u)<<log2_size.xy)+vec2(0.5f)/vec2(textureSize(NormalizedKernel[ch],0));
83-
const float scaling = 0.2;
84-
uv = mix(uv,uv-1.0,greaterThan(uv,vec2(0.5)))*scaling;
8584
//
8685
nbl_glsl_complex convSpectrum = textureLod(NormalizedKernel[ch],uv,0).xy;
87-
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(nbl_glsl_ext_FFT_impl_values[t],convSpectrum);
86+
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(sourceSpectrum,convSpectrum);
8887
}
8988
}
9089

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -347,13 +347,12 @@ int main()
347347
dstImgViewInfo.image = outImg;
348348
outImgView = driver->createGPUImageView(IGPUImageView::SCreationParams(dstImgViewInfo));
349349
}
350-
// TODO: re-examine
351-
const VkExtent3D paddedDim = FFTClass::padDimensionToNextPOT(srcDim);
352-
auto fftGPUSpecializedShader_ImageInput = FFTClass::createShader(driver, FFTClass::DataType::TEXTURE2D, paddedDim.width);
350+
auto fftGPUSpecializedShader_ImageInput = FFTClass::createShader(driver, FFTClass::DataType::TEXTURE2D, srcDim.width);
353351

354352
auto fftPipelineLayout_ImageInput = FFTClass::getDefaultPipelineLayout(driver, FFTClass::DataType::TEXTURE2D);
355353
auto fftPipeline_ImageInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_ImageInput), std::move(fftGPUSpecializedShader_ImageInput));
356354

355+
const VkExtent3D paddedDim = FFTClass::padDimensionToNextPOT(srcDim);
357356
auto convolveShader = createShader_Convolution(driver, am, paddedDim.height);
358357
auto convolvePipelineLayout = getPipelineLayout_Convolution(driver);
359358
auto convolvePipeline = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(convolvePipelineLayout), std::move(convolveShader));
@@ -481,7 +480,7 @@ int main()
481480
FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
482481

483482
// Ker Image FFT Y
484-
auto fftPipeline_SSBOInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_SSBOInput), FFTClass::createShader(driver,FFTClass::DataType::SSBO,paddedKerDim.height));
483+
auto fftPipeline_SSBOInput = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(fftPipelineLayout_SSBOInput), FFTClass::createShader(driver,FFTClass::DataType::SSBO,kerDim.height));
485484
driver->bindComputePipeline(fftPipeline_SSBOInput.get());
486485
driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT_Y.get(), nullptr);
487486
FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedKerDim, paddedKerDim, FFTClass::Direction::Y, false, srcNumChannels);

examples_tests/49.ComputeFFT/normalization.comp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ void main()
1414
{
1515
// TODO: push constants
1616
const uvec2 log2_sizes = findMSB(gl_WorkGroupSize*gl_NumWorkGroups).xy;
17-
const uvec2 strides = uvec2(1u,0x1u<<log2_sizes.x);
17+
const uvec3 strides = uvec3(1u,0x1u<<log2_sizes.x,0x1u<<(log2_sizes.x+log2_sizes.y));
1818

1919
const float power = length(in_data[0]);
20-
nbl_glsl_complex value = in_data[gl_GlobalInvocationID.x*strides.x+gl_GlobalInvocationID.y*strides.y]/power;
20+
nbl_glsl_complex value = in_data[gl_GlobalInvocationID.x*strides.x+gl_GlobalInvocationID.y*strides.y+gl_GlobalInvocationID.z*strides.z]/power;
2121

2222

2323
uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>(uvec2(32u)-log2_sizes);
Binary file not shown.

include/nbl/ext/FFT/FFT.h

Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,6 @@ class FFT : public core::TotalInterface
5555
uint32_t workGroupCount[3];
5656
};
5757

58-
struct alignas(16) Uniforms_t
59-
{
60-
uint32_t dims[3];
61-
};
62-
6358
_NBL_STATIC_INLINE_CONSTEXPR uint32_t DEFAULT_WORK_GROUP_SIZE = 256u;
6459

6560
// returns dispatch size and fills the uniform data
@@ -116,18 +111,10 @@ class FFT : public core::TotalInterface
116111
static core::SRange<const asset::SPushConstantRange> getDefaultPushConstantRanges();
117112

118113
//
119-
static core::SRange<const video::IGPUDescriptorSetLayout::SBinding> getDefaultBindings(video::IVideoDriver* driver, DataType inputType);
114+
static core::smart_refctd_ptr<video::IGPUDescriptorSetLayout> getDefaultDescriptorSetLayout(video::IVideoDriver* driver, DataType inputType);
120115

121116
//
122-
static inline core::smart_refctd_ptr<video::IGPUPipelineLayout> getDefaultPipelineLayout(video::IVideoDriver* driver, DataType inputType)
123-
{
124-
auto pcRange = getDefaultPushConstantRanges();
125-
auto bindings = getDefaultBindings(driver, inputType);
126-
return driver->createGPUPipelineLayout(
127-
pcRange.begin(),pcRange.end(),
128-
driver->createGPUDescriptorSetLayout(bindings.begin(),bindings.end()),nullptr,nullptr,nullptr
129-
);
130-
}
117+
static core::smart_refctd_ptr<video::IGPUPipelineLayout> getDefaultPipelineLayout(video::IVideoDriver* driver, DataType inputType);
131118

132119
//
133120
static inline size_t getOutputBufferSize(asset::VkExtent3D const & paddedInputDimensions, uint32_t numChannels)
@@ -182,29 +169,7 @@ class FFT : public core::TotalInterface
182169
core::smart_refctd_ptr<video::IGPUBuffer> outputBufferDescriptor,
183170
asset::ISampler::E_TEXTURE_CLAMP textureWrap)
184171
{
185-
using nbl::asset::ISampler;
186-
187-
static core::smart_refctd_ptr<video::IGPUSampler> samplers[ISampler::E_TEXTURE_CLAMP::ETC_COUNT];
188-
auto & sampler = samplers[(uint32_t)textureWrap];
189-
if (!sampler)
190-
{
191-
video::IGPUSampler::SParams params =
192-
{
193-
{
194-
textureWrap,
195-
textureWrap,
196-
textureWrap,
197-
ISampler::ETBC_FLOAT_TRANSPARENT_BLACK,
198-
ISampler::ETF_NEAREST,
199-
ISampler::ETF_NEAREST,
200-
ISampler::ESMM_NEAREST,
201-
0u,
202-
0u,
203-
ISampler::ECO_ALWAYS
204-
}
205-
};
206-
sampler = driver->createGPUSampler(params);
207-
}
172+
auto sampler = getSampler(driver,textureWrap);
208173

209174
video::IGPUDescriptorSet::SDescriptorInfo pInfos[MAX_DESCRIPTOR_COUNT];
210175
video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[MAX_DESCRIPTOR_COUNT];
@@ -282,6 +247,8 @@ class FFT : public core::TotalInterface
282247
private:
283248
FFT() = delete;
284249
//~FFT() = delete;
250+
251+
static core::smart_refctd_ptr<video::IGPUSampler> getSampler(video::IVideoDriver* driver, asset::ISampler::E_TEXTURE_CLAMP textureWrap);
285252
};
286253

287254

src/nbl/ext/FFT/FFT.cpp

Lines changed: 51 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ using namespace nbl::asset;
1212
using namespace nbl::video;
1313
using namespace ext::FFT;
1414

15-
core::SRange<const asset::SPushConstantRange> FFT::getDefaultPushConstantRanges()
15+
core::SRange<const SPushConstantRange> FFT::getDefaultPushConstantRanges()
1616
{
17-
static const asset::SPushConstantRange ranges[1] =
17+
static const SPushConstantRange ranges[1] =
1818
{
1919
{
2020
ISpecializedShader::ESS_COMPUTE,
@@ -25,9 +25,31 @@ core::SRange<const asset::SPushConstantRange> FFT::getDefaultPushConstantRanges(
2525
return {ranges, ranges+1};
2626
}
2727

28-
core::SRange<const video::IGPUDescriptorSetLayout::SBinding> FFT::getDefaultBindings(video::IVideoDriver* driver, DataType inputType)
28+
core::smart_refctd_ptr<IGPUSampler> FFT::getSampler(IVideoDriver* driver,ISampler::E_TEXTURE_CLAMP textureWrap)
2929
{
30-
static core::smart_refctd_ptr<IGPUSampler> sampler;
30+
IGPUSampler::SParams params =
31+
{
32+
{
33+
textureWrap,
34+
textureWrap,
35+
textureWrap,
36+
ISampler::ETBC_FLOAT_TRANSPARENT_BLACK,
37+
ISampler::ETF_NEAREST,
38+
ISampler::ETF_NEAREST,
39+
ISampler::ESMM_NEAREST,
40+
0u,
41+
0u,
42+
ISampler::ECO_ALWAYS
43+
}
44+
};
45+
// TODO: cache using the asset manager's caches
46+
return driver->createGPUSampler(params);
47+
}
48+
49+
core::smart_refctd_ptr<IGPUDescriptorSetLayout> FFT::getDefaultDescriptorSetLayout(IVideoDriver* driver, FFT::DataType inputType)
50+
{
51+
const bool usingTexture = inputType==DataType::TEXTURE2D;
52+
core::smart_refctd_ptr<IGPUSampler> sampler = usingTexture ? getSampler(driver,ISampler::ETC_CLAMP_TO_EDGE):nullptr;
3153

3254
static IGPUDescriptorSetLayout::SBinding bnd[] =
3355
{
@@ -36,7 +58,7 @@ core::SRange<const video::IGPUDescriptorSetLayout::SBinding> FFT::getDefaultBind
3658
EDT_STORAGE_BUFFER,
3759
1u,
3860
ISpecializedShader::ESS_COMPUTE,
39-
&sampler
61+
usingTexture ? &sampler:nullptr
4062
},
4163
{
4264
1u,
@@ -47,38 +69,24 @@ core::SRange<const video::IGPUDescriptorSetLayout::SBinding> FFT::getDefaultBind
4769
},
4870
};
4971

50-
if (DataType::SSBO == inputType) {
51-
bnd[0].type = EDT_STORAGE_BUFFER;
52-
} else if (DataType::TEXTURE2D == inputType) {
72+
if (usingTexture)
5373
bnd[0].type = EDT_COMBINED_IMAGE_SAMPLER;
54-
}
55-
56-
bnd[0].samplers = nullptr;
57-
58-
if (!sampler)
59-
{
60-
IGPUSampler::SParams params =
61-
{
62-
{
63-
ISampler::ETC_CLAMP_TO_EDGE,
64-
ISampler::ETC_CLAMP_TO_EDGE,
65-
ISampler::ETC_CLAMP_TO_EDGE,
66-
ISampler::ETBC_FLOAT_OPAQUE_BLACK,
67-
ISampler::ETF_NEAREST,
68-
ISampler::ETF_NEAREST,
69-
ISampler::ESMM_NEAREST,
70-
0u,
71-
0u,
72-
ISampler::ECO_ALWAYS
73-
}
74-
};
75-
sampler = driver->createGPUSampler(params);
76-
}
77-
78-
return {bnd, bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)};
74+
else
75+
bnd[0].type = EDT_STORAGE_BUFFER;
76+
return driver->createGPUDescriptorSetLayout(bnd,bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding));
77+
}
78+
79+
//
80+
core::smart_refctd_ptr<IGPUPipelineLayout> FFT::getDefaultPipelineLayout(IVideoDriver* driver, FFT::DataType inputType)
81+
{
82+
auto pcRange = getDefaultPushConstantRanges();
83+
return driver->createGPUPipelineLayout(
84+
pcRange.begin(),pcRange.end(),
85+
getDefaultDescriptorSetLayout(driver,inputType),nullptr,nullptr,nullptr
86+
);
7987
}
8088

81-
core::smart_refctd_ptr<video::IGPUSpecializedShader> FFT::createShader(video::IVideoDriver* driver, DataType inputType, uint32_t maxDimensionSize)
89+
core::smart_refctd_ptr<IGPUSpecializedShader> FFT::createShader(IVideoDriver* driver, DataType inputType, uint32_t maxDimensionSize)
8290
{
8391
uint32_t const maxPaddedDimensionSize = core::roundUpToPoT(maxDimensionSize);
8492

@@ -93,27 +101,25 @@ R"===(#version 430 core
93101
94102
)===";
95103

96-
const size_t extraSize = 32 + 32 + 32 + 32;
104+
constexpr size_t extraSize = 10u*2u+1u;
97105

98106
const uint32_t useSSBOforInput = (DataType::SSBO == inputType) ? 1 : 0;
99-
auto shader = core::make_smart_refctd_ptr<ICPUBuffer>(strlen(sourceFmt)+extraSize+1u);
107+
auto source = core::make_smart_refctd_ptr<ICPUBuffer>(strlen(sourceFmt)+extraSize+1u);
100108
snprintf(
101-
reinterpret_cast<char*>(shader->getPointer()),shader->getSize(), sourceFmt,
109+
reinterpret_cast<char*>(source->getPointer()),source->getSize(), sourceFmt,
102110
useSSBOforInput,
103111
DEFAULT_WORK_GROUP_SIZE,
104112
maxPaddedDimensionSize
105113
);
106114

107-
auto cpuSpecializedShader = core::make_smart_refctd_ptr<ICPUSpecializedShader>(
108-
core::make_smart_refctd_ptr<ICPUShader>(std::move(shader),ICPUShader::buffer_contains_glsl),
109-
ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE}
110-
);
115+
auto shader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(std::move(source),asset::ICPUShader::buffer_contains_glsl));
111116

112-
auto gpuShader = driver->createGPUShader(nbl::core::smart_refctd_ptr<const ICPUShader>(cpuSpecializedShader->getUnspecialized()));
113-
114-
auto gpuSpecializedShader = driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo());
117+
auto specializedShader = driver->createGPUSpecializedShader(
118+
shader.get(),
119+
ISpecializedShader::SInfo{nullptr, nullptr, "main", ISpecializedShader::ESS_COMPUTE}
120+
);
115121

116-
return gpuSpecializedShader;
122+
return specializedShader;
117123
}
118124

119125
void FFT::defaultBarrier()

0 commit comments

Comments
 (0)