Skip to content

Commit dfae21c

Browse files
Merge pull request #79 from Devsh-Graphics-Programming/fft
Final FFT Improvements
2 parents 42e3b18 + 2368ba2 commit dfae21c

File tree

18 files changed

+779
-703
lines changed

18 files changed

+779
-703
lines changed

examples_tests/39.DenoiserTonemapper/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ set(EXTRA_SOURCES
1919
CommandLineHandler.cpp
2020
../../src/nbl/ext/LumaMeter/CLumaMeter.cpp
2121
../../src/nbl/ext/ToneMapper/CToneMapper.cpp
22+
../../src/nbl/ext/FFT/FFT.cpp
2223
)
2324

2425
nbl_create_executable_project(
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
2+
// This file is part of the "Nabla Engine".
3+
// For conditions of distribution and use, see copyright notice in nabla.h
4+
5+
#include "nbl/builtin/glsl/ext/FFT/parameters_struct.glsl"
6+
// Push-constant payload of the convolution shader: the common FFT parameters
// plus the UV offset applied when sampling the kernel spectrum.
struct convolve_parameters_t
7+
{
8+
// Returned to the FFT code by nbl_glsl_ext_FFT_getParameters() (`return pc.params.fft;`).
nbl_glsl_ext_FFT_Parameters_t fft;
9+
// Added to the normalized coordinate before sampling NormalizedKernel[ch] in convolve().
// It takes the place of the removed in-shader term 0.5/textureSize(NormalizedKernel[ch],0),
// so it is presumably half a kernel texel in UV units - TODO confirm against the host code.
vec2 kernel_half_pixel_size;
10+
};
11+
12+
// Push-constant payload of the shader that stores the FFT result into an image:
// the common FFT parameters plus the offset used to undo the padding.
struct image_store_parameters_t
13+
{
14+
// Returned to the FFT code by nbl_glsl_ext_FFT_getParameters() (`return pc.params.fft;`).
nbl_glsl_ext_FFT_Parameters_t fft;
15+
// Subtracted from the xy coordinate in nbl_glsl_ext_FFT_setData() before the
// bounds-checked store to outImage.
ivec2 unpad_offset;
16+
};
Lines changed: 30 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,37 @@
1-
// WorkGroup Size
2-
#ifndef _NBL_GLSL_WORKGROUP_SIZE_
3-
#define _NBL_GLSL_WORKGROUP_SIZE_ 256
4-
#endif
51
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
62

7-
8-
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
9-
#define _NBL_GLSL_EXT_FFT_GET_DATA_DEFINED_
10-
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
11-
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
12-
#include "nbl/builtin/glsl/ext/FFT/fft.glsl"
13-
14-
// Input Descriptor
3+
#include <nbl/builtin/glsl/ext/FFT/types.glsl>
154

16-
layout(set=0, binding=0) buffer restrict InputOutputBuffer
17-
{
18-
nbl_glsl_complex inoutData[];
19-
};
5+
layout(set=0, binding=2) uniform sampler2D NormalizedKernel[3];
206

21-
layout(set=0, binding=1) uniform sampler2D NormalizedKernel[3];
22-
23-
// Get/Set Data Function
7+
#include "extra_parameters.glsl"
248
layout(push_constant) uniform PushConstants
259
{
26-
nbl_glsl_ext_FFT_Parameters_t params;
10+
convolve_parameters_t params;
2711
} pc;
12+
#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
2813

29-
nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
30-
{
31-
nbl_glsl_ext_FFT_Parameters_t ret;
32-
ret = pc.params;
33-
return ret;
34-
}
35-
36-
nbl_glsl_complex nbl_glsl_ext_FFT_getData(in uvec3 coordinate, in uint channel)
14+
// Hands the FFT parameters stored in the push constants to the FFT code.
// In this shader pc.params is a convolve_parameters_t, so only its `fft` member is exposed.
nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
3715
{
38-
nbl_glsl_complex retValue = nbl_glsl_complex(0, 0);
39-
uvec3 dimension = nbl_glsl_ext_FFT_Parameters_t_getDimensions();
40-
uint index = channel * (dimension.x * dimension.y * dimension.z) + coordinate.z * (dimension.x * dimension.y) + coordinate.y * (dimension.x) + coordinate.x;
41-
retValue = inoutData[index];
42-
return retValue;
16+
return pc.params.fft;
4317
}
18+
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
4419

45-
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
46-
{
47-
uvec3 dimension = nbl_glsl_ext_FFT_Parameters_t_getPaddedDimensions();
48-
uint index = channel * (dimension.x * dimension.y * dimension.z) + coordinate.z * (dimension.x * dimension.y) + coordinate.y * (dimension.x) + coordinate.x;
49-
inoutData[index] = complex_value;
50-
}
51-
52-
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint channel) {
53-
54-
uvec3 max_coord = nbl_glsl_ext_FFT_Parameters_t_getDimensions() - uvec3(1u);
55-
uvec3 clamped_coord = min(coordinate, max_coord);
56-
57-
bool is_out_of_range = any(bvec3(coordinate!=clamped_coord));
58-
59-
uint paddingType = nbl_glsl_ext_FFT_Parameters_t_getPaddingType();
60-
61-
if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == paddingType && is_out_of_range) {
62-
return nbl_glsl_complex(0, 0);
63-
}
64-
65-
return nbl_glsl_ext_FFT_getData(clamped_coord, channel);
66-
}
20+
#define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_
21+
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
6722

6823
void convolve(in uint item_per_thread_count, in uint ch)
6924
{
7025
// TODO: decouple kernel size from image size (can't get the math to work in my head)
71-
uvec3 dimension = nbl_glsl_ext_FFT_Parameters_t_getDimensions();
72-
7326
for(uint t=0u; t<item_per_thread_count; t++)
7427
{
75-
uint tid = gl_LocalInvocationIndex + t * _NBL_GLSL_WORKGROUP_SIZE_;
76-
// TODO: refactor
77-
uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
78-
const uvec3 log2_size = uvec3(11u, 10u, 0u);
79-
coords = bitfieldReverse(coords)>>(uvec3(32u)-log2_size);
28+
const uint tid = _NBL_GLSL_WORKGROUP_SIZE_*t+gl_LocalInvocationIndex;
8029

8130
nbl_glsl_complex sourceSpectrum = nbl_glsl_ext_FFT_impl_values[t];
82-
83-
vec2 uv = (vec2(coords.xy))/vec2(uvec2(1u)<<log2_size.xy)+vec2(0.5f)/vec2(textureSize(NormalizedKernel[ch],0));
31+
32+
//
33+
const uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
34+
vec2 uv = vec2(bitfieldReverse(coords.xy))/vec2(4294967296.f)+pc.params.kernel_half_pixel_size;
8435
//
8536
nbl_glsl_complex convSpectrum = textureLod(NormalizedKernel[ch],uv,0).xy;
8637
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(sourceSpectrum,convSpectrum);
@@ -89,29 +40,36 @@ void convolve(in uint item_per_thread_count, in uint ch)
8940

9041
// Entry point of the convolution shader. For every channel it loads one FFT line into
// nbl_glsl_ext_FFT_impl_values, calls nbl_glsl_ext_FFT_preloaded(false,...), multiplies the
// values with the kernel spectrum in convolve(), calls nbl_glsl_ext_FFT_preloaded(true,...)
// and writes the non-padding part of the line back out.
// NOTE(review): the bool passed to nbl_glsl_ext_FFT_preloaded presumably selects
// forward (false) vs. inverse (true) - confirm in default_compute_fft.comp.
void main()
9142
{
92-
const uint dataLength = nbl_glsl_ext_FFT_Parameters_t_getFFTLength();
93-
const uint item_per_thread_count = dataLength>>_NBL_GLSL_WORKGROUP_SIZE_LOG2_;
94-
const uint numChannels = nbl_glsl_ext_FFT_Parameters_t_getNumChannels();
95-
for(uint ch = 0u; ch < numChannels; ++ch)
43+
const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
44+
// Number of values each invocation holds: FFT size divided by the workgroup size.
// NOTE(review): assumes log2FFTSize >= _NBL_GLSL_WORKGROUP_SIZE_LOG2_, otherwise the
// unsigned subtraction wraps and the shift amount is out of range - confirm the host guarantees it.
const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_);
45+
46+
// The channel bound is inclusive (`<=` the max channel index).
for(uint ch=0u; ch<=nbl_glsl_ext_FFT_Parameters_t_getMaxChannel(); ++ch)
9647
{
9748
// Load Values into local memory
9849
for(uint t=0u; t<item_per_thread_count; t++)
9950
{
10051
// Element index handled by this invocation for slot t: t*workgroup_size + local index.
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
101-
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getCoordinates(tid),ch);
52+
// Unpadded extent along the axis selected by getDirection().
const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()];
53+
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getPaddedCoordinates(tid,log2FFTSize,trueDim),ch);
10254
}
103-
nbl_glsl_ext_FFT_preloaded(false,dataLength);
55+
nbl_glsl_ext_FFT_preloaded(false,log2FFTSize);
10456
barrier();
10557

10658
convolve(item_per_thread_count,ch);
10759

10860
barrier();
109-
nbl_glsl_ext_FFT_preloaded(true,dataLength);
61+
nbl_glsl_ext_FFT_preloaded(true,log2FFTSize);
11062
// write out to main memory
63+
// we override the setting to happen with padded coordinates because we don't want the padding to be written at all
11164
for(uint t=0u; t<item_per_thread_count; t++)
11265
{
11366
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
114-
nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),ch,nbl_glsl_ext_FFT_impl_values[t]);
67+
const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()];
68+
// we also prevent certain threads from writing the memory out
69+
// Half of the difference between the FFT size and the true extent.
const uint padding = ((0x1u<<log2FFTSize)-trueDim)>>1u;
70+
// Unsigned: wraps around when tid<padding, which is why the condition below tests tid>=padding first.
const uint shifted = tid-padding;
71+
// Only elements whose shifted index lies in [0,trueDim) are stored.
if (tid>=padding && shifted<trueDim)
72+
nbl_glsl_ext_FFT_setData(ivec3(nbl_glsl_ext_FFT_getCoordinates(shifted)),ch,nbl_glsl_ext_FFT_impl_values[t]);
11573
}
11674
}
11775
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
2+
3+
// Input Descriptor
4+
layout(set=0, binding=0) uniform sampler2D inputImage;
5+
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
6+
7+
#include <nbl/builtin/glsl/math/complex.glsl>
8+
// Override of the FFT input fetch: reads the input from the `inputImage` sampler instead of a buffer.
//   coordinate - integer coordinate requested by the FFT; only .xy is used here
//   channel    - component of the sampled texel to return
// Returns a complex number whose real part is the sampled component and whose imaginary part is 0.
// KERNEL_SCALE is not defined in this file - presumably injected by the host as a macro; TODO confirm.
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
9+
{
10+
ivec2 inputImageSize = textureSize(inputImage, 0);
11+
// Texel-center coordinate divided by the image size scaled by KERNEL_SCALE.
vec2 normalizedCoords = (vec2(coordinate.xy)+vec2(0.5f))/(vec2(inputImageSize)*KERNEL_SCALE);
12+
// The added 0.5-0.5/KERNEL_SCALE centers the [0,1/KERNEL_SCALE] range around UV 0.5;
// the explicit LOD is -log2(KERNEL_SCALE).
// NOTE(review): what is returned for UVs outside [0,1] depends on the sampler state, which is not visible here.
vec4 texelValue = textureLod(inputImage, normalizedCoords+vec2(0.5-0.5/KERNEL_SCALE), -log2(KERNEL_SCALE));
13+
return nbl_glsl_complex(texelValue[channel], 0.0f);
14+
}
15+
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
16+
17+
18+
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
Lines changed: 19 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,37 @@
1-
#ifndef _NBL_GLSL_WORKGROUP_SIZE_
2-
#define _NBL_GLSL_WORKGROUP_SIZE_ 256
3-
#endif
41
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
52

6-
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
7-
#define _NBL_GLSL_EXT_FFT_GET_DATA_DEFINED_
8-
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
9-
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
10-
#include "nbl/builtin/glsl/ext/FFT/fft.glsl"
11-
12-
// Input Descriptor
13-
14-
layout(set=0, binding=0) readonly restrict buffer InputBuffer
15-
{
16-
nbl_glsl_complex inData[];
17-
};
18-
193
// Output Descriptor
20-
214
layout(set=0, binding=1, rgba16f) uniform image2D outImage;
5+
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
226

23-
// Get/Set Data Function
247

8+
#include "extra_parameters.glsl"
259
layout(push_constant) uniform PushConstants
2610
{
27-
nbl_glsl_ext_FFT_Parameters_t params;
11+
image_store_parameters_t params;
2812
} pc;
13+
#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
2914

30-
nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters() {
31-
nbl_glsl_ext_FFT_Parameters_t ret;
32-
ret = pc.params;
33-
return ret;
34-
}
35-
36-
nbl_glsl_complex nbl_glsl_ext_FFT_getData(in uvec3 coordinate, in uint channel)
15+
// Hands the FFT parameters stored in the push constants to the FFT code.
// In this shader pc.params is an image_store_parameters_t, so only its `fft` member is exposed.
nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
3716
{
38-
nbl_glsl_complex retValue = nbl_glsl_complex(0, 0);
39-
uvec3 dimension = nbl_glsl_ext_FFT_Parameters_t_getDimensions();
40-
uint index = channel * (dimension.x * dimension.y * dimension.z) + coordinate.z * (dimension.x * dimension.y) + coordinate.y * (dimension.x) + coordinate.x;
41-
retValue = inData[index];
42-
return retValue;
17+
return pc.params.fft;
4318
}
19+
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
4420

21+
22+
#include <nbl/builtin/glsl/math/complex.glsl>
4523
// Override of the FFT output store: writes the real part of `complex_value` into one
// component of `outImage`.
//   coordinate    - coordinate supplied by the FFT; only .xy is used here
//   channel       - component of the texel to overwrite
//   complex_value - FFT result; only its .x component is stored
// NOTE(review): this override takes a uvec3 coordinate, while the getPaddedData override and the
// setData call in the convolution shader of the same commit use ivec3 - confirm which type
// default_compute_fft.comp expects.
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
4624
{
47-
const ivec2 coords = ivec2(coordinate.xy);
48-
49-
vec4 color_value = imageLoad(outImage, coords);
50-
color_value[channel] = complex_value.x;
51-
imageStore(outImage, coords, color_value);
25+
// Shift by the push-constant offset; the result may be negative, hence the signed type.
const ivec2 coords = ivec2(coordinate.xy)-pc.params.unpad_offset;
26+
27+
// Skip anything that lands outside the image: 0 <= coords < imageSize(outImage) on both axes.
if (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageSize(outImage),coords)))
28+
{
29+
// Read-modify-write so the other components of the texel are kept.
vec4 color_value = imageLoad(outImage, coords);
30+
color_value[channel] = complex_value.x;
31+
imageStore(outImage, coords, color_value);
32+
}
5233
}
34+
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
5335

54-
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint channel)
55-
{
56-
uvec3 max_coord = nbl_glsl_ext_FFT_Parameters_t_getDimensions() - uvec3(1u);
57-
uvec3 clamped_coord = min(coordinate, max_coord);
58-
59-
bool is_out_of_range = any(bvec3(coordinate!=clamped_coord));
60-
61-
uint paddingType = nbl_glsl_ext_FFT_Parameters_t_getPaddingType();
62-
63-
if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == paddingType && is_out_of_range) {
64-
return nbl_glsl_complex(0, 0);
65-
}
66-
67-
return nbl_glsl_ext_FFT_getData(clamped_coord, channel);
68-
}
6936

70-
void main()
71-
{
72-
const uint numChannels = nbl_glsl_ext_FFT_Parameters_t_getNumChannels();
73-
for(uint ch = 0u; ch < numChannels; ++ch)
74-
{
75-
nbl_glsl_ext_FFT(true, ch);
76-
}
77-
}
37+
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"

0 commit comments

Comments
 (0)