1
- // WorkGroup Size
2
- #ifndef _NBL_GLSL_WORKGROUP_SIZE_
3
- #define _NBL_GLSL_WORKGROUP_SIZE_ 256
4
- #endif
5
1
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
6
2
7
-
8
- #define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
9
- #define _NBL_GLSL_EXT_FFT_GET_DATA_DEFINED_
10
- #define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
11
- #define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
12
- #include "nbl/builtin/glsl/ext/FFT/fft.glsl"
13
-
14
- // Input Descriptor
3
+ #include <nbl/builtin/glsl/ext/FFT/types.glsl>
15
4
16
- layout(set=0, binding=0) buffer restrict InputOutputBuffer
17
- {
18
- nbl_glsl_complex inoutData[];
19
- };
5
+ layout(set=0, binding=2) uniform sampler2D NormalizedKernel[3];
20
6
21
- layout(set=0, binding=1) uniform sampler2D NormalizedKernel[3];
22
-
23
- // Get/Set Data Function
7
+ #include "extra_parameters.glsl"
24
8
layout(push_constant) uniform PushConstants
25
9
{
26
- nbl_glsl_ext_FFT_Parameters_t params;
10
+ convolve_parameters_t params;
27
11
} pc;
12
+ #define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
28
13
29
- nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
30
- {
31
- nbl_glsl_ext_FFT_Parameters_t ret;
32
- ret = pc.params;
33
- return ret;
34
- }
35
-
36
- nbl_glsl_complex nbl_glsl_ext_FFT_getData(in uvec3 coordinate, in uint channel)
14
+ nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
37
15
{
38
- nbl_glsl_complex retValue = nbl_glsl_complex(0, 0);
39
- uvec3 dimension = nbl_glsl_ext_FFT_Parameters_t_getDimensions();
40
- uint index = channel * (dimension.x * dimension.y * dimension.z) + coordinate.z * (dimension.x * dimension.y) + coordinate.y * (dimension.x) + coordinate.x;
41
- retValue = inoutData[index];
42
- return retValue;
16
+ return pc.params.fft;
43
17
}
18
+ #define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
44
19
45
- void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
46
- {
47
- uvec3 dimension = nbl_glsl_ext_FFT_Parameters_t_getPaddedDimensions();
48
- uint index = channel * (dimension.x * dimension.y * dimension.z) + coordinate.z * (dimension.x * dimension.y) + coordinate.y * (dimension.x) + coordinate.x;
49
- inoutData[index] = complex_value;
50
- }
51
-
52
- nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint channel) {
53
-
54
- uvec3 max_coord = nbl_glsl_ext_FFT_Parameters_t_getDimensions() - uvec3(1u);
55
- uvec3 clamped_coord = min(coordinate, max_coord);
56
-
57
- bool is_out_of_range = any(bvec3(coordinate!=clamped_coord));
58
-
59
- uint paddingType = nbl_glsl_ext_FFT_Parameters_t_getPaddingType();
60
-
61
- if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == paddingType && is_out_of_range) {
62
- return nbl_glsl_complex(0, 0);
63
- }
64
-
65
- return nbl_glsl_ext_FFT_getData(clamped_coord, channel);
66
- }
20
+ #define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_
21
+ #include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
67
22
68
23
void convolve(in uint item_per_thread_count, in uint ch)
69
24
{
70
25
// TODO: decouple kernel size from image size (can't get the math to work in my head)
71
- uvec3 dimension = nbl_glsl_ext_FFT_Parameters_t_getDimensions();
72
-
73
26
for(uint t=0u; t<item_per_thread_count; t++)
74
27
{
75
- uint tid = gl_LocalInvocationIndex + t * _NBL_GLSL_WORKGROUP_SIZE_;
76
- // TODO: refactor
77
- uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
78
- const uvec3 log2_size = uvec3(11u, 10u, 0u);
79
- coords = bitfieldReverse(coords)>>(uvec3(32u)-log2_size);
28
+ const uint tid = _NBL_GLSL_WORKGROUP_SIZE_*t+gl_LocalInvocationIndex;
80
29
81
30
nbl_glsl_complex sourceSpectrum = nbl_glsl_ext_FFT_impl_values[t];
82
-
83
- vec2 uv = (vec2(coords.xy))/vec2(uvec2(1u)<<log2_size.xy)+vec2(0.5f)/vec2(textureSize(NormalizedKernel[ch],0));
31
+
32
+ //
33
+ const uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
34
+ vec2 uv = vec2(bitfieldReverse(coords.xy))/vec2(4294967296.f)+pc.params.kernel_half_pixel_size;
84
35
//
85
36
nbl_glsl_complex convSpectrum = textureLod(NormalizedKernel[ch],uv,0).xy;
86
37
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(sourceSpectrum,convSpectrum);
@@ -89,29 +40,36 @@ void convolve(in uint item_per_thread_count, in uint ch)
89
40
90
41
void main()
91
42
{
92
- const uint dataLength = nbl_glsl_ext_FFT_Parameters_t_getFFTLength ();
93
- const uint item_per_thread_count = dataLength>> _NBL_GLSL_WORKGROUP_SIZE_LOG2_;
94
- const uint numChannels = nbl_glsl_ext_FFT_Parameters_t_getNumChannels();
95
- for(uint ch = 0u; ch < numChannels ; ++ch)
43
+ const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize ();
44
+ const uint item_per_thread_count = 0x1u<<(log2FFTSize- _NBL_GLSL_WORKGROUP_SIZE_LOG2_) ;
45
+
46
+ for(uint ch= 0u; ch<=nbl_glsl_ext_FFT_Parameters_t_getMaxChannel() ; ++ch)
96
47
{
97
48
// Load Values into local memory
98
49
for(uint t=0u; t<item_per_thread_count; t++)
99
50
{
100
51
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
101
- nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getCoordinates(tid),ch);
52
+ const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()];
53
+ nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getPaddedCoordinates(tid,log2FFTSize,trueDim),ch);
102
54
}
103
- nbl_glsl_ext_FFT_preloaded(false,dataLength );
55
+ nbl_glsl_ext_FFT_preloaded(false,log2FFTSize );
104
56
barrier();
105
57
106
58
convolve(item_per_thread_count,ch);
107
59
108
60
barrier();
109
- nbl_glsl_ext_FFT_preloaded(true,dataLength );
61
+ nbl_glsl_ext_FFT_preloaded(true,log2FFTSize );
110
62
// write out to main memory
63
+ // we override the setting to happen with padded coordinates because we dont want the padding to be written at all
111
64
for(uint t=0u; t<item_per_thread_count; t++)
112
65
{
113
66
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
114
- nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),ch,nbl_glsl_ext_FFT_impl_values[t]);
67
+ const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()];
68
+ // we also prevent certain threads from writing the memory out
69
+ const uint padding = ((0x1u<<log2FFTSize)-trueDim)>>1u;
70
+ const uint shifted = tid-padding;
71
+ if (tid>=padding && shifted<trueDim)
72
+ nbl_glsl_ext_FFT_setData(ivec3(nbl_glsl_ext_FFT_getCoordinates(shifted)),ch,nbl_glsl_ext_FFT_impl_values[t]);
115
73
}
116
74
}
117
75
}
0 commit comments