Skip to content

Commit e8e41f4

Browse files
fix coalesced writes on the convolution FFT
prep for hardcoded Parameters_t. Optimization strategies for FFT: - 2-4-1 optimization for real FFTs - hardcoded parameters that control loops, branches, coordinate calc and dynamic indexing - no more dynamic indexing of locals to create axis dependent stuff - tiled bloom (reduce the log(n) and the register pressure) - prescale the blur kernel to match the bitreverse permuted spectrum, also use image or buffer? - use images as temporary storage (maybe hilbert curve will be nicer for caches)
1 parent 8b4e47c commit e8e41f4

File tree

5 files changed

+118
-3
lines changed

5 files changed

+118
-3
lines changed

examples_tests/49.ComputeFFT/fft_convolve_ifft.comp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,43 @@
11
layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in;
22

3-
#include <nbl/builtin/glsl/ext/FFT/types.glsl>
4-
53
layout(set=0, binding=2) uniform sampler2D NormalizedKernel[3];
64

5+
/* TODO: Hardcode the parameters for the frequent FFTs
6+
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
7+
{
8+
return uvec3(1280u,1024u,1u);
9+
}
10+
bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse()
11+
{
12+
return false;
13+
}
14+
uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
15+
{
16+
return 0u;
17+
}
18+
uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
19+
{
20+
return 2u;
21+
}
22+
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
23+
{
24+
return 11u;
25+
}
26+
uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
27+
{
28+
return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
29+
}
30+
uvec4 nbl_glsl_ext_FFT_Parameters_t_getInputStrides()
31+
{
32+
return uvec4(1024u,1u,0u,1024u*1280u);
33+
}
34+
uvec4 nbl_glsl_ext_FFT_Parameters_t_getOutputStrides()
35+
{
36+
return uvec4(1u,1280u,0u,1280u*1024u);
37+
}
38+
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
39+
*/
40+
741
#include "extra_parameters.glsl"
842
layout(push_constant) uniform PushConstants
943
{

examples_tests/49.ComputeFFT/image_first_fft.comp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,42 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint cha
1515
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
1616

1717

18+
/* TODO: Hardcode the parameters for the frequent FFTs
19+
#if _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_>512
20+
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
21+
{
22+
return uvec3(1280u,720u,1u);
23+
}
24+
bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse()
25+
{
26+
return false;
27+
}
28+
uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
29+
{
30+
return 1u;
31+
}
32+
uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
33+
{
34+
return 2u;
35+
}
36+
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
37+
{
38+
return 10u;
39+
}
40+
uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
41+
{
42+
return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
43+
}
44+
uvec4 nbl_glsl_ext_FFT_Parameters_t_getInputStrides()
45+
{
46+
return uvec4(0xdeadbeefu);
47+
}
48+
uvec4 nbl_glsl_ext_FFT_Parameters_t_getOutputStrides()
49+
{
50+
return uvec4(1024u,1u,0u,1024u*1280u);
51+
}
52+
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
53+
#endif
54+
*/
55+
1856
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"

examples_tests/49.ComputeFFT/last_fft.comp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,41 @@ layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) i
44
layout(set=0, binding=1, rgba16f) uniform image2D outImage;
55
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
66

7+
/* TODO: Hardcode the parameters for the frequent FFTs
8+
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
9+
{
10+
return uvec3(1280u,1024u,1u);
11+
}
12+
bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse()
13+
{
14+
return true;
15+
}
16+
uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
17+
{
18+
return 1u;
19+
}
20+
uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
21+
{
22+
return 2u;
23+
}
24+
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
25+
{
26+
return 10u;
27+
}
28+
uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
29+
{
30+
return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
31+
}
32+
uvec4 nbl_glsl_ext_FFT_Parameters_t_getInputStrides()
33+
{
34+
return uvec4(1u,1280u,0u,1280u*1024u);
35+
}
36+
uvec4 nbl_glsl_ext_FFT_Parameters_t_getOutputStrides()
37+
{
38+
return uvec4(0xdeadbeefu);
39+
}
40+
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
41+
*/
742

843
#include "extra_parameters.glsl"
944
layout(push_constant) uniform PushConstants

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,12 @@ int main()
592592
const ISampler::E_TEXTURE_CLAMP fftPadding[2] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
593593
const auto passes = FFTClass::buildParameters(false,srcNumChannels,srcDim,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
594594
{
595-
fftPushConstants[1].output_strides = fftPushConstants[1].input_strides; // override for less work and storage (dont need to store the extra padding of the last axis after iFFT)
595+
// override for less work and storage (dont need to store the extra padding of the last axis after iFFT)
596+
fftPushConstants[1].output_strides.x = fftPushConstants[0].input_strides.x;
597+
fftPushConstants[1].output_strides.y = fftPushConstants[0].input_strides.y;
598+
fftPushConstants[1].output_strides.z = fftPushConstants[1].input_strides.z;
599+
fftPushConstants[1].output_strides.w = fftPushConstants[1].input_strides.w;
600+
// iFFT
596601
fftPushConstants[2].input_dimensions = fftPushConstants[1].input_dimensions;
597602
{
598603
fftPushConstants[2].input_dimensions.w = fftPushConstants[0].input_dimensions.w^0x80000000u;

include/nbl/builtin/glsl/ext/FFT/parameters.glsl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters();
2424
#endif
2525

2626

27+
#ifndef _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
28+
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
2729
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
2830
{
2931
nbl_glsl_ext_FFT_Parameters_t params = nbl_glsl_ext_FFT_getParameters();
@@ -67,5 +69,6 @@ uvec4 nbl_glsl_ext_FFT_Parameters_t_getOutputStrides()
6769
nbl_glsl_ext_FFT_Parameters_t params = nbl_glsl_ext_FFT_getParameters();
6870
return params.output_strides;
6971
}
72+
#endif
7073

7174
#endif

0 commit comments

Comments
 (0)