Skip to content

Commit f416c8d

Browse files
managed to get the work decimation going
1 parent 06f12fd commit f416c8d

File tree

3 files changed

+10
-5
lines changed

3 files changed

+10
-5
lines changed

examples_tests/49.ComputeFFT/fft_convolve_ifft.comp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,16 @@ void main()
6060
barrier();
6161
nbl_glsl_ext_FFT_preloaded(true,log2FFTSize);
6262
// write out to main memory
63+
// we override the setting to happen with padded coordinates because we don't want the padding to be written at all
6364
for(uint t=0u; t<item_per_thread_count; t++)
6465
{
6566
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
66-
nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),ch,nbl_glsl_ext_FFT_impl_values[t]);
67+
const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()];
68+
// we also prevent certain threads from writing the memory out
69+
const uint padding = ((0x1u<<log2FFTSize)-trueDim)>>1u;
70+
const uint shifted = tid-padding;
71+
if (tid>=padding && shifted<trueDim)
72+
nbl_glsl_ext_FFT_setData(ivec3(nbl_glsl_ext_FFT_getCoordinates(shifted)),ch,nbl_glsl_ext_FFT_impl_values[t]);
6773
}
6874
}
6975
}

examples_tests/49.ComputeFFT/last_fft.comp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ layout(set=0, binding=1, rgba16f) uniform image2D outImage;
88
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
99
{
1010
// TODO PC
11-
const ivec2 padding = imageSize(outImage).x!=512u ? ivec2(384,664):ivec2(0);
11+
const ivec2 padding = imageSize(outImage).x!=512u ? ivec2(384,0):ivec2(0);
1212
const ivec2 coords = ivec2(coordinate.xy)-padding;
1313

14-
if (all(lessThanEqual(ivec2(0),coords))&&all(greaterThan(imageSize(outImage),coords)))
14+
if (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageSize(outImage),coords)))
1515
{
1616
vec4 color_value = imageLoad(outImage, coords);
1717
color_value[channel] = complex_value.x;

examples_tests/49.ComputeFFT/main.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -614,15 +614,14 @@ int main()
614614
const ISampler::E_TEXTURE_CLAMP fftPadding[2] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
615615
const auto passes = FFTClass::buildParameters(false,srcNumChannels,srcDim,fftPushConstants,fftDispatchInfo,fftPadding,paddedSrcDim);
616616
{
617+
fftPushConstants[1].output_strides = fftPushConstants[1].input_strides; // override for less work and storage (dont need to store the extra Y-slices after iFFT)
617618
fftPushConstants[2].input_dimensions = fftPushConstants[1].input_dimensions;
618-
fftPushConstants[2].input_dimensions.y = 2048u;
619619
{
620620
fftPushConstants[2].input_dimensions.w = fftPushConstants[0].input_dimensions.w^0x80000000u;
621621
fftPushConstants[2].input_strides = fftPushConstants[1].output_strides;
622622
fftPushConstants[2].output_strides = fftPushConstants[0].input_strides;
623623
}
624624
fftDispatchInfo[2] = fftDispatchInfo[0];
625-
fftDispatchInfo[2].workGroupCount[1] = 2048;
626625
}
627626
assert(passes==2);
628627

0 commit comments

Comments
 (0)