use push constants for the blur kernel normalization.comp

devshgraphicsprogramming · devshgraphicsprogramming · commit ea7214f49eb0 · 2021-03-08T19:35:47.000+01:00
diff --git a/examples_tests/49.ComputeFFT/main.cpp b/examples_tests/49.ComputeFFT/main.cpp
@@ -404,6 +404,11 @@ int main()
 		FFTClass::updateDescriptorSet(driver, fftDescriptorSet_Ker_FFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
 		
 		// Normalization of FFT Y result
+		struct NormalizationPushConstants // host-side mirror of the `PushConstants` block in normalization.comp (two uvec4, same order)
+		{
+			ext::FFT::uvec4 stride; // shader dots .xyz with gl_GlobalInvocationID to index its input buffer
+			ext::FFT::uvec4 bitreverse_shift; // shader right-shifts bitfieldReverse(gl_GlobalInvocationID.xy) by .xy
+		};
 		auto fftPipelineLayout_KernelNormalization = [&]() -> auto
 		{
 			IGPUDescriptorSetLayout::SBinding bnd[] =
@@ -423,8 +428,12 @@ int main()
 					nullptr
 				},
 			};
+			SPushConstantRange pc_rng; // the single push constant range passed to createGPUPipelineLayout below
+			pc_rng.offset = 0u;
+			pc_rng.size = sizeof(NormalizationPushConstants); // range spans the whole struct, starting at byte 0
+			pc_rng.stageFlags = ISpecializedShader::ESS_COMPUTE; // only the compute stage reads these constants
 			return driver->createGPUPipelineLayout(
-				nullptr,nullptr,
+				&pc_rng,&pc_rng+1u,
 				driver->createGPUDescriptorSetLayout(bnd,bnd+2),nullptr,nullptr,nullptr
 			);
 		}();
@@ -494,6 +503,14 @@ int main()
 		);
 		driver->bindComputePipeline(fftPipeline_KernelNormalization.get());
 		driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_KernelNormalization.get(), 0u, 1u, &fftDescriptorSet_KernelNormalization.get(), nullptr);
+		{ // upload the strides and bit-reverse shifts consumed by normalization.comp
+			NormalizationPushConstants normalizationPC = {}; // value-init: all sizeof() bytes are pushed, so never-assigned members (bitreverse_shift.w) must not be indeterminate
+			normalizationPC.stride = {1u,paddedKerDim.width,paddedKerDim.width*paddedKerDim.height,paddedKerDim.width*paddedKerDim.height}; // TODO: take from the Y FFT pass
+			normalizationPC.bitreverse_shift.x = 32-core::findMSB(paddedKerDim.width); // shader computes bitfieldReverse(id.x)>>shift.x; NOTE(review): assumes padded dims are powers of two -- confirm
+			normalizationPC.bitreverse_shift.y = 32-core::findMSB(paddedKerDim.height);
+			normalizationPC.bitreverse_shift.z = 0; // shader only reads .xy of the shift
+			driver->pushConstants(fftPipelineLayout_KernelNormalization.get(),ISpecializedShader::ESS_COMPUTE,0u,sizeof(normalizationPC),&normalizationPC); // stage flag spelled the same way as in the layout's push constant range
+		}
 		{
 			const uint32_t dispatchSizeX = (paddedKerDim.width-1u)/16u+1u;
 			const uint32_t dispatchSizeY = (paddedKerDim.height-1u)/16u+1u;
diff --git a/examples_tests/49.ComputeFFT/normalization.comp b/examples_tests/49.ComputeFFT/normalization.comp
@@ -10,18 +10,20 @@ layout(set=0, binding=0) restrict readonly buffer InBuffer
 
 layout(set=0, binding=1, rg16f) uniform image2D NormalizedKernel[3];
 
-void main()
+layout(push_constant) uniform PushConstants // filled by the host from NormalizationPushConstants in main.cpp
 {
-// TODO: push constants
-	const uvec2 log2_sizes = findMSB(gl_WorkGroupSize*gl_NumWorkGroups).xy;
-	const uvec3 strides = uvec3(1u,0x1u<<log2_sizes.x,0x1u<<(log2_sizes.x+log2_sizes.y));
+	uvec4 strides; // .xyz = per-axis element stride applied to gl_GlobalInvocationID when indexing in_data
+	uvec4 bitreverse_shift; // .xy = right shift applied to bitfieldReverse(gl_GlobalInvocationID.xy)
+} pc;
 
-	const float power = length(in_data[0]);
-	nbl_glsl_complex value = in_data[gl_GlobalInvocationID.x*strides.x+gl_GlobalInvocationID.y*strides.y+gl_GlobalInvocationID.z*strides.z]/power;
+void main()
+{
+	nbl_glsl_complex value = in_data[nbl_glsl_dot(gl_GlobalInvocationID,pc.strides.xyz)]; // linear index = invocation id dotted with the pushed strides
+	
+	const float power = length(in_data[0]); // magnitude of element 0 is the normalization divisor
+
-	uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>(uvec2(32u)-log2_sizes);
-	const nbl_glsl_complex shift = nbl_glsl_expImaginary(-float(coord.x+coord.y)*nbl_glsl_PI); // TODO: does this shift go away later?
-	value = nbl_glsl_complex_mul(value,shift);
+	const uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>pc.bitreverse_shift.xy; // bit-reversed invocation index is used as the output texel coordinate
+	const nbl_glsl_complex shift = nbl_glsl_expImaginary(-nbl_glsl_PI*float(coord.x+coord.y)); // NOTE(review): presumably e^(-i*pi*(x+y)) -- confirm against nbl_glsl_expImaginary
+	value = nbl_glsl_complex_mul(value,shift)/power; // apply the phase factor, then normalize
 	imageStore(NormalizedKernel[gl_WorkGroupID.z],ivec2(coord),vec4(value,0.0,0.0));
 }
diff --git a/include/nbl/builtin/glsl/math/functions.glsl b/include/nbl/builtin/glsl/math/functions.glsl
@@ -7,6 +7,14 @@
 
 #include <nbl/builtin/glsl/math/constants.glsl>
 
+int nbl_glsl_dot(in ivec2 a, in ivec2 b) {ivec2 p = a*b; return p.x+p.y;} // integer dot products: component-wise multiply, then horizontal sum
+uint nbl_glsl_dot(in uvec2 a, in uvec2 b) {uvec2 p = a*b; return p.x+p.y;}
+int nbl_glsl_dot(in ivec3 a, in ivec3 b) {ivec3 p = a*b; return p.x+p.y+p.z;}
+uint nbl_glsl_dot(in uvec3 a, in uvec3 b) {uvec3 p = a*b; return p.x+p.y+p.z;}
+int nbl_glsl_dot(in ivec4 a, in ivec4 b) {ivec4 p = a*b; return p.x+p.y+p.z+p.w;}
+uint nbl_glsl_dot(in uvec4 a, in uvec4 b) {uvec4 p = a*b; return p.x+p.y+p.z+p.w;}
+
+//
 float nbl_glsl_erf(in float _x)
 {
     const float a1 = 0.254829592;