packing 3 enums into single uint glsl

Erfan-Ahmadi · Erfan-Ahmadi · commit f0c65542c080 · 2021-02-14T13:45:57.000+03:30
diff --git a/examples_tests/49.ComputeFFT/main.cpp b/examples_tests/49.ComputeFFT/main.cpp
@@ -471,15 +471,15 @@ int main()
 	// Convolution
 	auto convolveDescriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(convolvePipelineLayout->getDescriptorSetLayout(0u)));
 	updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_1, fftOutputBuffer_KernelNormalized, fftOutputBuffer_0);
-
-	// IFFT X
-	auto fftDescriptorSet_IFFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
-	FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_X.get(), fftOutputBuffer_0, fftOutputBuffer_1);
-
+	
 	// IFFT Y
 	auto fftDescriptorSet_IFFT_Y = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
-	FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_Y.get(), fftOutputBuffer_1, fftOutputBuffer_0);
+	FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_Y.get(), fftOutputBuffer_0, fftOutputBuffer_1);
 	
+	// IFFT X
+	auto fftDescriptorSet_IFFT_X = driver->createGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>(fftPipelineLayout_SSBOInput->getDescriptorSetLayout(0u)));
+	FFTClass::updateDescriptorSet(driver, fftDescriptorSet_IFFT_X.get(), fftOutputBuffer_1, fftOutputBuffer_0);
+
 	auto removePaddingShader = createShader_RemovePadding(driver, am);
 	auto removePaddingPipelineLayout = getPipelineLayout_RemovePadding(driver);
 	auto removePaddingPipeline = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(removePaddingPipelineLayout), std::move(removePaddingShader));
@@ -519,18 +519,18 @@ int main()
 		driver->pushConstants(convolvePipelineLayout.get(), nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 4, sizeof(uint32_t), &srcNumChannels); // numSrcChannels
 		dispatchHelper_Convolution(driver, convolveDispatchInfo);
 		
-		// Convolved IFFT X
-		driver->bindComputePipeline(fftPipeline_SSBOInput.get());
-		driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_IFFT_X.get(), nullptr);
-		FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::X, true);
-		FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
-		
 		// Convolved IFFT Y
 		driver->bindComputePipeline(fftPipeline_SSBOInput.get());
 		driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_IFFT_Y.get(), nullptr);
 		FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::Y, true);
 		FFTClass::dispatchHelper(driver, fftDispatchInfo_Vertical);
 
+		// Convolved IFFT X
+		driver->bindComputePipeline(fftPipeline_SSBOInput.get());
+		driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_SSBOInput.get(), 0u, 1u, &fftDescriptorSet_IFFT_X.get(), nullptr);
+		FFTClass::pushConstants(driver, fftPipelineLayout_SSBOInput.get(), paddedDim, paddedDim, FFTClass::Direction::X, true);
+		FFTClass::dispatchHelper(driver, fftDispatchInfo_Horizontal);
+		
 		// Remove Padding and Copy to GPU Image
 		driver->bindComputePipeline(removePaddingPipeline.get());
 		driver->bindDescriptorSets(EPBP_COMPUTE, removePaddingPipelineLayout.get(), 0u, 1u, &removePaddingDescriptorSet.get(), nullptr);
diff --git a/include/nbl/builtin/glsl/ext/FFT/default_compute_fft.comp b/include/nbl/builtin/glsl/ext/FFT/default_compute_fft.comp
@@ -100,7 +100,9 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint cha
 	
 	bool is_out_of_range = any(bvec3(coordinate!=clamped_coord));
 
-	if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == pc.padding_type && is_out_of_range) {
+	uint paddingType = nbl_glsl_ext_FFT_getPaddingType();
+
+	if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == paddingType && is_out_of_range) {
 		return nbl_glsl_complex(0, 0);
 	}
 	
diff --git a/include/nbl/builtin/glsl/ext/FFT/fft.glsl b/include/nbl/builtin/glsl/ext/FFT/fft.glsl
@@ -45,15 +45,14 @@
 #define _NBL_GLSL_EXT_FFT_CLAMP_TO_EDGE_ 0
 #define _NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ 1
 
+//TODO: investigate why placing this uint between the two uvec3 members doesn't work
 #ifndef _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
 #define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
 layout(push_constant) uniform PushConstants
 {
     layout (offset = 0) uvec3 dimension;
     layout (offset = 16) uvec3 padded_dimension;
-	layout (offset = 32) uint direction;
-    layout (offset = 36) uint is_inverse;
-    layout (offset = 40) uint padding_type; // clamp_to_edge or fill_with_zero
+	layout (offset = 32) uint direction_isInverse_paddingType; // packed into a uint: bits 16-23 direction, bits 8-15 is_inverse, bits 0-7 padding_type (clamp_to_edge or fill_with_zero)
 } pc;
 #endif
 
@@ -100,29 +99,43 @@ nbl_glsl_complex nbl_glsl_ext_FFT_twiddleInverse(in uint threadId, in uint itera
     return nbl_glsl_complex_conjugate(nbl_glsl_ext_FFT_twiddle(threadId, iteration, logTwoN));
 }
 
+uint nbl_glsl_ext_FFT_getDirection() { // FFT axis index, read from bits 16..23 of the packed push constant
+    return (pc.direction_isInverse_paddingType & 0x00ff0000u) >> 16u;
+}
+bool nbl_glsl_ext_FFT_getIsInverse() { // inverse flag, read from bits 8..15; any non-zero value in that byte means true
+    return (pc.direction_isInverse_paddingType & 0x0000ff00u) != 0u;
+}
+uint nbl_glsl_ext_FFT_getPaddingType() { // padding mode, read from the low byte (bits 0..7) of the packed push constant
+    return pc.direction_isInverse_paddingType & 0x000000ffu;
+}
+
 uint nbl_glsl_ext_FFT_getChannel()
 {
-    return gl_WorkGroupID[pc.direction];
+    uint direction = nbl_glsl_ext_FFT_getDirection(); // axis index unpacked from the packed push constant
+    return gl_WorkGroupID[direction]; // the workgroup ID component along that axis is returned as the channel
 }
 
 uvec3 nbl_glsl_ext_FFT_getCoordinates(in uint tidx)
 {
+    uint direction = nbl_glsl_ext_FFT_getDirection(); // axis index unpacked from the packed push constant
     uvec3 tmp = gl_WorkGroupID;
-    tmp[pc.direction] = tidx;
+    tmp[direction] = tidx; // start from the workgroup ID and overwrite only the component along the FFT axis with tidx
     return tmp;
 }
 
 uvec3 nbl_glsl_ext_FFT_getBitReversedCoordinates(in uvec3 coords, in uint leadingZeroes)
 {
-    uint bitReversedIndex = bitfieldReverse(coords[pc.direction]) >> leadingZeroes;
+    uint direction = nbl_glsl_ext_FFT_getDirection(); // axis index unpacked from the packed push constant
+    uint bitReversedIndex = bitfieldReverse(coords[direction]) >> leadingZeroes; // reverse the bits of the index along the FFT axis, then shift the result right by leadingZeroes
     uvec3 tmp = coords;
-    tmp[pc.direction] = bitReversedIndex;
+    tmp[direction] = bitReversedIndex; // only the component along the FFT axis is replaced; the others are copied from coords
     return tmp;
 }
 
 uint nbl_glsl_ext_FFT_getDimLength(uvec3 dimension)
 {
-    return dimension[pc.direction];
+    uint direction = nbl_glsl_ext_FFT_getDirection(); // axis index unpacked from the packed push constant
+    return dimension[direction]; // component of the given dimension vector along the FFT axis
 }
 
 void nbl_glsl_ext_FFT()
@@ -134,6 +147,8 @@ void nbl_glsl_ext_FFT()
 
 	uint channel = nbl_glsl_ext_FFT_getChannel();
     
+    bool is_inverse = nbl_glsl_ext_FFT_getIsInverse();
+
 	// Pass 0: Bit Reversal
 	uint leadingZeroes = nbl_glsl_clz(dataLength) + 1u;
 	uint logTwo = 32u - leadingZeroes;
@@ -192,7 +207,7 @@ void nbl_glsl_ext_FFT()
             uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
             nbl_glsl_complex shuffled_value = shuffled_values[t];
 
-            nbl_glsl_complex twiddle = (0u == pc.is_inverse) 
+            nbl_glsl_complex twiddle = (!is_inverse) // forward pass: plain twiddle; inverse pass: conjugated twiddle (same selection as the removed 0u == pc.is_inverse test)
              ? nbl_glsl_ext_FFT_twiddle(tid, i, logTwo)
              : nbl_glsl_ext_FFT_twiddleInverse(tid, i, logTwo);
 
@@ -210,7 +225,7 @@ void nbl_glsl_ext_FFT()
     {
         uint tid = thread_offset + t * _NBL_GLSL_EXT_FFT_BLOCK_SIZE_X_DEFINED_;
 	    uvec3 coords = nbl_glsl_ext_FFT_getCoordinates(tid);
-        nbl_glsl_complex complex_value = (0u == pc.is_inverse) 
+        nbl_glsl_complex complex_value = (!is_inverse) // forward pass: value unchanged; inverse pass: divided by dataLength (same selection as the removed 0u == pc.is_inverse test)
          ? current_values[t]
          : current_values[t] / dataLength;
 
diff --git a/include/nbl/ext/FFT/FFT.h b/include/nbl/ext/FFT/FFT.h
@@ -20,13 +20,13 @@ class FFT : public core::TotalInterface
 {
 	public:
 
-		enum class Direction : uint32_t {
+		enum class Direction : uint8_t { // one byte wide: pushConstants() packs this value into bits 16-23 of a single uint32_t
 			X = 0,
 			Y = 1,
 			Z = 2,
 		};
 		
-		enum class PaddingType : uint32_t {
+		enum class PaddingType : uint8_t { // one byte wide: pushConstants() packs this value into the low byte of a single uint32_t
 			CLAMP_TO_EDGE = 0,
 			FILL_WITH_ZERO = 1,
 		};
@@ -201,12 +201,16 @@ class FFT : public core::TotalInterface
 			bool isInverse, 
 			PaddingType paddingType = PaddingType::CLAMP_TO_EDGE)
 		{
-			uint32_t is_inverse_u = isInverse;
+
+			uint8_t isInverse_u8 = isInverse;
+			uint8_t direction_u8 = static_cast<uint8_t>(direction);
+			uint8_t paddingType_u8 = static_cast<uint8_t>(paddingType);
+			
+			uint32_t packed = (direction_u8 << 16u) | (isInverse_u8 << 8u) | paddingType_u8;
+
 			driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, 0u, sizeof(uint32_t) * 3, &inputDimension);
 			driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 4, sizeof(uint32_t) * 3, &paddedInputDimension);
-			driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 8, sizeof(uint32_t), &direction);
-			driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 9, sizeof(uint32_t), &is_inverse_u);
-			driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 10, sizeof(uint32_t), &paddingType);
+			driver->pushConstants(pipelineLayout, nbl::video::IGPUSpecializedShader::ESS_COMPUTE, sizeof(uint32_t) * 8, sizeof(uint32_t), &packed);
 		}
 
 		// Kernel Normalization
diff --git a/src/nbl/ext/FFT/FFT.cpp b/src/nbl/ext/FFT/FFT.cpp
@@ -14,7 +14,7 @@ using namespace ext::FFT;
 
 core::SRange<const asset::SPushConstantRange> FFT::getDefaultPushConstantRanges()
 {
-	static const asset::SPushConstantRange ranges[5] =
+	static const asset::SPushConstantRange ranges[3] =
 	{
 		{
 			ISpecializedShader::ESS_COMPUTE,
@@ -31,18 +31,8 @@ core::SRange<const asset::SPushConstantRange> FFT::getDefaultPushConstantRanges(
 			sizeof(uint32_t) * 8,
 			sizeof(uint32_t)
 		},
-		{
-			ISpecializedShader::ESS_COMPUTE,
-			sizeof(uint32_t) * 9,
-			sizeof(uint32_t)
-		},
-		{
-			ISpecializedShader::ESS_COMPUTE,
-			sizeof(uint32_t) * 10,
-			sizeof(uint32_t)
-		},
 	};
-	return {ranges, ranges+5};
+	return {ranges, ranges+3};
 }
 
 core::SRange<const video::IGPUDescriptorSetLayout::SBinding> FFT::getDefaultBindings(video::IVideoDriver* driver, DataType inputType)

Original file line number	Diff line number	Diff line change
`@@ -100,7 +100,9 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in uvec3 coordinate, in uint cha`
`100`	`100`
`101`	`101`	`bool is_out_of_range = any(bvec3(coordinate!=clamped_coord));`
`102`	`102`
`103`		`- if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == pc.padding_type && is_out_of_range) {`
	`103`	`+ uint paddingType = nbl_glsl_ext_FFT_getPaddingType();`
	`104`	`+`
	`105`	`+ if (_NBL_GLSL_EXT_FFT_FILL_WITH_ZERO_ == paddingType && is_out_of_range) {`
`104`	`106`	`return nbl_glsl_complex(0, 0);`
`105`	`107`	`}`
`106`	`108`