1
- #ifndef _NBL_BUILTIN_HLSL_WORKGROUP_FFT_INCLUDED_
2
- #define _NBL_BUILTIN_HLSL_WORKGROUP_FFT_INCLUDED_
3
-
4
1
#include <nbl/builtin/hlsl/cpp_compat.hlsl>
5
2
#include <nbl/builtin/hlsl/concepts.hlsl>
6
3
#include <nbl/builtin/hlsl/fft/common.hlsl>
7
4
5
+ #ifndef _NBL_BUILTIN_HLSL_WORKGROUP_FFT_INCLUDED_
6
+ #define _NBL_BUILTIN_HLSL_WORKGROUP_FFT_INCLUDED_
7
+
8
8
// ------------------------------- COMMON -----------------------------------------
9
9
10
10
namespace nbl
@@ -36,11 +36,11 @@ struct ConstevalParameters
36
36
}
37
37
}
38
38
}
39
- // ------------------------------- END COMMON -----------------------------------------
39
+ // ------------------------------- END COMMON ---------------------------------------------
40
+
41
+ // -------------------------------- CPP ONLY ----------------------------------------------
40
42
41
- // ------------------------------- CPP ONLY -------------------------------------------
42
43
#ifndef __HLSL_VERSION
43
- #include <nbl/video/IPhysicalDevice.h>
44
44
45
45
namespace nbl
46
46
{
@@ -51,24 +51,30 @@ namespace workgroup
51
51
namespace fft
52
52
{
53
53
54
- inline std::pair<uint16_t, uint16_t> optimalFFTParameters (const video::ILogicalDevice* device, uint32_t inputArrayLength)
54
// Result type of optimalFFTParameters: a pair of base-2 logarithms, both stored as uint16_t.
//   elementsPerInvocationLog2 - log2 of the per-invocation element count chosen by optimalFFTParameters.
//   workgroupSizeLog2         - log2 of the workgroup size chosen by optimalFFTParameters.
// Field order matters: optimalFFTParameters fills this struct with a positional brace initializer.
+ struct OptimalFFTParameters
55
+ {
56
+ uint16_t elementsPerInvocationLog2;
57
+ uint16_t workgroupSizeLog2;
58
+ };
59
+
60
// Derives the two log2 parameters for an FFT over `inputArrayLength` elements.
// (Lines marked '-' below are the previous version, which read the workgroup limit from a device
//  instead of taking it as an argument; lines marked '+' are the current version.)
//
// Parameters:
//   maxWorkgroupSize  - upper bound applied to `inputArrayLength / 2` before the log2 is taken.
//   inputArrayLength  - number of elements in the input array.
// Returns:
//   OptimalFFTParameters initialised as { elementsPerInvocationLog2, workgroupSizeLog2 }.
//
// NOTE(review): the comments here assume findMSB yields the index of the highest set bit - confirm
//               against the findMSB overload that is actually in scope.
// NOTE(review): all operands are unsigned, so for inputArrayLength < 2 the expression
//               `min (inputArrayLength / 2, maxWorkgroupSize) - 1u` wraps around instead of going
//               negative - confirm callers never pass such a length.
+ inline OptimalFFTParameters optimalFFTParameters (const uint32_t maxWorkgroupSize, uint32_t inputArrayLength)
55
61
{
56
- uint32_t maxWorkgroupSize = *device->getPhysicalDevice ()->getLimits ().maxWorkgroupSize;
57
62
// This is the logic found in core::roundUpToPoT to get the log2
58
- uint16_t workgroupSizeLog2 = 1u + hlsl::findMSB (core::min (inputArrayLength / 2 , maxWorkgroupSize) - 1u);
59
- uint16_t elementPerInvocationLog2 = 1u + hlsl::findMSB (core::max ((inputArrayLength >> workgroupSizeLog2) - 1u, 1u));
60
- return { elementPerInvocationLog2, workgroupSizeLog2 };
63
// Workgroup size: half the array length, capped at maxWorkgroupSize, then `1 + findMSB(x - 1)` to get the log2.
+ const uint16_t workgroupSizeLog2 = 1u + findMSB (min (inputArrayLength / 2 , maxWorkgroupSize) - 1u);
64
// Elements per invocation: same log2 pattern applied to what is left after shifting out the workgroup size;
// the max (..., 1u) keeps the findMSB argument from being zero.
+ const uint16_t elementsPerInvocationLog2 = 1u + findMSB (max ((inputArrayLength >> workgroupSizeLog2) - 1u, 1u));
65
// Positional initialisation: order must match the member order of OptimalFFTParameters.
+ const OptimalFFTParameters retVal = { elementsPerInvocationLog2, workgroupSizeLog2 };
66
+ return retVal;
61
67
}
62
68
63
69
}
64
70
}
65
71
}
66
72
}
67
-
68
73
// ------------------------------- END CPP ONLY -------------------------------------------
69
74
70
75
// ------------------------------- HLSL ONLY ----------------------------------------------
71
- #else
76
+
77
+ #else
72
78
73
79
#include "nbl/builtin/hlsl/subgroup/fft.hlsl"
74
80
#include "nbl/builtin/hlsl/workgroup/basic.hlsl"
0 commit comments