5
5
#define _NBL_BUILTIN_HLSL_WORKGROUP2_ARITHMETIC_CONFIG_INCLUDED_
6
6
7
7
#include "nbl/builtin/hlsl/cpp_compat.hlsl"
8
+ #include "nbl/builtin/hlsl/tuple.hlsl"
8
9
9
10
namespace nbl
10
11
{
@@ -52,16 +53,16 @@ struct ArithmeticConfiguration
52
53
static_assert (VirtualWorkgroupSize<=WorkgroupSize*SubgroupSize);
53
54
54
55
using items_per_invoc_t = impl::items_per_invocation<virtual_wg_t, _ItemsPerInvocation>;
55
- // NBL_CONSTEXPR_STATIC_INLINE uint32_t2 ItemsPerInvocation; TODO? doesn't allow inline definitions for uint32_t2 for some reason, uint32_t[2] as well ; declaring out of line results in not constant expression
56
+ using ItemsPerInvocation = tuple<integral_constant<uint16_t,items_per_invoc_t::value0>,integral_constant<uint16_t,items_per_invoc_t::value1>,integral_constant<uint16_t,items_per_invoc_t::value2> >;
56
57
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_0 = items_per_invoc_t::value0;
57
58
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_1 = items_per_invoc_t::value1;
58
59
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_2 = items_per_invoc_t::value2;
59
- static_assert (ItemsPerInvocation_1<=4 , "3 level scan would have been needed with this config!" );
60
60
61
61
NBL_CONSTEXPR_STATIC_INLINE uint16_t __ItemsPerVirtualWorkgroupLog2 = mpl::max_v<uint16_t, WorkgroupSizeLog2-SubgroupSizeLog2, SubgroupSizeLog2>;
62
62
NBL_CONSTEXPR_STATIC_INLINE uint16_t __ItemsPerVirtualWorkgroup = uint16_t (0x1u) << __ItemsPerVirtualWorkgroupLog2;
63
63
NBL_CONSTEXPR_STATIC_INLINE uint16_t __SubgroupsPerVirtualWorkgroup = __ItemsPerVirtualWorkgroup / ItemsPerInvocation_1;
64
64
65
+ // user specified the shared mem size of uint32_ts
65
66
NBL_CONSTEXPR_STATIC_INLINE uint32_t SharedScratchElementCount = conditional_value<LevelCount==1 ,uint16_t,
66
67
0 ,
67
68
conditional_value<LevelCount==3 ,uint16_t,
@@ -75,11 +76,16 @@ struct ArithmeticConfiguration
75
76
return glsl::gl_SubgroupInvocationID ()==SubgroupSize-1 ;
76
77
}
77
78
79
+ // gets a subgroupID as if each workgroup has (VirtualWorkgroupSize/SubgroupSize) subgroups
80
+ // each subgroup does work (VirtualWorkgroupSize/WorkgroupSize) times; workgroupInVirtualIndex is the index of the current repetition
78
81
// Maps a (subgroupID, workgroupInVirtualIndex) pair to a single linear index:
//   workgroupInVirtualIndex * (WorkgroupSize >> SubgroupSizeLog2) + subgroupID
// subgroupID              - added as the offset within one stride
// workgroupInVirtualIndex - multiplied by the stride, selects which stride-sized group
// NOTE(review): (WorkgroupSize >> SubgroupSizeLog2) is presumably the number of subgroups
// in one workgroup, assuming SubgroupSize == 1 << SubgroupSizeLog2 — confirm against the
// struct's constant definitions.
static uint32_t virtualSubgroupID (const uint32_t subgroupID, const uint32_t workgroupInVirtualIndex)
79
82
{
80
83
// stride * outer index + inner index
return workgroupInVirtualIndex * (WorkgroupSize >> SubgroupSizeLog2) + subgroupID;
81
84
}
82
85
86
+ // get a coalesced index to store for the next level in shared mem, e.g. level 0 -> level 1
87
+ // specify the next level to store values for in template param
88
+ // at level==LevelCount-1, it is guaranteed to have SubgroupSize elements
83
89
template<uint16_t level>
84
90
static uint32_t sharedStoreIndex (const uint32_t subgroupID)
85
91
{
@@ -102,6 +108,7 @@ struct ArithmeticConfiguration
102
108
return sharedStoreIndex<level>(virtualID);
103
109
}
104
110
111
+ // get the coalesced index in shared mem at the current level
105
112
template<uint16_t level>
106
113
static uint32_t sharedLoadIndex (const uint32_t invocationIndex, const uint32_t component)
107
114
{
0 commit comments