Skip to content

Commit 209adb4

Browse files
committed
added some comments to config funcs for future debugging
1 parent 83991b9 commit 209adb4

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#define _NBL_BUILTIN_HLSL_WORKGROUP2_ARITHMETIC_CONFIG_INCLUDED_
66

77
#include "nbl/builtin/hlsl/cpp_compat.hlsl"
8+
#include "nbl/builtin/hlsl/tuple.hlsl"
89

910
namespace nbl
1011
{
@@ -52,16 +53,16 @@ struct ArithmeticConfiguration
5253
static_assert(VirtualWorkgroupSize<=WorkgroupSize*SubgroupSize);
5354

5455
using items_per_invoc_t = impl::items_per_invocation<virtual_wg_t, _ItemsPerInvocation>;
55-
// NBL_CONSTEXPR_STATIC_INLINE uint32_t2 ItemsPerInvocation; TODO? doesn't allow inline definitions for uint32_t2 for some reason, uint32_t[2] as well ; declaring out of line results in not constant expression
56+
using ItemsPerInvocation = tuple<integral_constant<uint16_t,items_per_invoc_t::value0>,integral_constant<uint16_t,items_per_invoc_t::value1>,integral_constant<uint16_t,items_per_invoc_t::value2> >;
5657
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_0 = items_per_invoc_t::value0;
5758
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_1 = items_per_invoc_t::value1;
5859
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_2 = items_per_invoc_t::value2;
59-
static_assert(ItemsPerInvocation_1<=4, "3 level scan would have been needed with this config!");
6060

6161
NBL_CONSTEXPR_STATIC_INLINE uint16_t __ItemsPerVirtualWorkgroupLog2 = mpl::max_v<uint16_t, WorkgroupSizeLog2-SubgroupSizeLog2, SubgroupSizeLog2>;
6262
NBL_CONSTEXPR_STATIC_INLINE uint16_t __ItemsPerVirtualWorkgroup = uint16_t(0x1u) << __ItemsPerVirtualWorkgroupLog2;
6363
NBL_CONSTEXPR_STATIC_INLINE uint16_t __SubgroupsPerVirtualWorkgroup = __ItemsPerVirtualWorkgroup / ItemsPerInvocation_1;
6464

65+
// size of the user-specified shared memory scratch, counted in uint32_t elements
6566
NBL_CONSTEXPR_STATIC_INLINE uint32_t SharedScratchElementCount = conditional_value<LevelCount==1,uint16_t,
6667
0,
6768
conditional_value<LevelCount==3,uint16_t,
@@ -75,11 +76,16 @@ struct ArithmeticConfiguration
7576
return glsl::gl_SubgroupInvocationID()==SubgroupSize-1;
7677
}
7778

79+
// gets a subgroupID as if each workgroup had (VirtualWorkgroupSize/SubgroupSize) subgroups
80+
// each subgroup does its work (VirtualWorkgroupSize/WorkgroupSize) times; workgroupInVirtualIndex denotes which of those iterations this is
7881
static uint32_t virtualSubgroupID(const uint32_t subgroupID, const uint32_t workgroupInVirtualIndex)
7982
{
8083
return workgroupInVirtualIndex * (WorkgroupSize >> SubgroupSizeLog2) + subgroupID;
8184
}
8285

86+
// get the coalesced shared memory index at which to store a value for the next level, e.g. level 0 -> level 1
87+
// the `level` template parameter specifies the next level, i.e. the level the values are being stored for
88+
// at level==LevelCount-1, it is guaranteed to have SubgroupSize elements
8389
template<uint16_t level>
8490
static uint32_t sharedStoreIndex(const uint32_t subgroupID)
8591
{
@@ -102,6 +108,7 @@ struct ArithmeticConfiguration
102108
return sharedStoreIndex<level>(virtualID);
103109
}
104110

111+
// get the coalesced index in shared mem at the current level
105112
template<uint16_t level>
106113
static uint32_t sharedLoadIndex(const uint32_t invocationIndex, const uint32_t component)
107114
{

0 commit comments

Comments
 (0)