Skip to content

Commit 8863194

Browse files
committed
PR comment changes and some more additions for subgroups
1 parent 67472ba commit 8863194

File tree

3 files changed

+48
-8
lines changed

3 files changed

+48
-8
lines changed

include/nbl/builtin/hlsl/subgroup/arithmetic_portability.hlsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include <nbl/builtin/hlsl/subgroup/basic_portability.hlsl>
66
#endif
77

8-
static uint4 WHOLE_WAVE = ~0;
8+
static const uint4 WHOLE_WAVE = ~0; // REVIEW: Confirm this is proper placement and definition point
99

1010
namespace nbl
1111
{

include/nbl/builtin/hlsl/subgroup/arithmetic_portability_impl.hlsl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#ifndef _NBL_BUILTIN_HLSL_SUBGROUP_ARITHMETIC_PORTABILITY_IMPL_INCLUDED_
22
#define _NBL_BUILTIN_HLSL_SUBGROUP_ARITHMETIC_PORTABILITY_IMPL_INCLUDED_
33

4-
uint threadIndex : SV_GroupIndex; // Use HLSL terminology or follow GLSL (i.e. call them invocations instead of threads)?
4+
uint localInvocationIndex : SV_GroupIndex; // REVIEW: Discuss proper placement of SV_* values. They are not allowed to be defined inside a function scope, only as arguments of global variables in the shader.
55

66
namespace nbl
77
{
@@ -39,7 +39,7 @@ struct inclusive_scan<binops::bitwise_and>
3939
template<typename T>
4040
T operator()(const T x)
4141
{
42-
return WaveMultiPrefixAnd(x, WHOLE_WAVE) & x; // TODO (PentaKon): Should this use the binops::bitwise_and functor?
42+
return WaveMultiPrefixAnd(x, WHOLE_WAVE) & x;
4343
}
4444
};
4545

@@ -268,11 +268,11 @@ struct scan_base
268268
static const uint HalfSubgroupSize = WaveGetLaneCount()>>1u; // TODO (PentaKon): Replace with nbl_hlsl_SubgroupSize or nbl::hlsl::subgroup::Size
269269
static const uint LoMask = WaveGetLaneCount()-1u; // TODO (PentaKon): Replace with nbl_hlsl_SubgroupSize
270270
static const uint LastWorkgroupInvocation = _NBL_HLSL_WORKGROUP_SIZE_-1; // TODO (PentaKon): Where should this be defined?
271-
static const uint pseudoSubgroupInvocation = threadIndex&LoMask; // Also used in substructs
271+
static const uint pseudoSubgroupInvocation = localInvocationIndex&LoMask; // Also used in substructs, thus static const
272272

273273
static inclusive_scan<Binop,ScratchAccessor> create()
274274
{
275-
const uint pseudoSubgroupElectedInvocation = threadIndex&(~LoMask);
275+
const uint pseudoSubgroupElectedInvocation = localInvocationIndex&(~LoMask);
276276

277277
inclusive_scan<Binop,ScratchAccessor> retval;
278278

@@ -304,7 +304,7 @@ struct inclusive_scan : scan_base
304304
{
305305
static inclusive_scan<Binop,ScratchAccessor> create()
306306
{
307-
return scan_base<Binop,ScratchAccessor>::create(); // Is this correct?
307+
return scan_base<Binop,ScratchAccessor>::create(); // REVIEW: Is this correct?
308308
}
309309

310310
template<typename T, bool initializeScratch>
@@ -324,7 +324,7 @@ struct inclusive_scan : scan_base
324324
nbl::hlsl::subgroupBarrier();
325325
nbl::hlsl::subgroupMemoryBarrierShared();
326326
// Stone-Kogge adder
327-
// it seems that lanes below <HalfSubgroupSize/step are doing useless work,
327+
// (devsh): it seems that lanes below <HalfSubgroupSize/step are doing useless work,
328328
// but they're SIMD and adding an `if`/conditional execution is more expensive
329329
value = op(value,scratchAccessor.get(scanStoreOffset-1u));
330330
[[unroll]]

include/nbl/builtin/hlsl/subgroup/basic_portability.hlsl

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ namespace nbl
55
{
66
namespace hlsl
77
{
8-
// USE GLSL OR HLSL TERMINOLOGY? (Workgroup vs Threadgroup, Subgroup vs Wave, Invocation vs Thread)
98
static const uint MaxWorkgroupSizeLog2 = 11;
109
static const uint MaxWorkgroupSize = 0x1 << MaxWorkgroupSizeLog2;
1110
static const uint MinSubgroupSizeLog2 = 2;
@@ -20,9 +19,50 @@ namespace hlsl
2019
#endif
2120
static const uint MaxSubgroupSize = (0x1<<MaxSubgroupSizeLog2);
2221

22+
// REVIEW: No need for #ifdef check because these are always available in HLSL 2021 it seems
23+
uint subgroupSize() {
24+
return WaveGetLaneCount();
25+
}
2326

27+
uint subgroupSizeLog2() {
28+
return firstbithigh(subgroupSize());
29+
}
30+
31+
uint subgroupInvocationID() {
32+
return WaveGetLaneIndex();
33+
}
34+
35+
// REVIEW: Masks don't seem to be available in Wave Ops
36+
uint64_t subgroupEqMask() { // return type for these must be 64-bit
37+
// REVIEW: I think the spec says that the subgroup size can potentially go up to 128 in which case uint64 won't work properly...
38+
// Should we somehow block shaders from running if subgroup size > 64?
39+
// Use uint64_t2 ?
40+
return 0x1 << subgroupInvocationID();
41+
}
42+
43+
uint64_t subgroupGeMask() {
44+
return 0xffffffff<<subgroupInvocationID();
45+
}
46+
47+
uint64_t subgroupGtMask() {
48+
return subgroupGeMask()<<1;
49+
}
50+
51+
uint64_t subgroupLeMask() {
52+
return ~subgroupGtMask();
53+
}
54+
55+
uint64_t subgroupLtMask() {
56+
return ~subgroupGeMask();
57+
}
58+
59+
// REVIEW: Should we define the Small/LargeSubgroupXMask and Small/LargeHalfSubgroupSizes?
2460

2561
// WAVE BARRIERS
62+
63+
void subgroupBarrier() {
64+
65+
}
2666
}
2767
}
2868

0 commit comments

Comments
 (0)