1
1
#ifndef _NBL_BUILTIN_HLSL_SUBGROUP_ARITHMETIC_PORTABILITY_IMPL_INCLUDED_
2
2
#define _NBL_BUILTIN_HLSL_SUBGROUP_ARITHMETIC_PORTABILITY_IMPL_INCLUDED_
3
3
4
- uint threadIndex : SV_GroupIndex ; // Use HLSL terminology or follow GLSL (i.e. call them invocations instead of threads)?
4
+ uint localInvocationIndex : SV_GroupIndex ; // REVIEW: Discuss proper placement of SV_* values. They are not allowed to be defined inside a function scope, only as arguments or global variables in the shader.
5
5
6
6
namespace nbl
7
7
{
@@ -39,7 +39,7 @@ struct inclusive_scan<binops::bitwise_and>
39
39
template<typename T>
40
40
T operator ()(const T x)
41
41
{
42
- return WaveMultiPrefixAnd (x, WHOLE_WAVE) & x; // TODO (PentaKon): Should this use the binops::bitwise_and functor?
42
+ return WaveMultiPrefixAnd (x, WHOLE_WAVE) & x;
43
43
}
44
44
};
45
45
@@ -268,11 +268,11 @@ struct scan_base
268
268
static const uint HalfSubgroupSize = WaveGetLaneCount ()>>1u; // TODO (PentaKon): Replace with nbl_hlsl_SubgroupSize or nbl::hlsl::subgroup::Size
269
269
static const uint LoMask = WaveGetLaneCount ()-1u; // TODO (PentaKon): Replace with nbl_hlsl_SubgroupSize
270
270
static const uint LastWorkgroupInvocation = _NBL_HLSL_WORKGROUP_SIZE_-1 ; // TODO (PentaKon): Where should this be defined?
271
- static const uint pseudoSubgroupInvocation = threadIndex &LoMask; // Also used in substructs
271
+ static const uint pseudoSubgroupInvocation = localInvocationIndex &LoMask; // Also used in substructs, thus static const
272
272
273
273
static inclusive_scan<Binop,ScratchAccessor> create ()
274
274
{
275
- const uint pseudoSubgroupElectedInvocation = threadIndex &(~LoMask);
275
+ const uint pseudoSubgroupElectedInvocation = localInvocationIndex &(~LoMask);
276
276
277
277
inclusive_scan<Binop,ScratchAccessor> retval;
278
278
@@ -304,7 +304,7 @@ struct inclusive_scan : scan_base
304
304
{
305
305
static inclusive_scan<Binop,ScratchAccessor> create ()
306
306
{
307
- return scan_base<Binop,ScratchAccessor>::create (); // Is this correct?
307
+ return scan_base<Binop,ScratchAccessor>::create (); // REVIEW: Is this correct?
308
308
}
309
309
310
310
template<typename T, bool initializeScratch>
@@ -324,7 +324,7 @@ struct inclusive_scan : scan_base
324
324
nbl::hlsl::subgroupBarrier ();
325
325
nbl::hlsl::subgroupMemoryBarrierShared ();
326
326
// Kogge-Stone adder
327
- // it seems that lanes below <HalfSubgroupSize/step are doing useless work,
327
+ // (devsh): it seems that lanes below <HalfSubgroupSize/step are doing useless work,
328
328
// but they're SIMD and adding an `if`/conditional execution is more expensive
329
329
value = op (value,scratchAccessor.get (scanStoreOffset-1u));
330
330
[[unroll]]
0 commit comments