Skip to content

Commit 5aac44b

Browse files
Create scanning_append.hlsl
1 parent 8e61632 commit 5aac44b

File tree

1 file changed

+90
-0
lines changed

1 file changed

+90
-0
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Copyright (C) 2022 - DevSH Graphics Programming Sp. z O.O.
2+
// This file is part of the "Nabla Engine".
3+
// For conditions of distribution and use, see copyright notice in nabla.h
4+
#ifndef _NBL_BUILTIN_HLSL_SCANNING_APPEND_INCLUDED_
5+
#define _NBL_BUILTIN_HLSL_SCANNING_APPEND_INCLUDED_
6+
7+
namespace nbl
8+
{
9+
namespace hlsl
10+
{
11+
namespace scanning_append
12+
{
13+
14+
// Pair of values handed back to every invocation that takes part in a scanning append.
struct result_t
{
    // Builds the sentinel result: both members are set to all ones (0xFFFFFFFF).
    static result_t invalid()
    {
        const uint allOnes = ~0u;

        result_t sentinel;
        sentinel.outputIndex = allOnes;
        sentinel.exclusivePrefixSum = allOnes;
        return sentinel;
    }

    // Slot index assigned to the element (count of elements appended before it).
    uint outputIndex;
    // Sum of the values accumulated before this element.
    uint exclusivePrefixSum;
};
26+
27+
28+
// Appends `value` to a running scan kept behind `accessor` and reports where it landed.
//
// Elements with value 0 do not get appended: they do not bump the element count, but they
// still take part in the atomic(s) and get a result back.
//
// Note: If NBL_GLSL_EXT_shader_atomic_int64 is not present, then the call to these functions
// needs to be subgroup uniform (the fallback relies on subgroup ballots, scans and broadcasts).
//
// accessor - object exposing the atomic counter(s):
//              64-bit path:  fetchIncr(uint64_t)
//              fallback:     fetchOrCount(uint), fetchIncrSum(uint), fetchIncrCount(uint)
//            NOTE(review): every one of them is assumed to return the value held *before*
//            the operation - confirm against the accessor implementation.
// value    - amount contributed by this invocation to the prefix sum.
// returns  - outputIndex (elements appended before this one) and exclusivePrefixSum
//            (sum of values accumulated before this one).
template<class AtomicCounterAccessor>
result_t non_negative(inout AtomicCounterAccessor accessor, in uint value)
{
    const bool willAppend = bool(value);

    result_t retval;
#ifdef NBL_GLSL_EXT_shader_atomic_int64
    // One 64-bit atomic does both jobs: the low 32 bits accumulate the sum,
    // the high 32 bits count the appended (non-zero) elements.
    uint64_t add = value;
    if (willAppend)
        add |= 0x100000000ull;
    const uint64_t count_reduction = accessor.fetchIncr(add);
    retval.outputIndex = uint(count_reduction>>32);
    retval.exclusivePrefixSum = uint(count_reduction);
#else
    #error "Untested Path, won't touch this until we actually need to ship something on Vulkan mobile or GL!"
    // Position and partial sum of this invocation within its own subgroup.
    const uint localIndex = subgroup::ballotExclusiveBitCount(subgroup::ballot(willAppend));
    const uint partialPrefix = subgroup::exclusiveAdd(value);

    // Only written by the elected invocation, everyone else reads them via broadcast.
    uint subgroupIndex = 0u;
    uint subgroupPrefix = 0u;
    // elect last invocation, its exclusive results plus its own contribution are the subgroup totals
    const uint lastSubgroupInvocationID = subgroup::Size-1u;
    if (subgroup::InvocationID==lastSubgroupInvocationID)
    {
        // crude mutex, reuse MSB bit of the count
        const uint lockBit = 0x80000000u;
        // First subgroup to flip the bit from 0 to 1 proceeds with the lock.
        // Only the lock bit of the fetched word may be tested: the remaining bits hold the
        // element count, and testing them too would spin forever once the count is non-zero.
        while ((accessor.fetchOrCount(lockBit)&lockBit)!=0u) {}
        // now MSB is always 1
        subgroupPrefix = accessor.fetchIncrSum(partialPrefix+value);
        // set the MSB to 0 (unlock) while adding, by making sure MSB overflows
        uint subgroupCount = localIndex;
        if (willAppend)
            subgroupCount++;
        // The fetched word still carries the lock bit we set above, strip it so that it
        // does not leak into the output index.
        subgroupIndex = accessor.fetchIncrCount(lockBit|subgroupCount)&(~lockBit);
    }
    retval.outputIndex = subgroup::broadcast(subgroupIndex,lastSubgroupInvocationID)+localIndex;
    retval.exclusivePrefixSum = subgroup::broadcast(subgroupPrefix,lastSubgroupInvocationID)+partialPrefix;
#endif
    return retval;
}
70+
71+
// Cheaper variant of `non_negative` that skips the atomics (and locks) whenever it can.
// The price is that an invocation with `value==0` may get `result_t::invalid()` back:
//  - with NBL_GLSL_EXT_shader_atomic_int64: every invocation whose value is 0 is skipped,
//  - without it: the call is skipped only when no active invocation in the wave has a
//    non-zero value, otherwise all of them go through `non_negative`.
template<class AtomicCounterAccessor>
result_t positive(inout AtomicCounterAccessor accessor, in uint value)
{
    const bool willAppend = bool(value);

#ifdef NBL_GLSL_EXT_shader_atomic_int64
    const bool nothingToDo = !willAppend;
#else
    const bool nothingToDo = !WaveActiveAnyTrue(willAppend);
#endif

    if (nothingToDo)
        return result_t::invalid();

    return non_negative<AtomicCounterAccessor>(accessor,value);
}
85+
86+
}
87+
}
88+
}
89+
90+
#endif

0 commit comments

Comments
 (0)