Skip to content

Commit d717916

Browse files
An alternative to the Indirect Scan (#374)
Console devs can suck it...
1 parent cb0c994 commit d717916

File tree

2 files changed

+76
-0
lines changed

2 files changed

+76
-0
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// No header guards!!! In case you want to have multiple append buffers,
2+
// you might want to `#include` this header multiple times with different suffixes
3+
// The suffix is pasted onto the generated function name; default to an empty
// suffix when the includer did not define one before including this header.
#ifndef NBL_GLSL_SCANNING_APPEND_FUNCNAME_SUFFFIX
4+
#define NBL_GLSL_SCANNING_APPEND_FUNCNAME_SUFFFIX
5+
#endif
6+
7+
// Type of the shared append counter.
// - With 64-bit atomics it is a single uint64_t: the append function adds `value`
//   into the low 32 bits and sets bit 32 for every appended element, then reads
//   the old value back as (high 32 bits, low 32 bits).
// - Otherwise it is a uvec2: component [0] accumulates the values, component [1]
//   the element count, with the MSB of [1] reused as a lock bit.
// NOTE(review): the original comment here was only "LSB" - presumably meaning the
// value sum lives in the least significant half; confirm.
8+
#ifdef NBL_GLSL_EXT_shader_atomic_int64
9+
#define nbl_glsl_scanning_append_counter_t uint64_t
10+
#else
11+
#define nbl_glsl_scanning_append_counter_t uvec2
12+
#endif
13+
14+
// What a single append call hands back to the calling invocation.
struct nbl_glsl_scanning_append_result_t
15+
{
16+
// number of elements that were appended ahead of this invocation
uint outIndex;
17+
// sum of the `value`s that were accumulated ahead of this invocation
uint exclusivePrefix;
18+
};
19+
20+
// Appends one element and accumulates `value` into the counter named by
// NBL_GLSL_SCANNING_APPEND_COUNTER_NAME.
//
// value   - amount added to the running sum; elements with value 0 do not get appended
//           (they do not advance the element count).
// returns - outIndex:        number of elements appended ahead of this invocation,
//           exclusivePrefix: sum of the values accumulated ahead of this invocation.
//
// Note: If NBL_GLSL_EXT_shader_atomic_int64 is not present, then the call to this function needs to be subgroup uniform
nbl_glsl_scanning_append_result_t NBL_GLSL_CONCATENATE2(nbl_glsl_scanning_append,NBL_GLSL_SCANNING_APPEND_FUNCNAME_SUFFFIX)(in uint value)
{
#ifndef NBL_GLSL_SCANNING_APPEND_COUNTER_NAME
    #error "Need to define NBL_GLSL_SCANNING_APPEND_COUNTER_NAME for the `nbl_glsl_scanning_append` function, cause GLSL is dumb and `buffer` cannot be passed around."
#endif

    const bool willAppend = bool(value);
#ifdef NBL_GLSL_EXT_shader_atomic_int64
    // One 64-bit atomic updates both halves at once:
    // low 32 bits sum the values, high 32 bits count the appended elements.
    uint64_t add = value;
    if (willAppend)
        add |= 0x100000000ul; // GLSL's uint64_t literal suffix is `ul`; `ull` is not a GLSL suffix
    const uint64_t count_reduction = atomicAdd(NBL_GLSL_SCANNING_APPEND_COUNTER_NAME,add);
    // GLSL constructors use call syntax; `T{...}` is only legal as a declaration initializer
    return nbl_glsl_scanning_append_result_t(uint(count_reduction>>32),uint(count_reduction));
#else
    #error "Untested Path, won't touch this until we actually need to ship something on Vulkan mobile or GL!"
    // per-invocation offsets inside the subgroup
    uint localIndex = nbl_glsl_subgroupBallotExclusiveBitCount(nbl_glsl_subgroupBallot(willAppend));
    uint partialPrefix = nbl_glsl_subgroupExclusiveAdd(value);

    // the last invocation holds the subgroup totals, so it alone talks to the global counter
    const uint lastSubgroupInvocationID = nbl_glsl_SubgroupSize-1u;
    uint subgroupIndex,subgroupPrefix;
    if (nbl_glsl_SubgroupInvocationID==lastSubgroupInvocationID)
    {
        // crude mutex, reuse MSB bit
        const uint lockBit = 0x80000000u;
        // first subgroup to set the bit to 1 (old value 0) proceeds with the lock.
        // Only the lock bit of the old word may be tested: the remaining bits hold the
        // element count, and testing the whole word would spin forever once count!=0.
        while ((atomicOr(NBL_GLSL_SCANNING_APPEND_COUNTER_NAME[1],lockBit)&lockBit)!=0u) {}
        // now MSB is always 1
        subgroupPrefix = atomicAdd(NBL_GLSL_SCANNING_APPEND_COUNTER_NAME[0],partialPrefix+value);
        // set the MSB to 0 while adding by making sure MSB overflows;
        // the returned old word still carries the lock bit, so strip it from the count
        subgroupIndex = atomicAdd(NBL_GLSL_SCANNING_APPEND_COUNTER_NAME[1],localIndex+(willAppend ? (lockBit+1u):lockBit))&(~lockBit);
    }
    // every invocation = subgroup base (read from the last invocation) + its own offset
    return nbl_glsl_scanning_append_result_t(
        nbl_glsl_subgroupBroadcast(subgroupIndex,lastSubgroupInvocationID)+localIndex,
        nbl_glsl_subgroupBroadcast(subgroupPrefix,lastSubgroupInvocationID)+partialPrefix
    );
#endif
}
59+
60+
// Optimized version which tries to omit the atomicAdd and locks if it can; in return it
// may return a garbage/invalid value ({~0u,~0u}) when the invocation's `value==0`.
//
// NOTE(review): this wrapper was declared with exactly the same name and signature as
// the function it forwards to, i.e. a redefinition whose body called itself - GLSL allows
// neither duplicate definitions nor recursion. It is given a distinct `_optimized` name
// here so it forwards to the plain append; rename if the project prefers another spelling.
//
// value   - same meaning as for the plain append function.
// returns - the plain append result when the append is performed, {~0u,~0u} otherwise.
nbl_glsl_scanning_append_result_t NBL_GLSL_CONCATENATE2(nbl_glsl_scanning_append_optimized,NBL_GLSL_SCANNING_APPEND_FUNCNAME_SUFFFIX)(in uint value)
{
    const bool willAppend = bool(value);
#ifdef NBL_GLSL_EXT_shader_atomic_int64
    // with 64-bit atomics every invocation can decide on its own
    if (willAppend)
#else
    // the fallback append must be called subgroup-uniformly, so the decision is made
    // for the subgroup as a whole
    if (subgroupAny(willAppend))
#endif
        return NBL_GLSL_CONCATENATE2(nbl_glsl_scanning_append,NBL_GLSL_SCANNING_APPEND_FUNCNAME_SUFFFIX)(value);

    // nothing appended: GLSL constructors use call syntax, not `T{...}`
    return nbl_glsl_scanning_append_result_t(~0u,~0u);
}
73+
74+
// Drop the suffix so that a later `#include` of this header can supply its own.
#undef NBL_GLSL_SCANNING_APPEND_FUNCNAME_SUFFFIX

src/nbl/builtin/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ set(nbl_resources_to_embed
143143
"nbl/builtin/glsl/scan/indirect.comp"
144144
"nbl/builtin/glsl/scan/parameters_struct.glsl"
145145
"nbl/builtin/glsl/scan/virtual_workgroup.glsl"
146+
# faster and easier scan
147+
"nbl/builtin/glsl/scanning_append/scanning_append.glsl"
146148
# scene
147149
"nbl/builtin/glsl/scene/animation.glsl"
148150
"nbl/builtin/glsl/scene/keyframe.glsl"

0 commit comments

Comments
 (0)