// Copyright (C) 2022 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _NBL_BUILTIN_HLSL_SCANNING_APPEND_INCLUDED_
#define _NBL_BUILTIN_HLSL_SCANNING_APPEND_INCLUDED_

namespace nbl
{
namespace hlsl
{
namespace scanning_append
{

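// Result of a single append:
// - `outputIndex` is the number of elements appended before this one, i.e. this element's slot in the compacted output
// - `exclusivePrefixSum` is the sum of the `value`s submitted to the counter before this one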
struct result_t
{
    static result_t invalid()
    {
        result_t retval;
        retval.exclusivePrefixSum = retval.outputIndex = ~0u;
        return retval;
    }

    uint outputIndex;
    uint exclusivePrefixSum;
};

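// The `AtomicCounterAccessor` is expected (inferred from the usage below) to wrap a global counter pair and expose:
// - `uint64_t fetchIncr(in uint64_t add)` : 64-bit atomic add on a packed word (append count in the high 32 bits,
//   value sum in the low 32 bits) returning the pre-add value, used on the int64-atomics path
// - `uint fetchOrCount(in uint bits)`, `uint fetchIncrCount(in uint add)`, `uint fetchIncrSum(in uint add)` :
//   32-bit atomic OR/add on separate count and sum words returning the pre-op value, used on the fallback path
// A minimal hypothetical sketch of the 64-bit variant, assuming Shader Model 6.6 64-bit atomics and a user-declared
// `RWByteAddressBuffer` (all names below are illustrative, not part of Nabla's API):
//
//   RWByteAddressBuffer scanningAppendCounter; // 8 bytes at offset 0: low dword = sum, high dword = append count
//   struct CounterAccessor
//   {
//       uint64_t fetchIncr(in uint64_t add)
//       {
//           uint64_t orig;
//           scanningAppendCounter.InterlockedAdd64(0,add,orig);
//           return orig;
//       }
//   };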
// Elements with a value of 0 do not get appended (they do not increment the append count)
// Note: if NBL_GLSL_EXT_shader_atomic_int64 is not present, then calls to these functions need to be subgroup-uniform
template<class AtomicCounterAccessor>
result_t non_negative(inout AtomicCounterAccessor accessor, in uint value)
{
    const bool willAppend = bool(value);

    result_t retval;
#ifdef NBL_GLSL_EXT_shader_atomic_int64
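    // Pack both counters into one 64-bit word: the low 32 bits accumulate the value sum and the high 32 bits
    // count appended elements, so a single 64-bit atomic add returns both the exclusive prefix sum and the
    // output index of this element.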
    uint64_t add = value;
    if (willAppend)
        add |= 0x100000000ull;
    const uint64_t count_reduction = accessor.fetchIncr(add);
    retval.outputIndex = uint(count_reduction>>32);
    retval.exclusivePrefixSum = uint(count_reduction);
#else
    #error "Untested Path, won't touch this until we actually need to ship something on Vulkan mobile or GL!"
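    // Fallback strategy: do the expensive global atomics once per subgroup instead of once per invocation.
    // Each subgroup elects its last invocation to reserve a contiguous output range and add the subgroup's
    // value sum under a spinlock (the MSB of the count word), then broadcasts the reserved base index and
    // base prefix sum back to the rest of the subgroup.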
    uint localIndex = subgroup::ballotExclusiveBitCount(subgroup::ballot(willAppend));
    uint partialPrefix = subgroup::exclusiveAdd(value);

    uint subgroupIndex,subgroupPrefix;
    // elect the last invocation of the subgroup to perform the global atomics
    const uint lastSubgroupInvocationID = subgroup::Size-1u;
    if (subgroup::InvocationID==lastSubgroupInvocationID)
    {
        // crude mutex: reuse the MSB of the count word as a lock bit
        const uint lockBit = 0x80000000u;
        // the first subgroup to set the bit to 1 (observing an old value of 0) acquires the lock
        while (accessor.fetchOrCount(lockBit)) {}
        // now the MSB is always 1
        subgroupPrefix = accessor.fetchIncrSum(partialPrefix+value);
        // unlock (set the MSB back to 0) in the same atomic as the count increment, by making the MSB overflow
        uint subgroupCount = localIndex;
        if (willAppend)
            subgroupCount++;
        subgroupIndex = accessor.fetchIncrCount(lockBit|subgroupCount);
    }
    retval.outputIndex = subgroup::broadcast(subgroupIndex,lastSubgroupInvocationID)+localIndex;
    retval.exclusivePrefixSum = subgroup::broadcast(subgroupPrefix,lastSubgroupInvocationID)+partialPrefix;
#endif
    return retval;
}

// Optimized version which tries to skip the atomic add (and the lock on the fallback path) when it can;
// in exchange it may return a garbage/invalid value for invocations whose `value==0`.
// Without NBL_GLSL_EXT_shader_atomic_int64 the early-out has to stay wave-uniform, hence the WaveActiveAnyTrue.
template<class AtomicCounterAccessor>
result_t positive(inout AtomicCounterAccessor accessor, in uint value)
{
    const bool willAppend = bool(value);
#ifdef NBL_GLSL_EXT_shader_atomic_int64
    if (willAppend)
#else
    if (WaveActiveAnyTrue(willAppend))
#endif
        return non_negative<AtomicCounterAccessor>(accessor,value);

    return result_t::invalid();
}
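// Hypothetical usage sketch (not part of this header): compacting non-empty work items from a compute shader,
// assuming the `CounterAccessor` sketched above; the output buffer and the `computeWorkAmount` helper are
// illustrative placeholders:
//
//   RWStructuredBuffer<uint2> compacted;
//   [numthreads(256,1,1)]
//   void main(uint3 ID : SV_DispatchThreadID)
//   {
//       CounterAccessor accessor;
//       const uint value = computeWorkAmount(ID.x);
//       const nbl::hlsl::scanning_append::result_t r = nbl::hlsl::scanning_append::positive<CounterAccessor>(accessor,value);
//       if (bool(value)) // only appended (non-zero) items have a valid outputIndex
//           compacted[r.outputIndex] = uint2(ID.x,r.exclusivePrefixSum);
//   }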

}
}
}

#endif