Skip to content

Commit b638080

Browse files
add bitfield stripping for unused items to the workgroup ballot counts
1 parent 867103c commit b638080

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

include/nbl/builtin/hlsl/workgroup/arithmetic.hlsl

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -77,14 +77,17 @@ struct exclusive_scan
7777
*/
7878
namespace impl
7979
{
80-
template<uint16_t DWORDCount, class BallotAccessor>
80+
template<uint16_t ItemCount, class BallotAccessor>
8181
uint16_t ballotCountedBitDWORD(NBL_REF_ARG(BallotAccessor) ballotAccessor)
8282
{
8383
const uint32_t index = SubgroupContiguousIndex();
84-
if (index<DWORDCount)
84+
if (index<impl::ballot_dword_count<ItemCount>::value)
8585
{
86-
const uint32_t bitfield = ballotAccessor.get(index);
87-
// FIXME: stip unused bits from bitfield
86+
uint32_t bitfield = ballotAccessor.get(index);
87+
// strip unwanted bits from bitfield
88+
const uint16_t Remainder = ItemCount&31;
89+
if (Remainder!=0)
90+
bitfield &= (0x1u<<Remainder)-1;
8891
return uint16_t(countbits(bitfield));
8992
}
9093
return 0;
@@ -97,7 +100,7 @@ uint16_t ballotScanBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_
97100

98101
static const uint16_t DWORDCount = impl::ballot_dword_count<ItemCount>::value;
99102
const uint32_t count = exclusive_scan<plus<uint32_t>,DWORDCount>::template __call<ArithmeticAccessor>(
100-
ballotCountedBitDWORD<DWORDCount,BallotAccessor>(ballotAccessor),
103+
ballotCountedBitDWORD<ItemCount,BallotAccessor>(ballotAccessor),
101104
arithmeticAccessor
102105
);
103106
return uint16_t(countbits(localBitfield&(Exclusive ? glsl::gl_SubgroupLtMask():glsl::gl_SubgroupLeMask())[0]));
@@ -110,7 +113,7 @@ uint16_t ballotBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_ARG(
110113
{
111114
static const uint16_t DWORDCount = impl::ballot_dword_count<ItemCount>::value;
112115
return uint16_t(reduction<plus<uint32_t>,DWORDCount>::template __call<ArithmeticAccessor>(
113-
impl::ballotCountedBitDWORD<DWORDCount,BallotAccessor>(ballotAccessor),
116+
impl::ballotCountedBitDWORD<ItemCount,BallotAccessor>(ballotAccessor),
114117
arithmeticAccessor
115118
));
116119
}

0 commit comments

Comments
 (0)