@@ -77,14 +77,17 @@ struct exclusive_scan
77
77
*/
78
78
namespace impl
79
79
{
80
- template<uint16_t DWORDCount , class BallotAccessor>
80
+ template<uint16_t ItemCount , class BallotAccessor>
81
81
uint16_t ballotCountedBitDWORD (NBL_REF_ARG (BallotAccessor) ballotAccessor)
82
82
{
83
83
const uint32_t index = SubgroupContiguousIndex ();
84
- if (index<DWORDCount )
84
+ if (index<impl::ballot_dword_count<ItemCount>::value )
85
85
{
86
- const uint32_t bitfield = ballotAccessor.get (index);
87
- // FIXME: stip unused bits from bitfield
86
+ uint32_t bitfield = ballotAccessor.get (index);
87
+ // strip unwanted bits from bitfield
88
+ const uint16_t Remainder = ItemCount&31 ;
89
+ if (Remainder!=0 )
90
+ bitfield &= (0x1u<<Remainder)-1 ;
88
91
return uint16_t (countbits (bitfield));
89
92
}
90
93
return 0 ;
@@ -97,7 +100,7 @@ uint16_t ballotScanBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_
97
100
98
101
static const uint16_t DWORDCount = impl::ballot_dword_count<ItemCount>::value;
99
102
const uint32_t count = exclusive_scan<plus<uint32_t>,DWORDCount>::template __call<ArithmeticAccessor>(
100
- ballotCountedBitDWORD<DWORDCount ,BallotAccessor>(ballotAccessor),
103
+ ballotCountedBitDWORD<ItemCount ,BallotAccessor>(ballotAccessor),
101
104
arithmeticAccessor
102
105
);
103
106
return uint16_t (countbits (localBitfield&(Exclusive ? glsl::gl_SubgroupLtMask ():glsl::gl_SubgroupLeMask ())[0 ]));
@@ -110,7 +113,7 @@ uint16_t ballotBitCount(NBL_REF_ARG(BallotAccessor) ballotAccessor, NBL_REF_ARG(
110
113
{
111
114
static const uint16_t DWORDCount = impl::ballot_dword_count<ItemCount>::value;
112
115
return uint16_t (reduction<plus<uint32_t>,DWORDCount>::template __call<ArithmeticAccessor>(
113
- impl::ballotCountedBitDWORD<DWORDCount ,BallotAccessor>(ballotAccessor),
116
+ impl::ballotCountedBitDWORD<ItemCount ,BallotAccessor>(ballotAccessor),
114
117
arithmeticAccessor
115
118
));
116
119
}
0 commit comments