Skip to content

Commit 9774305

Browse files
all workgroup primitives should work now!
1 parent 2cce4f9 commit 9774305

File tree

3 files changed

+74
-85
lines changed

3 files changed

+74
-85
lines changed

examples_tests/48.ArithmeticUnitTest/main.cpp

Lines changed: 47 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -84,64 +84,60 @@ struct max
8484

8585

8686
//subgroup method emulations on the CPU, to verify the results of the GPU methods
87-
template<typename T>
87+
template<class CRTP, typename T>
8888
struct emulatedSubgroupCommon
8989
{
90-
inline const T* getSubgroupData(uint32_t& subgroupInvocationID, uint32_t& pseudoSubgroupID, const T* workgroupData, const uint32_t localInvocationIndex, uint32_t subgroupSize, uint32_t workgroupSize)
90+
using type_t = T;
91+
92+
inline void operator()(type_t* outputData, const type_t* workgroupData, uint32_t workgroupSize, uint32_t subgroupSize)
9193
{
92-
pseudoSubgroupID = localInvocationIndex&(-subgroupSize);
93-
auto subgroupData = workgroupData+pseudoSubgroupID;
94-
subgroupInvocationID = localInvocationIndex-pseudoSubgroupID;
95-
return workgroupData+pseudoSubgroupID;
94+
for (uint32_t pseudoSubgroupID=0u; pseudoSubgroupID<workgroupSize; pseudoSubgroupID+=subgroupSize)
95+
{
96+
type_t* outSubgroupData = outputData+pseudoSubgroupID;
97+
const type_t* subgroupData = workgroupData+pseudoSubgroupID;
98+
CRTP::impl(outSubgroupData,subgroupData,core::min<uint32_t>(subgroupSize,workgroupSize-pseudoSubgroupID));
99+
}
96100
}
97101
};
98102
template<class OP>
99-
struct emulatedSubgroupReduction : emulatedSubgroupCommon<typename OP::type_t>
103+
struct emulatedSubgroupReduction : emulatedSubgroupCommon<emulatedSubgroupReduction<OP>,typename OP::type_t>
100104
{
101105
using type_t = typename OP::type_t;
102106

103-
inline type_t operator()(const type_t* workgroupData, const uint32_t localInvocationIndex, uint32_t subgroupSize, uint32_t workgroupSize)
107+
static inline void impl(type_t* outSubgroupData, const type_t* subgroupData, const uint32_t clampedSubgroupSize)
104108
{
105-
uint32_t subgroupInvocationID,pseudoSubgroupID;
106-
const type_t* subgroupData = getSubgroupData(subgroupInvocationID,pseudoSubgroupID,workgroupData,localInvocationIndex,subgroupSize,workgroupSize);
107-
type_t retval = subgroupData[0];
108-
for (auto i=1u; i<core::min<uint32_t>(subgroupSize,workgroupSize-pseudoSubgroupID); i++)
109-
retval = OP()(retval,subgroupData[i]);
110-
return retval;
109+
type_t red = subgroupData[0];
110+
for (auto i=1u; i<clampedSubgroupSize; i++)
111+
red = OP()(red,subgroupData[i]);
112+
std::fill(outSubgroupData,outSubgroupData+clampedSubgroupSize,red);
111113
}
112114

113115
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "subgroup reduction";
114116
};
115117
template<class OP>
116-
struct emulatedSubgroupScanExclusive : emulatedSubgroupCommon<typename OP::type_t>
118+
struct emulatedSubgroupScanExclusive : emulatedSubgroupCommon<emulatedSubgroupScanExclusive<OP>,typename OP::type_t>
117119
{
118120
using type_t = typename OP::type_t;
119121

120-
inline type_t operator()(const type_t* workgroupData, const uint32_t localInvocationIndex, uint32_t subgroupSize, uint32_t workgroupSize)
122+
static inline void impl(type_t* outSubgroupData, const type_t* subgroupData, const uint32_t clampedSubgroupSize)
121123
{
122-
uint32_t subgroupInvocationID,dummy;
123-
const type_t* subgroupData = getSubgroupData(subgroupInvocationID,dummy,workgroupData,localInvocationIndex,subgroupSize,workgroupSize);
124-
type_t retval = OP::IdentityElement;
125-
for (auto i=0u; i<subgroupInvocationID; i++)
126-
retval = OP()(retval, subgroupData[i]);
127-
return retval;
124+
outSubgroupData[0u] = OP::IdentityElement;
125+
for (auto i=1u; i<clampedSubgroupSize; i++)
126+
outSubgroupData[i] = OP()(outSubgroupData[i-1u],subgroupData[i-1u]);
128127
}
129128

130129
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "subgroup exclusive scan";
131130
};
132131
template<class OP>
133-
struct emulatedSubgroupScanInclusive : emulatedSubgroupCommon<typename OP::type_t>
132+
struct emulatedSubgroupScanInclusive : emulatedSubgroupCommon<emulatedSubgroupScanInclusive<OP>,typename OP::type_t>
134133
{
135134
using type_t = typename OP::type_t;
136135

137-
inline type_t operator()(const type_t* workgroupData, const uint32_t localInvocationIndex, uint32_t subgroupSize, uint32_t workgroupSize)
136+
static inline void impl(type_t* outSubgroupData, const type_t* subgroupData, const uint32_t clampedSubgroupSize)
138137
{
139-
uint32_t subgroupInvocationID,dummy;
140-
const type_t* subgroupData = getSubgroupData(subgroupInvocationID,dummy,workgroupData,localInvocationIndex,subgroupSize,workgroupSize);
141-
type_t retval = OP::IdentityElement;
142-
for (auto i=0u; i<=subgroupInvocationID; i++)
143-
retval = OP()(retval, subgroupData[i]);
144-
return retval;
138+
outSubgroupData[0u] = subgroupData[0u];
139+
for (auto i=1u; i<clampedSubgroupSize; i++)
140+
outSubgroupData[i] = OP()(outSubgroupData[i-1u],subgroupData[i]);
145141
}
146142

147143
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "subgroup inclusive scan";
@@ -153,12 +149,12 @@ struct emulatedWorkgroupReduction
153149
{
154150
using type_t = typename OP::type_t;
155151

156-
inline type_t operator()(const type_t* workgroupData, const uint32_t localInvocationIndex, uint32_t subgroupSize, uint32_t workgroupSize)
152+
inline void operator()(type_t* outputData, const type_t* workgroupData, uint32_t workgroupSize, uint32_t subgroupSize)
157153
{
158-
type_t retval = workgroupData[0];
154+
type_t red = workgroupData[0];
159155
for (auto i=1u; i<workgroupSize; i++)
160-
retval = OP()(retval,workgroupData[i]);
161-
return retval;
156+
red = OP()(red,workgroupData[i]);
157+
std::fill(outputData,outputData+workgroupSize,red);
162158
}
163159

164160
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "workgroup reduction";
@@ -168,12 +164,11 @@ struct emulatedWorkgroupScanExclusive
168164
{
169165
using type_t = typename OP::type_t;
170166

171-
inline type_t operator()(const type_t* workgroupData, const uint32_t localInvocationIndex, uint32_t subgroupSize, uint32_t workgroupSize)
167+
inline void operator()(type_t* outputData, const type_t* workgroupData, uint32_t workgroupSize, uint32_t subgroupSize)
172168
{
173-
type_t retval = OP::IdentityElement;
174-
for (auto i=0u; i<localInvocationIndex; i++)
175-
retval = OP()(retval,workgroupData[i]);
176-
return retval;
169+
outputData[0u] = OP::IdentityElement;
170+
for (auto i=1u; i<workgroupSize; i++)
171+
outputData[i] = OP()(outputData[i-1u],workgroupData[i-1u]);
177172
}
178173

179174
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "workgroup exclusive scan";
@@ -183,12 +178,11 @@ struct emulatedWorkgroupScanInclusive
183178
{
184179
using type_t = typename OP::type_t;
185180

186-
inline type_t operator()(const type_t* workgroupData, const uint32_t localInvocationIndex, uint32_t subgroupSize, uint32_t workgroupSize)
181+
inline void operator()(type_t* outputData, const type_t* workgroupData, uint32_t workgroupSize, uint32_t subgroupSize)
187182
{
188-
type_t retval = OP::IdentityElement;
189-
for (auto i=0u; i<=localInvocationIndex; i++)
190-
retval = OP()(retval,workgroupData[i]);
191-
return retval;
183+
outputData[0u] = workgroupData[0u];
184+
for (auto i=1u; i<workgroupSize; i++)
185+
outputData[i] = OP()(outputData[i-1u],workgroupData[i]);
192186
}
193187

194188
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "workgroup inclusive scan";
@@ -232,21 +226,21 @@ bool validateResults(video::IVideoDriver* driver, const uint32_t* inputData, con
232226
auto dataFromBuffer = reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(downloadStagingArea->getBufferPointer())+address);
233227

234228
// now check if the data obtained has valid values
229+
constexpr uint32_t subgroupSize = 4u;
230+
uint32_t* tmp = new uint32_t[workgroupSize];
235231
for (uint32_t workgroupID=0u; success&&workgroupID<workgroupCount; workgroupID++)
236-
for (uint32_t localInvocationIndex=0u; localInvocationIndex<workgroupSize; localInvocationIndex++)
237232
{
238-
constexpr uint32_t subgroupSize = 4u;
239-
240233
const auto workgroupOffset = workgroupID*workgroupSize;
241-
uint32_t val = Arithmetic<OP<uint32_t>>()(inputData+workgroupOffset, localInvocationIndex, subgroupSize, workgroupSize);
242-
const auto invocationOffset = workgroupOffset+localInvocationIndex;
243-
if (val!=dataFromBuffer[invocationOffset])
234+
Arithmetic<OP<uint32_t>>()(tmp,inputData+workgroupOffset,workgroupSize,subgroupSize);
235+
for (uint32_t localInvocationIndex=0u; localInvocationIndex<workgroupSize; localInvocationIndex++)
236+
if (tmp[localInvocationIndex]!=dataFromBuffer[workgroupOffset+localInvocationIndex])
244237
{
245238
os::Printer::log("Failed test #" + std::to_string(workgroupSize) + " (" + Arithmetic<OP<uint32_t>>::name + ") (" + OP<uint32_t>::name + ")", ELL_ERROR);
246239
success = false;
247240
break;
248241
}
249242
}
243+
delete[] tmp;
250244
}
251245
else
252246
os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR);
@@ -384,7 +378,7 @@ int main()
384378
//max workgroup size is hardcoded to 1024
385379
uint32_t totalFailCount = 0;
386380
const auto ds = descriptorSet.get();
387-
for (uint32_t workgroupSize=8u; workgroupSize<=1024u; workgroupSize++)
381+
for (uint32_t workgroupSize=1u; workgroupSize<=1024u; workgroupSize++)
388382
{
389383
core::smart_refctd_ptr<IGPUComputePipeline> pipelines[kTestTypeCount];
390384
for (uint32_t i=0u; i<kTestTypeCount; i++)
@@ -398,8 +392,8 @@ int main()
398392
passed = runTest<emulatedSubgroupScanExclusive>(driver,pipelines[1u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
399393
passed = runTest<emulatedSubgroupScanInclusive>(driver,pipelines[2u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
400394
passed = runTest<emulatedWorkgroupReduction>(driver,pipelines[3u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
401-
//passed = runTest<emulatedWorkgroupScanExclusive>(driver,pipelines[4u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
402-
//passed = runTest<emulatedWorkgroupScanInclusive>(driver,pipelines[5u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
395+
passed = runTest<emulatedWorkgroupScanExclusive>(driver,pipelines[4u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
396+
passed = runTest<emulatedWorkgroupScanInclusive>(driver,pipelines[5u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
403397

404398
if (passed)
405399
os::Printer::log("Passed test #" + std::to_string(workgroupSize), ELL_INFORMATION);

include/irr/builtin/glsl/workgroup/arithmetic.glsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949

5050

5151
// reduction
52-
#define IRR_GLSL_WORKGROUP_REDUCE(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV) IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,_IRR_GLSL_WORKGROUP_SIZE_,;); \
52+
#define IRR_GLSL_WORKGROUP_REDUCE(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV) IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,_IRR_GLSL_WORKGROUP_SIZE_,false); \
5353
barrier(); \
5454
return CONV(irr_glsl_workgroupBroadcast_noBarriers(scan,lastInvocationInLevel))
5555

@@ -182,7 +182,7 @@ DECLARE_OVERLOAD_WITH_BARRIERS(float,workgroupMax)
182182

183183

184184
// scan
185-
#define IRR_GLSL_WORKGROUP_SCAN(EXCLUSIVE,CONV,OP,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV) IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,_IRR_GLSL_WORKGROUP_SIZE_,IRR_GLSL_WORKGROUP_SCAN_IMPL_LOOP_POSTLUDE) \
185+
#define IRR_GLSL_WORKGROUP_SCAN(EXCLUSIVE,CONV,OP,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV) IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,_IRR_GLSL_WORKGROUP_SIZE_,true) \
186186
IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(EXCLUSIVE,CONV,INCLUSIVE_SUBGROUP_OP,IDENTITY,INVCONV,OP)
187187

188188

include/irr/builtin/glsl/workgroup/ballot.glsl

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ uint irr_glsl_workgroupBallotFindMSB();
158158

159159

160160
// TODO: [[unroll]] the while 5-times ?
161-
#define IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,ITEM_COUNT,LOOP_POSTLUDE) SUBGROUP_SCRATCH_INITIALIZE(VALUE,ITEM_COUNT,IDENTITY,INVCONV) \
161+
#define IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,ITEM_COUNT,SCAN) SUBGROUP_SCRATCH_INITIALIZE(VALUE,ITEM_COUNT,IDENTITY,INVCONV) \
162162
const uint lastInvocation = ITEM_COUNT-1u; \
163163
uint lastInvocationInLevel = lastInvocation; \
164164
uint firstLevelScan = INVCONV(INCLUSIVE_SUBGROUP_OP(false,VALUE)); \
@@ -172,81 +172,76 @@ uint irr_glsl_workgroupBallotFindMSB();
172172
), \
173173
irr_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask,pseudoSubgroupID) \
174174
); \
175-
uint storeIndex = nextStoreIndex; \
176-
uint loadIndex = subgroupScanStoreOffset; \
175+
uint scanStoreIndex = (ITEM_COUNT<<1u)+gl_LocalInvocationIndex; \
177176
bool participate = gl_LocalInvocationIndex<=lastInvocationInLevel; \
178177
while (lastInvocationInLevel>=irr_glsl_SubgroupSize*irr_glsl_SubgroupSize) \
179178
{ \
180179
CONDITIONAL_BARRIER \
181180
if (participate) \
182181
{ \
183182
if (any(bvec2(gl_LocalInvocationIndex==lastInvocationInLevel,possibleProp))) \
184-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[storeIndex] = scan; \
183+
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[nextStoreIndex] = scan; \
185184
} \
186185
barrier(); \
187186
participate = gl_LocalInvocationIndex<=(lastInvocationInLevel>>=subgroupSizeLog2); \
188187
if (participate) \
189188
{ \
190-
const uint prevLevelScan = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[loadIndex]; \
189+
const uint prevLevelScan = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[subgroupScanStoreOffset]; \
191190
scan = INVCONV(INCLUSIVE_SUBGROUP_OP(false,CONV(prevLevelScan))); \
191+
if (SCAN) _IRR_GLSL_SCRATCH_SHARED_DEFINED_[scanStoreIndex] = scan; \
192192
} \
193-
LOOP_POSTLUDE \
193+
if (SCAN) scanStoreIndex += lastInvocationInLevel+1u; \
194194
} \
195195
if (lastInvocationInLevel>=irr_glsl_SubgroupSize) \
196196
{ \
197197
CONDITIONAL_BARRIER \
198198
if (participate) \
199199
{ \
200200
if (any(bvec2(gl_LocalInvocationIndex==lastInvocationInLevel,possibleProp))) \
201-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[storeIndex] = scan; \
201+
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[nextStoreIndex] = scan; \
202202
} \
203203
barrier(); \
204204
participate = gl_LocalInvocationIndex<=(lastInvocationInLevel>>=subgroupSizeLog2); \
205205
if (participate) \
206206
{ \
207-
const uint prevLevelScan = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[loadIndex]; \
207+
const uint prevLevelScan = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[subgroupScanStoreOffset]; \
208208
scan = INVCONV(INCLUSIVE_SUBGROUP_OP(false,CONV(prevLevelScan))); \
209+
if (SCAN) _IRR_GLSL_SCRATCH_SHARED_DEFINED_[scanStoreIndex] = scan; \
209210
} \
210211
}
211212

212-
#define IRR_GLSL_WORKGROUP_SCAN_IMPL_LOOP_POSTLUDE { \
213-
const uint memoryUsedThisPass = lastInvocationInLevel+1u; \
214-
storeIndex += memoryUsedThisPass; \
215-
loadIndex += memoryUsedThisPass; \
216-
}
217-
218213
#define IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(EXCLUSIVE,CONV,INCLUSIVE_SUBGROUP_OP,IDENTITY,INVCONV,OP) CONDITIONAL_BARRIER \
219214
if (lastInvocation>=irr_glsl_SubgroupSize) \
220215
{ \
221-
if (gl_LocalInvocationIndex<lastInvocationInLevel) \
222-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[loadIndex+1u] = scan; \
216+
uint scanLoadIndex = scanStoreIndex+irr_glsl_SubgroupSize; \
217+
const uint shiftedInvocationIndex = gl_LocalInvocationIndex+irr_glsl_SubgroupSize; \
218+
const uint currentToHighLevel = pseudoSubgroupID-shiftedInvocationIndex; \
223219
for (uint logShift=(findMSB(lastInvocation)/subgroupSizeLog2-1u)*subgroupSizeLog2; logShift>0u; logShift-=subgroupSizeLog2) \
224220
{ \
221+
lastInvocationInLevel = lastInvocation>>logShift; \
225222
barrier(); \
223+
const uint currentLevelIndex = scanLoadIndex-(lastInvocationInLevel+1u); \
224+
if (shiftedInvocationIndex<=lastInvocationInLevel) \
225+
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[currentLevelIndex] = INVCONV(OP (CONV(_IRR_GLSL_SCRATCH_SHARED_DEFINED_[scanLoadIndex+currentToHighLevel]),CONV(_IRR_GLSL_SCRATCH_SHARED_DEFINED_[currentLevelIndex]))); \
226+
scanLoadIndex = currentLevelIndex; \
226227
} \
227228
barrier(); \
228-
if (gl_LocalInvocationIndex<=lastInvocation) \
229-
firstLevelScan = INVCONV(OP (CONV(firstLevelScan),CONV(_IRR_GLSL_SCRATCH_SHARED_DEFINED_[nextStoreIndex]))); \
229+
if (gl_LocalInvocationIndex<=lastInvocation && pseudoSubgroupID!=0u) \
230+
{ \
231+
const uint higherLevelExclusive = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[scanLoadIndex+currentToHighLevel-1u]; \
232+
firstLevelScan = INVCONV(OP (CONV(higherLevelExclusive),CONV(firstLevelScan))); \
233+
} \
230234
} \
231235
if (EXCLUSIVE) \
232236
{ \
233-
const uint sharedOffsetOutTheWay = lastInvocationInLevel+gl_LocalInvocationIndex; \
237+
const uint sharedOffsetOutTheWay = scanStoreIndex+lastInvocationInLevel; \
234238
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[sharedOffsetOutTheWay+1u] = firstLevelScan; \
235239
barrier(); \
236240
return gl_LocalInvocationIndex!=0u ? CONV(_IRR_GLSL_SCRATCH_SHARED_DEFINED_[sharedOffsetOutTheWay]):IDENTITY; \
237241
} \
238242
else \
239243
return CONV(firstLevelScan);
240-
/*
241-
lastInvocationInLevel = lastInvocation>>logShift; \
242-
const uint memoryUsedThisPass = lastInvocationInLevel+1u; \
243-
if (gl_LocalInvocationIndex<=lastInvocationInLevel) \
244-
{ \
245-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[outIx] = INVCONV(OP (CONV(_IRR_GLSL_SCRATCH_SHARED_DEFINED_[outIx]),CONV(_IRR_GLSL_SCRATCH_SHARED_DEFINED_[lowerIndex]))); \
246-
} \
247-
storeIndex -= memoryUsedThisPass; \
248-
loadIndex -= memoryUsedThisPass; \
249-
*/
244+
250245

251246
uint irr_glsl_workgroupBallotScanBitCount_impl(in bool exclusive);
252247

@@ -262,7 +257,7 @@ uint irr_glsl_workgroupBallotExclusiveBitCount()
262257
uint irr_glsl_workgroupBallotScanBitCount_impl_impl(in uint localBitfield)
263258
{
264259
barrier();
265-
IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(irr_glsl_identityFunction,irr_glsl_subgroupInclusiveAdd_impl,localBitfield,0u,irr_glsl_identityFunction,irr_glsl_workgroupBallot_impl_BitfieldDWORDs,IRR_GLSL_WORKGROUP_SCAN_IMPL_LOOP_POSTLUDE)
260+
IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(irr_glsl_identityFunction,irr_glsl_subgroupInclusiveAdd_impl,localBitfield,0u,irr_glsl_identityFunction,irr_glsl_workgroupBallot_impl_BitfieldDWORDs,true)
266261
IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(true,irr_glsl_identityFunction,irr_glsl_subgroupInclusiveAdd_impl,0u,irr_glsl_identityFunction,irr_glsl_add)
267262
}
268263
uint irr_glsl_workgroupBallotScanBitCount_impl(in bool exclusive)

0 commit comments

Comments
 (0)