Skip to content

Commit 4a7dd3a

Browse files
unify the code a bit
1 parent 911090b commit 4a7dd3a

File tree

3 files changed

+54
-52
lines changed

3 files changed

+54
-52
lines changed

examples_tests/48.ArithmeticUnitTest/main.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -364,9 +364,9 @@ int main()
364364

365365
driver->beginScene(true);
366366
const video::IGPUDescriptorSet* ds = descriptorSet.get();
367-
passed = runTest<emulatedSubgroupReduction>(driver,pipelines[0u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
368-
passed = runTest<emulatedSubgroupScanExclusive>(driver,pipelines[1u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
369-
passed = runTest<emulatedSubgroupScanInclusive>(driver,pipelines[2u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
367+
//passed = runTest<emulatedSubgroupReduction>(driver,pipelines[0u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
368+
//passed = runTest<emulatedSubgroupScanExclusive>(driver,pipelines[1u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
369+
//passed = runTest<emulatedSubgroupScanInclusive>(driver,pipelines[2u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
370370
passed = runTest<emulatedWorkgroupReduction>(driver,pipelines[3u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
371371
//passed = runTest<emulatedSubgroupScanInclusive>(driver,pipelines[4u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
372372
//passed = runTest<emulatedSubgroupScanInclusive>(driver,pipelines[5u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;

include/irr/builtin/glsl/workgroup/arithmetic.glsl

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,8 @@
4949

5050

5151
// reduction
52-
#define IRR_GLSL_WORKGROUP_REDUCE(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV) IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,_IRR_GLSL_WORKGROUP_SIZE_) \
53-
IRR_GLSL_WORKGROUP_REDUCE_IMPL_TAIL(CONV,INCLUSIVE_SUBGROUP_OP,INVCONV) \
52+
#define IRR_GLSL_WORKGROUP_REDUCE(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV) IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,_IRR_GLSL_WORKGROUP_SIZE_,;); \
53+
barrier(); \
5454
return CONV(irr_glsl_workgroupBroadcast_noBarriers(scan,lastInvocationInLevel))
5555

5656

@@ -182,17 +182,8 @@ DECLARE_OVERLOAD_WITH_BARRIERS(float,workgroupMax)
182182

183183

184184
// scan
185-
#define IRR_GLSL_WORKGROUP_SCAN(EXCLUSIVE,CONV,OP,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV) IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,_IRR_GLSL_WORKGROUP_SIZE_) \
186-
IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(CONV,INCLUSIVE_SUBGROUP_OP,INVCONV,OP) \
187-
if (EXCLUSIVE) \
188-
{ \
189-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[lastInvocationInLevel+1u+gl_LocalInvocationIndex] = firstLevelScan; \
190-
barrier(); \
191-
memoryBarrierShared(); \
192-
return CONV(gl_LocalInvocationIndex!=0u ? _IRR_GLSL_SCRATCH_SHARED_DEFINED_[lastInvocationInLevel+gl_LocalInvocationIndex]:0u); \
193-
} \
194-
else \
195-
return CONV(firstLevelScan);
185+
#define IRR_GLSL_WORKGROUP_SCAN(EXCLUSIVE,CONV,OP,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV) IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,_IRR_GLSL_WORKGROUP_SIZE_,IRR_GLSL_WORKGROUP_SCAN_IMPL_LOOP_POSTLUDE) \
186+
IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(EXCLUSIVE,CONV,INCLUSIVE_SUBGROUP_OP,INVCONV,OP)
196187

197188

198189

include/irr/builtin/glsl/workgroup/ballot.glsl

Lines changed: 47 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -158,67 +158,80 @@ uint irr_glsl_workgroupBallotFindMSB();
158158

159159

160160
// TODO: [[unroll]] the while 5-times ?
161-
#define IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,ITEM_COUNT) SUBGROUP_SCRATCH_INITIALIZE(VALUE,ITEM_COUNT,IDENTITY,INVCONV) \
161+
#define IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(CONV,INCLUSIVE_SUBGROUP_OP,VALUE,IDENTITY,INVCONV,ITEM_COUNT,LOOP_POSTLUDE) SUBGROUP_SCRATCH_INITIALIZE(VALUE,ITEM_COUNT,IDENTITY,INVCONV) \
162162
const uint lastInvocation = ITEM_COUNT-1u; \
163163
uint lastInvocationInLevel = lastInvocation; \
164164
const uint firstLevelScan = INVCONV(INCLUSIVE_SUBGROUP_OP(false,VALUE)); \
165165
uint scan = firstLevelScan; \
166166
const bool possibleProp = pseudoSubgroupInvocation==loMask; \
167167
const uint subgroupSizeLog2 = findLSB(irr_glsl_SubgroupSize); \
168168
const uint pseudoSubgroupID = (gl_LocalInvocationIndex>>subgroupSizeLog2); \
169-
const uint nextPassStoreIndex = irr_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset( \
169+
const uint nextStoreIndex = irr_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset( \
170170
irr_glsl_subgroup_impl_getSubgroupEmulationMemoryStart( \
171171
irr_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask,pseudoSubgroupID) \
172172
), \
173173
irr_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask,pseudoSubgroupID) \
174174
); \
175-
bool participate = gl_LocalInvocationIndex<=lastInvocationInLevel;
176-
177-
#define IRR_GLSL_WORKGROUP_REDUCE_IMPL_TAIL(CONV,INCLUSIVE_SUBGROUP_OP,INVCONV) while (lastInvocationInLevel>=irr_glsl_SubgroupSize) \
175+
uint storeIndex = nextStoreIndex; \
176+
uint loadIndex = subgroupScanStoreOffset; \
177+
bool participate = gl_LocalInvocationIndex<=lastInvocationInLevel; \
178+
while (lastInvocationInLevel>=irr_glsl_SubgroupSize*irr_glsl_SubgroupSize) \
178179
{ \
179180
CONDITIONAL_BARRIER \
180181
if (participate) \
181182
{ \
182183
if (any(bvec2(gl_LocalInvocationIndex==lastInvocationInLevel,possibleProp))) \
183-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[nextPassStoreIndex] = scan; \
184+
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[storeIndex] = scan; \
184185
} \
185186
barrier(); \
186187
participate = gl_LocalInvocationIndex<=(lastInvocationInLevel>>=subgroupSizeLog2); \
187188
if (participate) \
188189
{ \
189-
const uint prevLevelScan = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[subgroupScanStoreOffset]; \
190+
const uint prevLevelScan = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[loadIndex]; \
190191
scan = INVCONV(INCLUSIVE_SUBGROUP_OP(false,CONV(prevLevelScan))); \
191192
} \
193+
LOOP_POSTLUDE \
192194
} \
193-
CONDITIONAL_BARRIER
194-
195-
#define IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(CONV,INCLUSIVE_SUBGROUP_OP,INVCONV,OP) ;
196-
/*
197-
#define IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(CONV,INCLUSIVE_SUBGROUP_OP,INVCONV,OP) while (lastInvocationInLevel>=irr_glsl_SubgroupSize) \
195+
if (lastInvocationInLevel>=irr_glsl_SubgroupSize) \
198196
{ \
199197
CONDITIONAL_BARRIER \
200-
const bool prop = propagateReduction&&gl_LocalInvocationIndex<lastInvocationInLevel || gl_LocalInvocationIndex==lastInvocationInLevel; \
201-
if (prop) \
202-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[lowerIndex] = scan; \
198+
if (participate) \
199+
{ \
200+
if (any(bvec2(gl_LocalInvocationIndex==lastInvocationInLevel,possibleProp))) \
201+
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[storeIndex] = scan; \
202+
} \
203203
barrier(); \
204-
memoryBarrierShared(); \
205-
lastInvocationInLevel >>= subgroupSizeLog2; \
206-
if (gl_LocalInvocationIndex<=lastInvocationInLevel) \
204+
participate = gl_LocalInvocationIndex<=(lastInvocationInLevel>>=subgroupSizeLog2); \
205+
if (participate) \
207206
{ \
208-
const uint prevLevelScan = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[lowerIndex+lo2HiIndex]; \
207+
const uint prevLevelScan = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[loadIndex]; \
209208
scan = INVCONV(INCLUSIVE_SUBGROUP_OP(false,CONV(prevLevelScan))); \
210209
} \
211-
lowerIndex += lastInvocationInLevel+1u; \
212-
} \
213-
CONDITIONAL_BARRIER \
210+
}
211+
212+
#define IRR_GLSL_WORKGROUP_SCAN_IMPL_LOOP_POSTLUDE { \
213+
const uint memoryUsedThisPass = lastInvocationInLevel+1u; \
214+
storeIndex += memoryUsedThisPass; \
215+
loadIndex += memoryUsedThisPass; \
216+
}
217+
218+
#define IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(EXCLUSIVE,CONV,INCLUSIVE_SUBGROUP_OP,INVCONV,OP) CONDITIONAL_BARRIER \
214219
if (lastInvocation>=irr_glsl_SubgroupSize) \
215220
{ \
216-
lowerIndex -= lastInvocationInLevel; \
217221
if (gl_LocalInvocationIndex<lastInvocationInLevel) \
218-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[lowerIndex+lo2HiIndex] = scan; \
219-
lowerIndex--; \
222+
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[loadIndex+1u] = scan; \
220223
barrier(); \
221-
memoryBarrierShared(); \
224+
} \
225+
if (EXCLUSIVE) \
226+
{ \
227+
const uint sharedOffsetOutTheWay = lastInvocationInLevel+gl_LocalInvocationIndex; \
228+
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[sharedOffsetOutTheWay+1u] = firstLevelScan; \
229+
barrier(); \
230+
return CONV(gl_LocalInvocationIndex!=0u ? _IRR_GLSL_SCRATCH_SHARED_DEFINED_[sharedOffsetOutTheWay]:0u); \
231+
} \
232+
else \
233+
return CONV(firstLevelScan);
234+
/*
222235
const uint shiftedInvocationIndex = gl_LocalInvocationIndex+irr_glsl_SubgroupSize; \
223236
const uint lo2HiIndexDownsweep = lo2HiIndex+irr_glsl_SubgroupSize; \
224237
for (uint logShift=(findMSB(lastInvocation)/subgroupSizeLog2-1u)*subgroupSizeLog2; logShift>0u; logShift-=subgroupSizeLog2) \
@@ -236,8 +249,8 @@ uint irr_glsl_workgroupBallotFindMSB();
236249
} \
237250
if (shiftedInvocationIndex<=lastInvocation) \
238251
firstLevelScan = INVCONV(OP (CONV(firstLevelScan),CONV(_IRR_GLSL_SCRATCH_SHARED_DEFINED_[lowerIndex-1u]))); \
239-
}
240252
*/
253+
241254
uint irr_glsl_workgroupBallotScanBitCount_impl(in bool exclusive);
242255

243256
uint irr_glsl_workgroupBallotInclusiveBitCount()
@@ -249,22 +262,20 @@ uint irr_glsl_workgroupBallotExclusiveBitCount()
249262
return irr_glsl_workgroupBallotScanBitCount_impl(true);
250263
}
251264

265+
uint irr_glsl_workgroupBallotScanBitCount_impl_impl(in uint localBitfield)
266+
{
267+
barrier();
268+
IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(irr_glsl_identityFunction,irr_glsl_subgroupInclusiveAdd_impl,localBitfield,0u,irr_glsl_identityFunction,irr_glsl_workgroupBallot_impl_BitfieldDWORDs,IRR_GLSL_WORKGROUP_SCAN_IMPL_LOOP_POSTLUDE)
269+
IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(true,irr_glsl_identityFunction,irr_glsl_subgroupInclusiveAdd_impl,irr_glsl_identityFunction,irr_glsl_add)
270+
}
252271
uint irr_glsl_workgroupBallotScanBitCount_impl(in bool exclusive)
253272
{
254273
uint localBitfieldBackup;
255274
if (gl_LocalInvocationIndex<irr_glsl_workgroupBallot_impl_BitfieldDWORDs)
256275
localBitfieldBackup = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex];
257276

258277
// scan hierarchically
259-
uint globalCount;
260-
{
261-
barrier();
262-
IRR_GLSL_WORKGROUP_COMMON_IMPL_HEAD(irr_glsl_identityFunction,irr_glsl_subgroupInclusiveAdd_impl,localBitfieldBackup,0u,irr_glsl_identityFunction,irr_glsl_workgroupBallot_impl_BitfieldDWORDs)
263-
IRR_GLSL_WORKGROUP_SCAN_IMPL_TAIL(irr_glsl_identityFunction,irr_glsl_subgroupInclusiveAdd_impl,irr_glsl_identityFunction,irr_glsl_add)
264-
_IRR_GLSL_SCRATCH_SHARED_DEFINED_[lastInvocationInLevel+1u+gl_LocalInvocationIndex] = firstLevelScan;
265-
barrier();
266-
globalCount = gl_LocalInvocationIndex!=0u ? _IRR_GLSL_SCRATCH_SHARED_DEFINED_[lastInvocationInLevel+gl_LocalInvocationIndex]:0u;
267-
}
278+
uint globalCount = irr_glsl_workgroupBallotScanBitCount_impl_impl(localBitfieldBackup);
268279

269280
// restore
270281
if (gl_LocalInvocationIndex<irr_glsl_workgroupBallot_impl_BitfieldDWORDs)

0 commit comments

Comments
 (0)