|
16 | 16 | #else
|
17 | 17 | */
|
18 | 18 |
|
19 |
| -#define SUBGROUP_BARRIERS |
| 19 | +#define SUBGROUP_BARRIERS memoryBarrierShared() |
20 | 20 |
|
21 | 21 | //#endif
|
22 | 22 |
|
@@ -141,28 +141,34 @@ TODO: Keep the pseudo subgroup and offset code DRY, move to a function.
|
141 | 141 | const uint pseudoSubgroupID = gl_LocalInvocationIndex&hiMask; \
|
142 | 142 | const uint scratchOffset = (pseudoSubgroupID<<1u)|pseudoSubgroupInvocation; \
|
143 | 143 | const uint primaryOffset = scratchOffset+irr_glsl_HalfSubgroupSize; \
|
| 144 | + SUBGROUP_BARRIERS; \ |
144 | 145 | _IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset] = INVCONV (VALUE); \
|
145 | 146 | if (CLEAR && pseudoSubgroupInvocation<irr_glsl_HalfSubgroupSize) \
|
146 | 147 | _IRR_GLSL_SCRATCH_SHARED_DEFINED_[scratchOffset] = INVCONV (IDENTITY); \
|
147 | 148 | SUBGROUP_BARRIERS; \
|
148 | 149 | VALUE = OP (VALUE,CONV (_IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset-1u])); \
|
| 150 | + SUBGROUP_BARRIERS; \ |
149 | 151 | _IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset] = INVCONV (VALUE); \
|
150 | 152 | SUBGROUP_BARRIERS; \
|
151 | 153 | VALUE = OP (VALUE,CONV (_IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset-2u])); \
|
152 | 154 | for (uint stp=irr_glsl_MinSubgroupSize; stp<irr_glsl_SubgroupSize; stp<<=1u) \
|
153 | 155 | { \
|
| 156 | + SUBGROUP_BARRIERS; \ |
154 | 157 | _IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset] = INVCONV (VALUE); \
|
155 | 158 | SUBGROUP_BARRIERS; \
|
156 | 159 | VALUE = OP (VALUE,CONV (_IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset-stp])); \
|
157 |
| - } |
| 160 | + } \ |
| 161 | + SUBGROUP_BARRIERS; |
158 | 162 |
|
159 | 163 |
|
160 | 164 | #define IRR_GLSL_SUBGROUP_REDUCE(CONV,OP,VALUE,CLEAR,IDENTITY,INVCONV) IRR_GLSL_SUBGROUP_ARITHMETIC_IMPL(CONV,OP,VALUE,CLEAR,IDENTITY,INVCONV) \
|
161 | 165 | _IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset] = INVCONV (VALUE); \
|
162 | 166 | SUBGROUP_BARRIERS; \
|
163 | 167 | const uint maxPseudoSubgroupInvocation = (_IRR_GLSL_WORKGROUP_SIZE_-1u)&loMask; \
|
164 | 168 | const uint maxPseudoSubgroupID = (_IRR_GLSL_WORKGROUP_SIZE_-1u)&hiMask; \
|
165 |
| - return CONV(_IRR_GLSL_SCRATCH_SHARED_DEFINED_[((maxPseudoSubgroupID<<1u)|maxPseudoSubgroupInvocation)+irr_glsl_HalfSubgroupSize]) |
| 169 | + const uint lastItem = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[((maxPseudoSubgroupID<<1u)|maxPseudoSubgroupInvocation)+irr_glsl_HalfSubgroupSize]; \ |
| 170 | + SUBGROUP_BARRIERS; \ |
| 171 | + return CONV (lastItem); |
166 | 172 |
|
167 | 173 |
|
168 | 174 |
|
@@ -267,7 +273,9 @@ float irr_glsl_subgroupMax_impl(in bool clearScratchToIdentity, float value)
|
267 | 273 | #define IRR_GLSL_SUBGROUP_EXCLUSIVE_SCAN(CONV,OP,VALUE,CLEAR,IDENTITY,INVCONV) IRR_GLSL_SUBGROUP_ARITHMETIC_IMPL(CONV,OP,VALUE,CLEAR,IDENTITY,INVCONV) \
|
268 | 274 | _IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset] = INVCONV (VALUE); \
|
269 | 275 | SUBGROUP_BARRIERS; \
|
270 |
| - return CONV (_IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset-1u]) |
| 276 | + const uint prevItem = _IRR_GLSL_SCRATCH_SHARED_DEFINED_[primaryOffset-1u]; \ |
| 277 | + SUBGROUP_BARRIERS; \ |
| 278 | + return CONV (prevItem); |
271 | 279 |
|
272 | 280 |
|
273 | 281 | uint irr_glsl_subgroupInclusiveAnd_impl(in bool clearScratchToIdentity, uint value)
|
@@ -348,16 +356,7 @@ float irr_glsl_subgroupExclusiveOr_impl(in bool clearScratchToIdentity, float va
|
348 | 356 |
|
349 | 357 | uint irr_glsl_subgroupInclusiveAdd_impl(in bool clearScratchToIdentity, uint value)
|
350 | 358 | {
|
351 |
| - const uint loMask = 3u; |
352 |
| - uint pseudoSubgroupInvocation = gl_LocalInvocationIndex & loMask; |
353 |
| - uint pseudoSubgroupID = gl_LocalInvocationIndex & (~loMask); |
354 |
| - _IRR_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = value; |
355 |
| - barrier(); |
356 |
| - memoryBarrierShared(); |
357 |
| - for (uint i=pseudoSubgroupID; i<gl_LocalInvocationIndex; i++) |
358 |
| - value += _IRR_GLSL_SCRATCH_SHARED_DEFINED_[i]; |
359 |
| - return value; |
360 |
| - //IRR_GLSL_SUBGROUP_INCLUSIVE_SCAN(irr_glsl_identityFunction,irr_glsl_add,value,clearScratchToIdentity,0u,irr_glsl_identityFunction); |
| 359 | + IRR_GLSL_SUBGROUP_INCLUSIVE_SCAN(irr_glsl_identityFunction,irr_glsl_add,value,clearScratchToIdentity,0u,irr_glsl_identityFunction); |
361 | 360 | }
|
362 | 361 | int irr_glsl_subgroupInclusiveAdd_impl(in bool clearScratchToIdentity, int value)
|
363 | 362 | {
|
|
0 commit comments