@@ -1798,26 +1798,27 @@ DEFN_ARITH_OPERATIONS(double)
17981798DEFN_ARITH_OPERATIONS (half )
17991799#endif // defined(cl_khr_fp16)
18001800
1801- #define DEFN_WORK_GROUP_REDUCE (type , op , X ) \
1802- { \
1803- GET_MEMPOOL_PTR(data, type, true, 0) \
1804- uint lid = __spirv_BuiltInLocalInvocationIndex(); \
1805- uint lsize = __spirv_WorkgroupSize(); \
1806- data[lid] = X; \
1807- \
1808- uint i = 1 << ( ((8 * sizeof(uint)) - __builtin_spirv_OpenCL_clz_i32(lsize - 1)) - 1); \
1809- while(i > 0) \
1810- { \
1801+ #define DEFN_WORK_GROUP_REDUCE (type , op , identity , X ) \
1802+ { \
1803+ GET_MEMPOOL_PTR(data, type, true, 0) \
1804+ uint lid = __spirv_BuiltInLocalInvocationIndex(); \
1805+ uint lsize = __spirv_WorkgroupSize(); \
1806+ data[lid] = X; \
1807+ __builtin_spirv_OpControlBarrier_i32_i32_i32(Execution, 0, AcquireRelease | WorkgroupMemory); \
1808+ uint mask = 1 << ( ((8 * sizeof(uint)) - __builtin_spirv_OpenCL_clz_i32(lsize - 1)) - 1) ; \
1809+ while( mask > 0 ) \
1810+ { \
1811+ uint c = lid ^ mask; \
1812+ type other = ( c < lsize ) ? data[ c ] : identity; \
1813+ X = op( other, X ); \
1814+ __builtin_spirv_OpControlBarrier_i32_i32_i32(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
1815+ data[lid] = X; \
18111816 __builtin_spirv_OpControlBarrier_i32_i32_i32(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
1812- if ((lid < i) && (lid + i < lsize)) \
1813- { \
1814- X = op(X, data[lid + i]); \
1815- data[lid] = X; \
1816- } \
1817- i >>= 1; \
1818- } \
1817+ mask >>= 1; \
1818+ } \
1819+ type ret = data[0]; \
18191820 __builtin_spirv_OpControlBarrier_i32_i32_i32(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
1820- return data[0]; \
1821+ return ret; \
18211822}
18221823
18231824
@@ -1987,7 +1988,7 @@ DEFN_ARITH_OPERATIONS(half)
19871988{ \
19881989 switch(Operation){ \
19891990 case GroupOperationReduce: \
1990- DEFN_WORK_GROUP_REDUCE(type, op, X) \
1991+ DEFN_WORK_GROUP_REDUCE(type, op, identity, X) \
19911992 break; \
19921993 case GroupOperationInclusiveScan: \
19931994 DEFN_WORK_GROUP_SCAN_INCL(type, op, identity, X) \
0 commit comments