@@ -2484,33 +2484,41 @@ type __builtin_IB_WorkGroupReduce_##func##_##type_abbr(type X)
24842484 } \
24852485 SPIRV_BUILTIN (ControlBarrier , _i32_i32_i32 , )(Workgroup , 0 , AcquireRelease | WorkgroupMemory ); \
24862486 \
2487- type low_data ; \
2488- type high_data ; \
2489- type reduce ; \
2490- if (sg_size == 32 ) /* SIMD32 */ \
2487+ if (sg_id == 0 ) \
24912488 { \
2492- low_data = sg_lid < values_num ? scratch [sg_lid ] : identity ; \
2493- high_data = sg_lid + 32 < values_num ? scratch [sg_lid + 32 ] : identity ; \
2494- /* 64 (from 64) elements reduces to 32 */ \
2495- reduce = op (low_data , high_data ); \
2496- } \
2497- else if (sg_size == 16 ) /* SIMD16 */ \
2498- { \
2499- low_data = sg_lid < values_num ? scratch [sg_lid ] : identity ; \
2500- type mid_low_data = sg_lid + 16 < values_num ? scratch [sg_lid + 16 ] : identity ; \
2501- type mid_high_data = sg_lid + 32 < values_num ? scratch [sg_lid + 32 ] : identity ; \
2502- high_data = sg_lid + 32 + 16 < values_num ? scratch [sg_lid + 32 + 16 ] : identity ; \
2503- /* 32 first part (from 64) elements reduces to 16 */ \
2504- low_data = op (low_data , mid_low_data ); \
2505- /* 32 second part (from 64) elements reduces to 16 */ \
2506- high_data = op (mid_high_data , high_data ); \
2507- /* 64 (from 64) elements reduces to 16 */ \
2508- reduce = op (low_data , high_data ); \
2509- } \
2510- /* SIMD8 is not available on PVC */ \
2489+ type low_data ; \
2490+ type high_data ; \
2491+ type reduce ; \
25112492 \
2512- sg_x = SPIRV_BUILTIN (Group ##func , _i32_i32_##type_abbr, )(Subgroup, GroupOperationReduce, reduce); \
2513- return sg_x; \
2493+ if (sg_size == 32 ) /* SIMD32 */ \
2494+ { \
2495+ low_data = sg_lid < values_num ? scratch [sg_lid ] : identity ; \
2496+ high_data = sg_lid + 32 < values_num ? scratch [sg_lid + 32 ] : identity ; \
2497+ /* 64 (from 64) elements reduces to 32 */ \
2498+ reduce = op (low_data , high_data ); \
2499+ } \
2500+ else if (sg_size == 16 ) /* SIMD16 */ \
2501+ { \
2502+ low_data = sg_lid < values_num ? scratch [sg_lid ] : identity ; \
2503+ type mid_low_data = sg_lid + 16 < values_num ? scratch [sg_lid + 16 ] : identity ; \
2504+ type mid_high_data = sg_lid + 32 < values_num ? scratch [sg_lid + 32 ] : identity ; \
2505+ high_data = sg_lid + 32 + 16 < values_num ? scratch [sg_lid + 32 + 16 ] : identity ; \
2506+ /* 32 first part (from 64) elements reduces to 16 */ \
2507+ low_data = op (low_data , mid_low_data ); \
2508+ /* 32 second part (from 64) elements reduces to 16 */ \
2509+ high_data = op (mid_high_data , high_data ); \
2510+ /* 64 (from 64) elements reduces to 16 */ \
2511+ reduce = op (low_data , high_data ); \
2512+ } \
2513+ /* SIMD8 is not available on PVC */ \
2514+ \
2515+ sg_x = SPIRV_BUILTIN (Group ##func , _i32_i32_##type_abbr, )(Subgroup, GroupOperationReduce, reduce); \
2516+ if (sg_lid == 0) { \
2517+ scratch[0] = sg_x; \
2518+ } \
2519+ } \
2520+ SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2521+ return scratch[0]; \
25142522 } \
25152523 } \
25162524 else \
0 commit comments