@@ -83,11 +83,11 @@ struct GpuAllReduceRewriter {
8383
8484 // Compute lane id (invocation id within the subgroup).
8585 Value subgroupMask =
86- create<arith::ConstantIntOp>(kSubgroupSize - 1 , int32Type );
86+ create<arith::ConstantIntOp>(int32Type, kSubgroupSize - 1 );
8787 Value laneId = create<arith::AndIOp>(invocationIdx, subgroupMask);
8888 Value isFirstLane =
8989 create<arith::CmpIOp>(arith::CmpIPredicate::eq, laneId,
90- create<arith::ConstantIntOp>(0 , int32Type ));
90+ create<arith::ConstantIntOp>(int32Type, 0 ));
9191
9292 Value numThreadsWithSmallerSubgroupId =
9393 create<arith::SubIOp>(invocationIdx, laneId);
@@ -282,7 +282,7 @@ struct GpuAllReduceRewriter {
282282 // / The first lane returns the result; the values returned by all other lanes are undefined.
283283 Value createSubgroupReduce (Value activeWidth, Value laneId, Value operand,
284284 AccumulatorFactory &accumFactory) {
285- Value subgroupSize = create<arith::ConstantIntOp>(kSubgroupSize , int32Type );
285+ Value subgroupSize = create<arith::ConstantIntOp>(int32Type, kSubgroupSize );
286286 Value isPartialSubgroup = create<arith::CmpIOp>(arith::CmpIPredicate::slt,
287287 activeWidth, subgroupSize);
288288 std::array<Type, 2 > shuffleType = {valueType, rewriter.getI1Type ()};
@@ -296,7 +296,7 @@ struct GpuAllReduceRewriter {
296296 // lane is within the active range. The accumulated value is available
297297 // in the first lane.
298298 for (int i = 1 ; i < kSubgroupSize ; i <<= 1 ) {
299- Value offset = create<arith::ConstantIntOp>(i, int32Type );
299+ Value offset = create<arith::ConstantIntOp>(int32Type, i );
300300 auto shuffleOp = create<gpu::ShuffleOp>(
301301 shuffleType, value, offset, activeWidth, gpu::ShuffleMode::XOR);
302302 // Skip the accumulation if the shuffle op read from a lane outside
@@ -318,7 +318,7 @@ struct GpuAllReduceRewriter {
318318 [&] {
319319 Value value = operand;
320320 for (int i = 1 ; i < kSubgroupSize ; i <<= 1 ) {
321- Value offset = create<arith::ConstantIntOp>(i, int32Type );
321+ Value offset = create<arith::ConstantIntOp>(int32Type, i );
322322 auto shuffleOp =
323323 create<gpu::ShuffleOp>(shuffleType, value, offset, subgroupSize,
324324 gpu::ShuffleMode::XOR);
@@ -331,7 +331,7 @@ struct GpuAllReduceRewriter {
331331
332332 // / Returns value divided by the subgroup size (i.e. 32).
333333 Value getDivideBySubgroupSize (Value value) {
334- Value subgroupSize = create<arith::ConstantIntOp>(kSubgroupSize , int32Type );
334+ Value subgroupSize = create<arith::ConstantIntOp>(int32Type, kSubgroupSize );
335335 return create<arith::DivSIOp>(int32Type, value, subgroupSize);
336336 }
337337
0 commit comments