From c272d8fff3b0018d6cca5712c4375cac3860efd6 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Fri, 25 Jul 2025 21:01:54 +0100 Subject: [PATCH 1/3] [mlir][gpu] Make offset and width in gpu.rotate as attributes `offset` and `width` must be constants and there are constraints on their values. Update the operation definition to use attributes instead of operands. --- mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 13 ++---- mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 18 ++++----- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 35 +++------------- mlir/test/Conversion/GPUToSPIRV/rotate.mlir | 38 +++--------------- mlir/test/Dialect/GPU/invalid.mlir | 40 +++---------------- mlir/test/Dialect/GPU/ops.mlir | 5 +-- 6 files changed, 30 insertions(+), 119 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index 1dbaf5db7b618..170616f03be2c 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -1368,12 +1368,12 @@ def GPU_ShuffleOp : GPU_Op< def GPU_RotateOp : GPU_Op< "rotate", [Pure, AllTypesMatch<["value", "rotateResult"]>]>, - Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32:$offset, I32:$width)>, + Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32Attr:$offset, I32Attr:$width)>, Results<(outs AnyIntegerOrFloatOr1DVector:$rotateResult, I1:$valid)> { let summary = "Rotate values within a subgroup."; let description = [{ The "rotate" op moves values across lanes in a subgroup (a.k.a., local - invocations) within the same subgroup. The `width` argument specifies the + invocations) within the same subgroup. The `width` attribute specifies the number of lanes that participate in the rotation, and must be uniform across all participating lanes. Further, the first `width` lanes of the subgroup must be active. @@ -1394,9 +1394,7 @@ def GPU_RotateOp : GPU_Op< example: ```mlir - %offset = arith.constant 1 : i32 - %width = arith.constant 16 : i32 - %1, %2 = gpu.rotate %0, %offset, %width : f32 + %1, %2 = gpu.rotate %0, 1, 16 : f32 ``` For lane `k`, returns the value from lane `(k + cst1) % width`. @@ -1406,11 +1404,6 @@ def GPU_RotateOp : GPU_Op< $value `,` $offset `,` $width attr-dict `:` type($value) }]; - let builders = [ - // Helper function that creates a rotate with constant offset/width. - OpBuilder<(ins "Value":$value, "int32_t":$offset, "int32_t":$width)> - ]; - let hasVerifier = 1; } diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp index 75e65632b0cb7..a156fdd3d383e 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp @@ -507,25 +507,25 @@ LogicalResult GPURotateConversion::matchAndRewrite( getTypeConverter()->getTargetEnv(); unsigned subgroupSize = targetEnv.getAttr().getResourceLimits().getSubgroupSize(); - IntegerAttr widthAttr; - if (!matchPattern(rotateOp.getWidth(), m_Constant(&widthAttr)) || - widthAttr.getValue().getZExtValue() > subgroupSize) + unsigned width = rotateOp.getWidth(); + if (width > subgroupSize) return rewriter.notifyMatchFailure( - rotateOp, - "rotate width is not a constant or larger than target subgroup size"); + rotateOp, "rotate width is larger than target subgroup size"); Location loc = rotateOp.getLoc(); auto scope = rewriter.getAttr(spirv::Scope::Subgroup); + Value offsetVal = arith::ConstantOp::create(rewriter, loc, adaptor.getOffsetAttr()); + Value widthVal = arith::ConstantOp::create(rewriter, loc, adaptor.getWidthAttr()); Value rotateResult = spirv::GroupNonUniformRotateKHROp::create( - rewriter, loc, scope, adaptor.getValue(), adaptor.getOffset(), - adaptor.getWidth()); + rewriter, loc, scope, adaptor.getValue(), offsetVal, widthVal); Value validVal; - if (widthAttr.getValue().getZExtValue() == subgroupSize) { + if (width == subgroupSize) { validVal = spirv::ConstantOp::getOne(rewriter.getI1Type(), loc, rewriter); } else { + IntegerAttr widthAttr = adaptor.getWidthAttr(); Value laneId = gpu::LaneIdOp::create(rewriter, loc, widthAttr); validVal = arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::ult, - laneId, adaptor.getWidth()); + laneId, widthVal); } rewriter.replaceOp(rotateOp, {rotateResult, validVal}); diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index d186a480c0ce5..abd2ba5680471 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -1395,40 +1395,15 @@ void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value, // RotateOp //===----------------------------------------------------------------------===// -void RotateOp::build(OpBuilder &builder, OperationState &result, Value value, - int32_t offset, int32_t width) { - build(builder, result, value, - arith::ConstantOp::create(builder, result.location, - builder.getI32IntegerAttr(offset)), - arith::ConstantOp::create(builder, result.location, - builder.getI32IntegerAttr(width))); -} - LogicalResult RotateOp::verify() { - auto offsetConstOp = getOffset().getDefiningOp(); - if (!offsetConstOp) - return emitOpError() << "offset is not a constant value"; - - auto offsetIntAttr = - llvm::dyn_cast(offsetConstOp.getValue()); - - auto widthConstOp = getWidth().getDefiningOp(); - if (!widthConstOp) - return emitOpError() << "width is not a constant value"; - - auto widthIntAttr = - llvm::dyn_cast(widthConstOp.getValue()); - - llvm::APInt offsetValue = offsetIntAttr.getValue(); - llvm::APInt widthValue = widthIntAttr.getValue(); + uint32_t offset = getOffset(); + uint32_t width = getWidth(); - if (!widthValue.isPowerOf2()) + if (!llvm::isPowerOf2_32(width)) return emitOpError() << "width must be a power of two"; - if (offsetValue.sge(widthValue) || offsetValue.slt(0)) { - int64_t widthValueInt = widthValue.getSExtValue(); - return emitOpError() << "offset must be in the range [0, " << widthValueInt - << ")"; + if (offset >= width) { + return emitOpError() << "offset must be in the range [0, " << width << ")"; } return success(); diff --git a/mlir/test/Conversion/GPUToSPIRV/rotate.mlir b/mlir/test/Conversion/GPUToSPIRV/rotate.mlir index b96dd37219b46..c71d22017698a 100644 --- a/mlir/test/Conversion/GPUToSPIRV/rotate.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/rotate.mlir @@ -10,16 +10,14 @@ gpu.module @kernels { // CHECK-LABEL: spirv.func @rotate() gpu.func @rotate() kernel attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { - %offset = arith.constant 4 : i32 - %width = arith.constant 16 : i32 %val = arith.constant 42.0 : f32 + // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32 // CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32 // CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32 - // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32 // CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32 // CHECK: %{{.+}} = spirv.Constant true - %result, %valid = gpu.rotate %val, %offset, %width : f32 + %result, %valid = gpu.rotate %val, 4, 16 : f32 gpu.return } } @@ -38,18 +36,16 @@ gpu.module @kernels { // CHECK-LABEL: spirv.func @rotate_width_less_than_subgroup_size() gpu.func @rotate_width_less_than_subgroup_size() kernel attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { - %offset = arith.constant 4 : i32 - %width = arith.constant 8 : i32 %val = arith.constant 42.0 : f32 + // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32 // CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32 // CHECK: %[[WIDTH:.+]] = spirv.Constant 8 : i32 - // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32 // CHECK: %{{.+}} = spirv.GroupNonUniformRotateKHR %[[VAL]], %[[OFFSET]], cluster_size(%[[WIDTH]]) : f32, i32, i32 -> f32 // CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__ // CHECK: %[[INVOCATION_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]] // CHECK: %{{.+}} = spirv.ULessThan %[[INVOCATION_ID]], %[[WIDTH]] - %result, %valid = gpu.rotate %val, %offset, %width : f32 + %result, %valid = gpu.rotate %val, 4, 8 : f32 gpu.return } } @@ -67,34 +63,10 @@ module attributes { gpu.module @kernels { gpu.func @rotate_with_bigger_than_subgroup_size() kernel attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { - %offset = arith.constant 4 : i32 - %width = arith.constant 32 : i32 %val = arith.constant 42.0 : f32 // expected-error @+1 {{failed to legalize operation 'gpu.rotate'}} - %result, %valid = gpu.rotate %val, %offset, %width : f32 - gpu.return - } -} - -} - -// ----- - -module attributes { - gpu.container_module, - spirv.target_env = #spirv.target_env<#spirv.vce, - #spirv.resource_limits> -} { - -gpu.module @kernels { - gpu.func @rotate_non_const_width(%width: i32) kernel - attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { - %offset = arith.constant 4 : i32 - %val = arith.constant 42.0 : f32 - - // expected-error @+1 {{'gpu.rotate' op width is not a constant value}} - %result, %valid = gpu.rotate %val, %offset, %width : f32 + %result, %valid = gpu.rotate %val, 4, 32 : f32 gpu.return } } diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index 162ff0662e91e..0d2dc6fa34b02 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -479,20 +479,16 @@ func.func @shuffle_unsupported_type_vec(%arg0 : vector<[4]xf32>, %arg1 : i32, %a // ----- func.func @rotate_mismatching_type(%arg0 : f32) { - %offset = arith.constant 4 : i32 - %width = arith.constant 16 : i32 // expected-error@+1 {{op failed to verify that all of {value, rotateResult} have same type}} - %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (i32, i1) + %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 16 : i32 } : (f32) -> (i32, i1) return } // ----- func.func @rotate_unsupported_type(%arg0 : index) { - %offset = arith.constant 4 : i32 - %width = arith.constant 16 : i32 // expected-error@+1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'index'}} - %rotate, %valid = gpu.rotate %arg0, %offset, %width : index + %rotate, %valid = gpu.rotate %arg0, 4, 16 : index return } @@ -502,55 +498,31 @@ func.func @rotate_unsupported_type_vec(%arg0 : vector<[4]xf32>) { %offset = arith.constant 4 : i32 %width = arith.constant 16 : i32 // expected-error@+1 {{op operand #0 must be Integer or Float or fixed-length vector of Integer or Float values of ranks 1, but got 'vector<[4]xf32>'}} - %rotate, %valid = gpu.rotate %arg0, %offset, %width : vector<[4]xf32> + %rotate, %valid = gpu.rotate %arg0, 4, 16 : vector<[4]xf32> return } // ----- func.func @rotate_unsupported_width(%arg0 : f32) { - %offset = arith.constant 4 : i32 - %width = arith.constant 15 : i32 // expected-error@+1 {{op width must be a power of two}} - %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1) + %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 15 : i32 } : (f32) -> (f32, i1) return } // ----- func.func @rotate_unsupported_offset(%arg0 : f32) { - %offset = arith.constant 16 : i32 - %width = arith.constant 16 : i32 // expected-error@+1 {{op offset must be in the range [0, 16)}} - %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1) + %rotate, %valid = "gpu.rotate"(%arg0) { offset = 16 : i32, width = 16 : i32 }: (f32) -> (f32, i1) return } // ----- func.func @rotate_unsupported_offset_minus(%arg0 : f32) { - %offset = arith.constant -1 : i32 - %width = arith.constant 16 : i32 // expected-error@+1 {{op offset must be in the range [0, 16)}} - %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1) - return -} - -// ----- - -func.func @rotate_offset_non_constant(%arg0 : f32, %offset : i32) { - %width = arith.constant 16 : i32 - // expected-error@+1 {{op offset is not a constant value}} - %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1) - return -} - -// ----- - -func.func @rotate_width_non_constant(%arg0 : f32, %width : i32) { - %offset = arith.constant 0 : i32 - // expected-error@+1 {{op width is not a constant value}} - %rotate, %valid = "gpu.rotate"(%arg0, %offset, %width) : (f32, i32, i32) -> (f32, i1) + %rotate, %valid = "gpu.rotate"(%arg0) { offset = -1 : i32, width = 16 : i32 } : (f32) -> (f32, i1) return } diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index 2aef80f73feb3..ee1fdfa4d02f0 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -140,9 +140,8 @@ module attributes {gpu.container_module} { // CHECK: gpu.shuffle idx %{{.*}}, %{{.*}}, %{{.*}} : f32 %shfl3, %pred3 = gpu.shuffle idx %arg0, %offset, %width : f32 - // CHECK: gpu.rotate %{{.*}}, %{{.*}}, %{{.*}} : f32 - %rotate_width = arith.constant 16 : i32 - %rotate, %pred4 = gpu.rotate %arg0, %offset, %rotate_width : f32 + // CHECK: gpu.rotate %{{.*}}, 3, 16 : f32 + %rotate, %pred4 = gpu.rotate %arg0, 3, 16 : f32 "gpu.barrier"() : () -> () From bc0c04df9a178adcf40b4be64abe5f90ab4951db Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Mon, 28 Jul 2025 10:00:46 +0100 Subject: [PATCH 2/3] code formatting --- mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp index a156fdd3d383e..18178614e39cd 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp @@ -514,8 +514,10 @@ LogicalResult GPURotateConversion::matchAndRewrite( Location loc = rotateOp.getLoc(); auto scope = rewriter.getAttr(spirv::Scope::Subgroup); - Value offsetVal = arith::ConstantOp::create(rewriter, loc, adaptor.getOffsetAttr()); - Value widthVal = arith::ConstantOp::create(rewriter, loc, adaptor.getWidthAttr()); + Value offsetVal = + arith::ConstantOp::create(rewriter, loc, adaptor.getOffsetAttr()); + Value widthVal = + arith::ConstantOp::create(rewriter, loc, adaptor.getWidthAttr()); Value rotateResult = spirv::GroupNonUniformRotateKHROp::create( rewriter, loc, scope, adaptor.getValue(), offsetVal, widthVal); Value validVal; From 52f9075e5082a5f02e6c28f235ade49bb30a1426 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Mon, 28 Jul 2025 15:51:53 +0100 Subject: [PATCH 3/3] use ConfinedAttr to constrain values --- mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 4 +++- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 3 --- mlir/test/Dialect/GPU/invalid.mlir | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index 170616f03be2c..2ed7d3810b918 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -1368,7 +1368,9 @@ def GPU_ShuffleOp : GPU_Op< def GPU_RotateOp : GPU_Op< "rotate", [Pure, AllTypesMatch<["value", "rotateResult"]>]>, - Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32Attr:$offset, I32Attr:$width)>, + Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, + ConfinedAttr]>:$offset, + ConfinedAttr:$width)>, Results<(outs AnyIntegerOrFloatOr1DVector:$rotateResult, I1:$valid)> { let summary = "Rotate values within a subgroup."; let description = [{ diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index abd2ba5680471..5a72ef17db7f0 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -1399,9 +1399,6 @@ LogicalResult RotateOp::verify() { uint32_t offset = getOffset(); uint32_t width = getWidth(); - if (!llvm::isPowerOf2_32(width)) - return emitOpError() << "width must be a power of two"; - if (offset >= width) { return emitOpError() << "offset must be in the range [0, " << width << ")"; } diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index 0d2dc6fa34b02..35381dab7b200 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -505,7 +505,7 @@ func.func @rotate_unsupported_type_vec(%arg0 : vector<[4]xf32>) { // ----- func.func @rotate_unsupported_width(%arg0 : f32) { - // expected-error@+1 {{op width must be a power of two}} + // expected-error@+1 {{'gpu.rotate' op attribute 'width' failed to satisfy constraint: 32-bit signless integer attribute whose value is a power of two > 0}} %rotate, %valid = "gpu.rotate"(%arg0) { offset = 4 : i32, width = 15 : i32 } : (f32) -> (f32, i1) return } @@ -521,7 +521,7 @@ func.func @rotate_unsupported_offset(%arg0 : f32) { // ----- func.func @rotate_unsupported_offset_minus(%arg0 : f32) { - // expected-error@+1 {{op offset must be in the range [0, 16)}} + // expected-error@+1 {{'gpu.rotate' op attribute 'offset' failed to satisfy constraint: 32-bit signless integer attribute whose minimum value is 0}} %rotate, %valid = "gpu.rotate"(%arg0) { offset = -1 : i32, width = 16 : i32 } : (f32) -> (f32, i1) return }