renamings

Hardcode84 · Hardcode84 · commit fffce28ad091 · 2025-08-29T09:09:01.000+02:00
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -3218,16 +3218,16 @@ def GPU_WarpExecuteOnLane0Op : GPU_Op<"warp_execute_on_lane_0",
 def GPU_BroadcastType : I32EnumAttr<"BroadcastType",
     "a lane to broadcast from",
     [
-      I32EnumAttrCase<"first_lane", 0>,
+      I32EnumAttrCase<"first_active_lane", 0>,
       I32EnumAttrCase<"any_lane", 1>,
-      I32EnumAttrCase<"lane", 2>
+      I32EnumAttrCase<"specific_lane", 2>
     ]>{
   let genSpecializedAttr = 0;
   let cppNamespace = "::mlir::gpu";
 }
 def GPU_BroadcastTypeAttr : EnumAttr<GPU_Dialect, GPU_BroadcastType, "broadcast">;
 
-def GPU_BroadcastLaneOp : GPU_Op<"broadcast_lane",
+def GPU_SubgroupBroadcastOp : GPU_Op<"subgroup_broadcast",
     [NoMemoryEffect, AllTypesMatch<["result", "src"]>,
     DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
     DeclareOpInterfaceMethods<ConditionallySpeculatable, ["getSpeculatability"]>] #
@@ -3237,23 +3237,24 @@ def GPU_BroadcastLaneOp : GPU_Op<"broadcast_lane",
                  GPU_BroadcastTypeAttr:$broadcast_type)> {
-  let summary = "Broadcasts a value from the specific lane across subgroup";
+  let summary = "Broadcasts a value from a specific lane across the subgroup";
   let description = [{
-      Broadcasts a value from one lane to all lanes in a subgroup. The
-      result is guaranteed to be uniform across the subgroup.
+      Broadcasts a value from one lane to all active lanes in a subgroup. The
+      result is guaranteed to be uniform across the subgroup's active lanes.
 
       The possible broadcast types are:
 
-      * `first_lane` - broadcasts the value from the first active lane in the
-      subgroup.
-      * `lane` - broadcasts from the specified lane. The lane index must be
-      uniform and within the subgroup size. The result is poison if the lane
-      index is invalid or non-subgroup-uniform.
+      * `first_active_lane` - broadcasts the value from the first active lane
+      in the subgroup.
+      * `specific_lane` - broadcasts from the specified lane. The lane index
+      must be uniform and within the subgroup size. The result is poison if the
+      lane index is invalid, non-subgroup-uniform, or if the source lane is not
+      active.
       * `any_lane` - broadcasts the value from any lane of the subgroup,
-      active or inactive, assuming the input is already subgroup uniform. The
-      result is poison if the input is not uniform. This is useful to convey
-      uniformity to the compiler to enable more optimizations. Also, it allows
-      more speculation opportunities than `first_lane` since `first_lane`
-      results can depend on active lanes which may change during speculation
-      across control flow.
+      assuming the input is already subgroup uniform. The result is poison if
+      the input is not uniform. This is useful to convey uniformity to the
+      compiler to enable more optimizations. Also, it allows more speculation
+      opportunities than `first_active_lane` since `first_active_lane` results
+      can depend on active lanes which may change during speculation across
+      control flow.
   }];
   let results = (outs AnyType:$result);
   let assemblyFormat = [{
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -171,18 +171,18 @@ struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
   const amdgpu::Chipset chipset;
 };
 
-struct GPUBroadcastLaneOpToROCDL
-    : public ConvertOpToLLVMPattern<gpu::BroadcastLaneOp> {
+struct GPUSubgroupBroadcastOpToROCDL
+    : public ConvertOpToLLVMPattern<gpu::SubgroupBroadcastOp> {
   using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
 
   LogicalResult
-  matchAndRewrite(gpu::BroadcastLaneOp op, OpAdaptor adaptor,
+  matchAndRewrite(gpu::SubgroupBroadcastOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Value src = adaptor.getSrc();
-    if (adaptor.getBroadcastType() == gpu::BroadcastType::lane) {
+    if (adaptor.getBroadcastType() == gpu::BroadcastType::specific_lane) {
       rewriter.replaceOpWithNewOp<ROCDL::ReadlaneOp>(op, src.getType(), src,
                                                      adaptor.getLane());
-    } else { // first_lane or any_lane
+    } else { // first_active_lane or any_lane
       // any_lane is lowered to readfirstlane too, to force value into scalar
       // register.
       rewriter.replaceOpWithNewOp<ROCDL::ReadfirstlaneOp>(op, src.getType(),
@@ -484,9 +484,8 @@ void mlir::populateGpuToROCDLConversionPatterns(
   // TODO: Add alignment for workgroup memory
   patterns.add<GPUDynamicSharedMemoryOpLowering>(converter);
 
-  patterns
-      .add<GPUShuffleOpLowering, GPULaneIdOpToROCDL, GPUBroadcastLaneOpToROCDL>(
-          converter);
+  patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL,
+               GPUSubgroupBroadcastOpToROCDL>(converter);
   patterns.add<GPUSubgroupSizeOpToROCDL>(converter, chipset);
 
   populateMathToROCDLConversionPatterns(converter, patterns);
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -2515,38 +2515,40 @@ gpu::YieldOp WarpExecuteOnLane0Op::getTerminator() {
 }
 
 //===----------------------------------------------------------------------===//
-// GPU_BroadcastLaneOp
+// GPU_SubgroupBroadcastOp
 //===----------------------------------------------------------------------===//
 
-void gpu::BroadcastLaneOp::inferResultRanges(
+void gpu::SubgroupBroadcastOp::inferResultRanges(
     ArrayRef<ConstantIntRanges> argRanges, SetIntRangeFn setResultRange) {
   setResultRange(getResult(), argRanges.front());
 }
 
-Speculation::Speculatability gpu::BroadcastLaneOp::getSpeculatability() {
+Speculation::Speculatability gpu::SubgroupBroadcastOp::getSpeculatability() {
   switch (getBroadcastType()) {
-  case BroadcastType::first_lane:
+  case BroadcastType::first_active_lane:
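-    // Cannot speculate first_lane broadcast, because speculating it across
-    // control flow can change the active lanes.
+    // Cannot speculate first_active_lane broadcast, because speculating it
+    // across control flow can change the active lanes.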
     return Speculation::NotSpeculatable;
   case BroadcastType::any_lane:
     LLVM_FALLTHROUGH;
-  case BroadcastType::lane:
+  case BroadcastType::specific_lane:
     return Speculation::Speculatable;
   }
 }
 
-LogicalResult gpu::BroadcastLaneOp::verify() {
+LogicalResult gpu::SubgroupBroadcastOp::verify() {
   switch (getBroadcastType()) {
-  case BroadcastType::first_lane:
+  case BroadcastType::first_active_lane:
     LLVM_FALLTHROUGH;
   case BroadcastType::any_lane:
     if (getLane())
-      return emitOpError() << "lane can only be specified for lane broadcast";
+      return emitOpError()
+             << "lane can only be specified for `specific_lane` broadcast";
     return success();
-  case BroadcastType::lane:
+  case BroadcastType::specific_lane:
     if (!getLane())
-      return emitOpError() << "lane must be specified for lane broadcast";
+      return emitOpError()
+             << "lane must be specified for `specific_lane` broadcast";
     return success();
   }
 }
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -812,9 +812,9 @@ func.func @broadcast(%arg0 : index, %arg1 : i32) -> (index, index, index) {
 //       CHECK:   %{{.*}} = rocdl.readfirstlane %[[ARG]] : i64
 //       CHECK:   %{{.*}} = rocdl.readfirstlane %[[ARG]] : i64
 //       CHECK:   %{{.*}} = rocdl.readlane %[[ARG]], %[[IDX]] : (i64, i32) -> i64
-  %0 = gpu.broadcast_lane %arg0, first_lane : index
-  %1 = gpu.broadcast_lane %arg0, any_lane : index
-  %2 = gpu.broadcast_lane %arg0, lane %arg1 : index
+  %0 = gpu.subgroup_broadcast %arg0, first_active_lane : index
+  %1 = gpu.subgroup_broadcast %arg0, any_lane : index
+  %2 = gpu.subgroup_broadcast %arg0, specific_lane %arg1 : index
   func.return %0, %1, %2 : index, index, index
 }
 }
diff --git a/mlir/test/Dialect/GPU/broadcast-speculatability.mlir b/mlir/test/Dialect/GPU/broadcast-speculatability.mlir
@@ -3,20 +3,21 @@
 func.func private @side_effect(%arg0 : f32, %arg1 : f32, %arg2 : f32)
 
 // CHECK-LABEL: func @broadcast_hoisting
-//  CHECK-SAME: (%[[ARG:.*]]: f32, %[[IDX:.*]]: i32)
-func.func @broadcast_hoisting(%arg0 : f32, %arg1 : i32) {
+//  CHECK-SAME: (%[[ARG:.*]]: f32, %[[IDX:.*]]: i32, {{.*}}: index)
+func.func @broadcast_hoisting(%arg0 : f32, %arg1 : i32, %arg2 : index) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
-  %c10 = arith.constant 10 : index
-// CHECK: %[[V1:.*]] = gpu.broadcast_lane %[[ARG]], any_lane : f32
-// CHECK: %[[V2:.*]] = gpu.broadcast_lane %[[ARG]], lane %[[IDX]] : f32
+// `any_lane` and `specific_lane` can be speculated across the control flow, but
+// `first_active_lane` cannot as active lanes can change.
+// CHECK: %[[V1:.*]] = gpu.subgroup_broadcast %[[ARG]], any_lane : f32
+// CHECK: %[[V2:.*]] = gpu.subgroup_broadcast %[[ARG]], specific_lane %[[IDX]] : f32
 // CHECK: scf.for
-// CHECK: %[[V0:.*]] = gpu.broadcast_lane %[[ARG]], first_lane : f32
+// CHECK: %[[V0:.*]] = gpu.subgroup_broadcast %[[ARG]], first_active_lane : f32
 // CHECK: func.call @side_effect(%[[V0]], %[[V1]], %[[V2]])
-  scf.for %i = %c0 to %c10 step %c1 {
-    %0 = gpu.broadcast_lane %arg0, first_lane : f32
-    %1 = gpu.broadcast_lane %arg0, any_lane : f32
-    %2 = gpu.broadcast_lane %arg0, lane %arg1 : f32
+  scf.for %i = %c0 to %arg2 step %c1 {
+    %0 = gpu.subgroup_broadcast %arg0, first_active_lane : f32
+    %1 = gpu.subgroup_broadcast %arg0, any_lane : f32
+    %2 = gpu.subgroup_broadcast %arg0, specific_lane %arg1 : f32
     func.call @side_effect(%0, %1, %2) : (f32, f32, f32) -> ()
   }
   func.return
diff --git a/mlir/test/Dialect/GPU/int-range-interface.mlir b/mlir/test/Dialect/GPU/int-range-interface.mlir
@@ -335,9 +335,9 @@ module attributes {gpu.container_module} {
 // CHECK-LABEL: func @broadcast
 func.func @broadcast(%idx: i32) {
   %0 = test.with_bounds { umin = 0 : index, umax = 10 : index, smin = 0 : index, smax = 10 : index } : index
-  %1 = gpu.broadcast_lane %0, first_lane : index
-  %2 = gpu.broadcast_lane %0, any_lane : index
-  %3 = gpu.broadcast_lane %0, lane %idx : index
+  %1 = gpu.subgroup_broadcast %0, first_active_lane : index
+  %2 = gpu.subgroup_broadcast %0, any_lane : index
+  %3 = gpu.subgroup_broadcast %0, specific_lane %idx : index
 
   // CHECK: test.reflect_bounds {smax = 10 : index, smin = 0 : index, umax = 10 : index, umin = 0 : index}
   // CHECK: test.reflect_bounds {smax = 10 : index, smin = 0 : index, umax = 10 : index, umin = 0 : index}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
@@ -543,14 +543,14 @@ func.func @warp_operand_result(%laneid: index, %v0 : vector<4xi32>) -> (vector<4
   return %2 : vector<4xi32>
 }
 
-// CHECK-LABEL: func @broadcast_lane
+// CHECK-LABEL: func @subgroup_broadcast
 //  CHECK-SAME: (%[[ARG:.*]]: f32, %[[IDX:.*]]: i32)
-func.func @broadcast_lane(%arg0 : f32, %arg1 : i32) -> (f32, f32, f32) {
-  // CHECK: gpu.broadcast_lane %[[ARG]], first_lane : f32
-  %0 = gpu.broadcast_lane %arg0, first_lane : f32
-  // CHECK: gpu.broadcast_lane %[[ARG]], any_lane : f32
-  %1 = gpu.broadcast_lane %arg0, any_lane : f32
-  // CHECK: gpu.broadcast_lane %[[ARG]], lane %[[IDX]] : f32
-  %2 = gpu.broadcast_lane %arg0, lane %arg1 : f32
+func.func @subgroup_broadcast(%arg0 : f32, %arg1 : i32) -> (f32, f32, f32) {
+  // CHECK: gpu.subgroup_broadcast %[[ARG]], first_active_lane : f32
+  %0 = gpu.subgroup_broadcast %arg0, first_active_lane : f32
+  // CHECK: gpu.subgroup_broadcast %[[ARG]], any_lane : f32
+  %1 = gpu.subgroup_broadcast %arg0, any_lane : f32
+  // CHECK: gpu.subgroup_broadcast %[[ARG]], specific_lane %[[IDX]] : f32
+  %2 = gpu.subgroup_broadcast %arg0, specific_lane %arg1 : f32
   func.return %0, %1, %2 : f32, f32, f32
 }

Original file line number	Diff line number	Diff line change
`@@ -812,9 +812,9 @@ func.func @broadcast(%arg0 : index, %arg1 : i32) -> (index, index, index) {`
`812`	`812`	`// CHECK: %{{.*}} = rocdl.readfirstlane %[[ARG]] : i64`
`813`	`813`	`// CHECK: %{{.*}} = rocdl.readfirstlane %[[ARG]] : i64`
`814`	`814`	`// CHECK: %{{.*}} = rocdl.readlane %[[ARG]], %[[IDX]] : (i64, i32) -> i64`
`815`		`- %0 = gpu.broadcast_lane %arg0, first_lane : index`
`816`		`- %1 = gpu.broadcast_lane %arg0, any_lane : index`
`817`		`- %2 = gpu.broadcast_lane %arg0, lane %arg1 : index`
	`815`	`+ %0 = gpu.subgroup_broadcast %arg0, first_active_lane : index`
	`816`	`+ %1 = gpu.subgroup_broadcast %arg0, any_lane : index`
	`817`	`+ %2 = gpu.subgroup_broadcast %arg0, specific_lane %arg1 : index`
`818`	`818`	`func.return %0, %1, %2 : index, index, index`
`819`	`819`	`}`
`820`	`820`	`}`