Skip to content

Commit fffce28

Browse files
committed
renamings
1 parent 97e24cd commit fffce28

File tree

7 files changed

+61
-58
lines changed

7 files changed

+61
-58
lines changed

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3218,16 +3218,16 @@ def GPU_WarpExecuteOnLane0Op : GPU_Op<"warp_execute_on_lane_0",
32183218
def GPU_BroadcastType : I32EnumAttr<"BroadcastType",
32193219
"a lane to broadcast from",
32203220
[
3221-
I32EnumAttrCase<"first_lane", 0>,
3221+
I32EnumAttrCase<"first_active_lane", 0>,
32223222
I32EnumAttrCase<"any_lane", 1>,
3223-
I32EnumAttrCase<"lane", 2>
3223+
I32EnumAttrCase<"specific_lane", 2>
32243224
]>{
32253225
let genSpecializedAttr = 0;
32263226
let cppNamespace = "::mlir::gpu";
32273227
}
32283228
def GPU_BroadcastTypeAttr : EnumAttr<GPU_Dialect, GPU_BroadcastType, "broadcast">;
32293229

3230-
def GPU_BroadcastLaneOp : GPU_Op<"broadcast_lane",
3230+
def GPU_SubgroupBroadcastOp : GPU_Op<"subgroup_broadcast",
32313231
[NoMemoryEffect, AllTypesMatch<["result", "src"]>,
32323232
DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
32333233
DeclareOpInterfaceMethods<ConditionallySpeculatable, ["getSpeculatability"]>] #
@@ -3237,23 +3237,24 @@ def GPU_BroadcastLaneOp : GPU_Op<"broadcast_lane",
32373237
GPU_BroadcastTypeAttr:$broadcast_type)> {
32383238
let summary = "Broadcasts a value from the specific lane across subgroup";
32393239
let description = [{
3240-
Broadcasts a value from one lane to all lanes in a subgroup. The
3241-
result is guaranteed to be uniform across the subgroup.
3240+
Broadcasts a value from one lane to all active lanes in a subgroup. The
3241+
result is guaranteed to be uniform across the active lanes in the subgroup.
32423242

32433243
The possible broadcast types are:
32443244

3245-
* `first_lane` - broadcasts the value from the first active lane in the
3246-
subgroup.
3247-
* `lane` - broadcasts from the specified lane. The lane index must be
3248-
uniform and within the subgroup size. The result is poison if the lane
3249-
index is invalid or non-subgroup-uniform.
3245+
* `first_active_lane` - broadcasts the value from the first active lane
3246+
in the subgroup.
3247+
* `specific_lane` - broadcasts from the specified lane. The lane index
3248+
must be uniform and within the subgroup size. The result is poison if the
3249+
lane index is invalid, non-subgroup-uniform, or if the source lane is not
3250+
active.
32503251
* `any_lane` - broadcasts the value from any lane of the subgroup,
3251-
active or inactive, assuming the input is already subgroup uniform. The
3252-
result is poison if the input is not uniform. This is useful to convey
3253-
uniformity to the compiler to enable more optimizations. Also, it allows
3254-
more speculation opportunities than `first_lane` since `first_lane`
3255-
results can depend on active lanes which may change during speculation
3256-
across control flow.
3252+
assuming the input is already subgroup uniform. The result is poison if
3253+
the input is not uniform. This is useful to convey uniformity to the
3254+
compiler to enable more optimizations. Also, it allows more speculation
3255+
opportunities than `first_active_lane` since `first_active_lane` results
3256+
can depend on active lanes which may change during speculation across
3257+
control flow.
32573258
}];
32583259
let results = (outs AnyType:$result);
32593260
let assemblyFormat = [{

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -171,18 +171,18 @@ struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
171171
const amdgpu::Chipset chipset;
172172
};
173173

174-
struct GPUBroadcastLaneOpToROCDL
175-
: public ConvertOpToLLVMPattern<gpu::BroadcastLaneOp> {
174+
struct GPUSubgroupBroadcastOpToROCDL
175+
: public ConvertOpToLLVMPattern<gpu::SubgroupBroadcastOp> {
176176
using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
177177

178178
LogicalResult
179-
matchAndRewrite(gpu::BroadcastLaneOp op, OpAdaptor adaptor,
179+
matchAndRewrite(gpu::SubgroupBroadcastOp op, OpAdaptor adaptor,
180180
ConversionPatternRewriter &rewriter) const override {
181181
Value src = adaptor.getSrc();
182-
if (adaptor.getBroadcastType() == gpu::BroadcastType::lane) {
182+
if (adaptor.getBroadcastType() == gpu::BroadcastType::specific_lane) {
183183
rewriter.replaceOpWithNewOp<ROCDL::ReadlaneOp>(op, src.getType(), src,
184184
adaptor.getLane());
185-
} else { // first_lane or any_lane
185+
} else { // first_active_lane or any_lane
186186
// any_lane is lowered to readfirstlane too, to force value into scalar
187187
// register.
188188
rewriter.replaceOpWithNewOp<ROCDL::ReadfirstlaneOp>(op, src.getType(),
@@ -484,9 +484,8 @@ void mlir::populateGpuToROCDLConversionPatterns(
484484
// TODO: Add alignment for workgroup memory
485485
patterns.add<GPUDynamicSharedMemoryOpLowering>(converter);
486486

487-
patterns
488-
.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL, GPUBroadcastLaneOpToROCDL>(
489-
converter);
487+
patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL,
488+
GPUSubgroupBroadcastOpToROCDL>(converter);
490489
patterns.add<GPUSubgroupSizeOpToROCDL>(converter, chipset);
491490

492491
populateMathToROCDLConversionPatterns(converter, patterns);

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2515,38 +2515,40 @@ gpu::YieldOp WarpExecuteOnLane0Op::getTerminator() {
25152515
}
25162516

25172517
//===----------------------------------------------------------------------===//
2518-
// GPU_BroadcastLaneOp
2518+
// GPU_SubgroupBroadcastOp
25192519
//===----------------------------------------------------------------------===//
25202520

2521-
void gpu::BroadcastLaneOp::inferResultRanges(
2521+
void gpu::SubgroupBroadcastOp::inferResultRanges(
25222522
ArrayRef<ConstantIntRanges> argRanges, SetIntRangeFn setResultRange) {
25232523
setResultRange(getResult(), argRanges.front());
25242524
}
25252525

2526-
Speculation::Speculatability gpu::BroadcastLaneOp::getSpeculatability() {
2526+
Speculation::Speculatability gpu::SubgroupBroadcastOp::getSpeculatability() {
25272527
switch (getBroadcastType()) {
2528-
case BroadcastType::first_lane:
2528+
case BroadcastType::first_active_lane:
25292529
// Cannot speculate first_active_lane broadcast, because speculating it across
25302530
// control flow can change the active lanes.
25312531
return Speculation::NotSpeculatable;
25322532
case BroadcastType::any_lane:
25332533
LLVM_FALLTHROUGH;
2534-
case BroadcastType::lane:
2534+
case BroadcastType::specific_lane:
25352535
return Speculation::Speculatable;
25362536
}
25372537
}
25382538

2539-
LogicalResult gpu::BroadcastLaneOp::verify() {
2539+
LogicalResult gpu::SubgroupBroadcastOp::verify() {
25402540
switch (getBroadcastType()) {
2541-
case BroadcastType::first_lane:
2541+
case BroadcastType::first_active_lane:
25422542
LLVM_FALLTHROUGH;
25432543
case BroadcastType::any_lane:
25442544
if (getLane())
2545-
return emitOpError() << "lane can only be specified for lane broadcast";
2545+
return emitOpError()
2546+
<< "lane can only be specified for `specific_lane` broadcast";
25462547
return success();
2547-
case BroadcastType::lane:
2548+
case BroadcastType::specific_lane:
25482549
if (!getLane())
2549-
return emitOpError() << "lane must be specified for lane broadcast";
2550+
return emitOpError()
2551+
<< "lane must be specified for `specific_lane` broadcast";
25502552
return success();
25512553
}
25522554
}

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -812,9 +812,9 @@ func.func @broadcast(%arg0 : index, %arg1 : i32) -> (index, index, index) {
812812
// CHECK: %{{.*}} = rocdl.readfirstlane %[[ARG]] : i64
813813
// CHECK: %{{.*}} = rocdl.readfirstlane %[[ARG]] : i64
814814
// CHECK: %{{.*}} = rocdl.readlane %[[ARG]], %[[IDX]] : (i64, i32) -> i64
815-
%0 = gpu.broadcast_lane %arg0, first_lane : index
816-
%1 = gpu.broadcast_lane %arg0, any_lane : index
817-
%2 = gpu.broadcast_lane %arg0, lane %arg1 : index
815+
%0 = gpu.subgroup_broadcast %arg0, first_active_lane : index
816+
%1 = gpu.subgroup_broadcast %arg0, any_lane : index
817+
%2 = gpu.subgroup_broadcast %arg0, specific_lane %arg1 : index
818818
func.return %0, %1, %2 : index, index, index
819819
}
820820
}

mlir/test/Dialect/GPU/broadcast-speculatability.mlir

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,21 @@
33
func.func private @side_effect(%arg0 : f32, %arg1 : f32, %arg2 : f32)
44

55
// CHECK-LABEL: func @broadcast_hoisting
6-
// CHECK-SAME: (%[[ARG:.*]]: f32, %[[IDX:.*]]: i32)
7-
func.func @broadcast_hoisting(%arg0 : f32, %arg1 : i32) {
6+
// CHECK-SAME: (%[[ARG:.*]]: f32, %[[IDX:.*]]: i32, {{.*}}: index)
7+
func.func @broadcast_hoisting(%arg0 : f32, %arg1 : i32, %arg2 : index) {
88
%c0 = arith.constant 0 : index
99
%c1 = arith.constant 1 : index
10-
%c10 = arith.constant 10 : index
11-
// CHECK: %[[V1:.*]] = gpu.broadcast_lane %[[ARG]], any_lane : f32
12-
// CHECK: %[[V2:.*]] = gpu.broadcast_lane %[[ARG]], lane %[[IDX]] : f32
10+
// `any_lane` and `specific_lane` can be speculated across the control flow, but
11+
// `first_active_lane` cannot as active lanes can change.
12+
// CHECK: %[[V1:.*]] = gpu.subgroup_broadcast %[[ARG]], any_lane : f32
13+
// CHECK: %[[V2:.*]] = gpu.subgroup_broadcast %[[ARG]], specific_lane %[[IDX]] : f32
1314
// CHECK: scf.for
14-
// CHECK: %[[V0:.*]] = gpu.broadcast_lane %[[ARG]], first_lane : f32
15+
// CHECK: %[[V0:.*]] = gpu.subgroup_broadcast %[[ARG]], first_active_lane : f32
1516
// CHECK: func.call @side_effect(%[[V0]], %[[V1]], %[[V2]])
16-
scf.for %i = %c0 to %c10 step %c1 {
17-
%0 = gpu.broadcast_lane %arg0, first_lane : f32
18-
%1 = gpu.broadcast_lane %arg0, any_lane : f32
19-
%2 = gpu.broadcast_lane %arg0, lane %arg1 : f32
17+
scf.for %i = %c0 to %arg2 step %c1 {
18+
%0 = gpu.subgroup_broadcast %arg0, first_active_lane : f32
19+
%1 = gpu.subgroup_broadcast %arg0, any_lane : f32
20+
%2 = gpu.subgroup_broadcast %arg0, specific_lane %arg1 : f32
2021
func.call @side_effect(%0, %1, %2) : (f32, f32, f32) -> ()
2122
}
2223
func.return

mlir/test/Dialect/GPU/int-range-interface.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,9 @@ module attributes {gpu.container_module} {
335335
// CHECK-LABEL: func @broadcast
336336
func.func @broadcast(%idx: i32) {
337337
%0 = test.with_bounds { umin = 0 : index, umax = 10 : index, smin = 0 : index, smax = 10 : index } : index
338-
%1 = gpu.broadcast_lane %0, first_lane : index
339-
%2 = gpu.broadcast_lane %0, any_lane : index
340-
%3 = gpu.broadcast_lane %0, lane %idx : index
338+
%1 = gpu.subgroup_broadcast %0, first_active_lane : index
339+
%2 = gpu.subgroup_broadcast %0, any_lane : index
340+
%3 = gpu.subgroup_broadcast %0, specific_lane %idx : index
341341

342342
// CHECK: test.reflect_bounds {smax = 10 : index, smin = 0 : index, umax = 10 : index, umin = 0 : index}
343343
// CHECK: test.reflect_bounds {smax = 10 : index, smin = 0 : index, umax = 10 : index, umin = 0 : index}

mlir/test/Dialect/GPU/ops.mlir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -543,14 +543,14 @@ func.func @warp_operand_result(%laneid: index, %v0 : vector<4xi32>) -> (vector<4
543543
return %2 : vector<4xi32>
544544
}
545545

546-
// CHECK-LABEL: func @broadcast_lane
546+
// CHECK-LABEL: func @subgroup_broadcast
547547
// CHECK-SAME: (%[[ARG:.*]]: f32, %[[IDX:.*]]: i32)
548-
func.func @broadcast_lane(%arg0 : f32, %arg1 : i32) -> (f32, f32, f32) {
549-
// CHECK: gpu.broadcast_lane %[[ARG]], first_lane : f32
550-
%0 = gpu.broadcast_lane %arg0, first_lane : f32
551-
// CHECK: gpu.broadcast_lane %[[ARG]], any_lane : f32
552-
%1 = gpu.broadcast_lane %arg0, any_lane : f32
553-
// CHECK: gpu.broadcast_lane %[[ARG]], lane %[[IDX]] : f32
554-
%2 = gpu.broadcast_lane %arg0, lane %arg1 : f32
548+
func.func @subgroup_broadcast(%arg0 : f32, %arg1 : i32) -> (f32, f32, f32) {
549+
// CHECK: gpu.subgroup_broadcast %[[ARG]], first_active_lane : f32
550+
%0 = gpu.subgroup_broadcast %arg0, first_active_lane : f32
551+
// CHECK: gpu.subgroup_broadcast %[[ARG]], any_lane : f32
552+
%1 = gpu.subgroup_broadcast %arg0, any_lane : f32
553+
// CHECK: gpu.subgroup_broadcast %[[ARG]], specific_lane %[[IDX]] : f32
554+
%2 = gpu.subgroup_broadcast %arg0, specific_lane %arg1 : f32
555555
func.return %0, %1, %2 : f32, f32, f32
556556
}

0 commit comments

Comments
 (0)