Skip to content

Commit bbe3d64

Browse files
authored
[mlir][ROCDL] Annotate lane ID functions with noundef, ranges (llvm#151396)
Now that we have general support for setting argument and result attributes on LLVM intrinsics, extend the definitions of mbcnt.lo and mbcnt.hi to carry such attributes. With that, update the construction of the mbcnt.lo/mbcnt.hi calls used to get the lane ID to be `noundef` (since the lane ID is always defined) and to be annotated with the correct ranges (so that generic LLVM passes can correctly optimize based on the fact that there are never more than 32/64 lanes). (Also, handle a pattern that wasn't using getLaneId() and get rid of a dead argument.)
1 parent bcb48aa commit bbe3d64

File tree

4 files changed

+51
-35
lines changed

4 files changed

+51
-35
lines changed

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,19 +93,22 @@ class ROCDL_IntrPure1Op<string mnemonic> :
9393

9494
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
9595
list<int> overloadedOperands, list<Trait> traits, int numResults,
96-
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
96+
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0,
97+
int requiresArgAndResultAttrs = 0,
98+
list<int> immArgPositions = [],
9799
list<string> immArgAttrNames = []> :
98100
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
99101
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
100102
overloadedOperands, traits, numResults, requiresAccessGroup,
101-
requiresAliasAnalysis, 0, 0, 0, immArgPositions, immArgAttrNames>;
103+
requiresAliasAnalysis, 0, requiresArgAndResultAttrs, 0,
104+
immArgPositions, immArgAttrNames>;
102105

103106
// Subclass to save typing and ease readability when there aren't overloaded
104107
// operands or memory accesses.
105108
class ROCDL_ConcreteNonMemIntrOp<string mnemonic, list<Trait> traits,
106109
int numResults, list<int> immArgPositions = [],
107110
list<string> immArgNames = []>
108-
: ROCDL_IntrOp<mnemonic, [], [], traits, numResults, 0, 0,
111+
: ROCDL_IntrOp<mnemonic, [], [], traits, numResults, 0, 0, 0,
109112
immArgPositions, immArgNames>;
110113
//===----------------------------------------------------------------------===//
111114
// ROCDL special register op definitions
@@ -148,8 +151,11 @@ class ROCDL_DimGetterFunctionOp<string mnemonic, string device_function,
148151
//===----------------------------------------------------------------------===//
149152

150153
class ROCDL_MbcntOp<string mnemonic> :
151-
ROCDL_IntrPure1Op<"mbcnt." # mnemonic>,
152-
Arguments<(ins I32:$in0, I32:$in1)> {
154+
ROCDL_IntrOp<"mbcnt." # mnemonic, [], [], [Pure], 1,
155+
0, 0, /*requiresArgAndResultAttrs=*/1> {
156+
dag args = (ins I32:$in0, I32:$in1);
157+
let arguments = !con(args, baseArgs);
158+
let results = (outs I32:$res);
153159
let assemblyFormat = [{
154160
$in0 `,` $in1 attr-dict `:` `(` type($in0) `,` type($in1) `)` `->` type($res)
155161
}];
@@ -515,7 +521,7 @@ def ROCDL_ds_read_tr16_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr16.b64">;
515521
//===---------------------------------------------------------------------===//
516522

517523
def ROCDL_LoadToLDSOp :
518-
ROCDL_IntrOp<"load.to.lds", [], [0], [], 0, 0, 1, [2, 3, 4], ["size", "offset", "aux"]> {
524+
ROCDL_IntrOp<"load.to.lds", [], [0], [], 0, 0, 1, 0, [2, 3, 4], ["size", "offset", "aux"]> {
519525
dag args = (ins Arg<LLVM_AnyPointer, "", [MemRead]>:$globalPtr,
520526
Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
521527
I32Attr:$size,
@@ -534,7 +540,7 @@ def ROCDL_LoadToLDSOp :
534540
}
535541

536542
def ROCDL_GlobalLoadLDSOp :
537-
ROCDL_IntrOp<"global.load.lds", [], [], [], 0, 0, 1, [2, 3, 4], ["size", "offset", "aux"]> {
543+
ROCDL_IntrOp<"global.load.lds", [], [], [], 0, 0, 1, 0, [2, 3, 4], ["size", "offset", "aux"]> {
538544
dag args = (ins Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
539545
Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
540546
I32Attr:$size,
@@ -748,7 +754,7 @@ def ROCDL_RawBufferAtomicUMinOp :
748754

749755
// DPP Update intrinsic
750756
def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0],
751-
[AllTypesMatch<["res", "src", "old"]>], 1, 0, 0,
757+
[AllTypesMatch<["res", "src", "old"]>], 1, 0, 0, 0,
752758
[2, 3, 4, 5], ["dppCtrl", "rowMask", "bankMask", "boundCtrl"]>,
753759
Arguments<(ins LLVM_Type:$old, LLVM_Type:$src, I32Attr:$dppCtrl, I32Attr:$rowMask,
754760
I32Attr:$bankMask, I1Attr:$boundCtrl)> {
@@ -760,7 +766,7 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0],
760766

761767
// PermLaneX16 intrinsic operation
762768
def ROCDL_PermlaneX16Op : ROCDL_IntrOp<"permlanex16", [], [0],
763-
[AllTypesMatch<["res", "old", "src0"]>, AllTypesMatch<["src1", "src2"]>], 1, 0, 0,
769+
[AllTypesMatch<["res", "old", "src0"]>, AllTypesMatch<["src1", "src2"]>], 1, 0, 0, 0,
764770
[4, 5], ["fi", "boundControl"]>,
765771
Arguments<(ins LLVM_Type:$old, LLVM_Type:$src0, LLVM_Type:$src1, LLVM_Type:$src2,
766772
I1Attr:$fi, I1Attr:$boundControl)> {

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -79,17 +79,30 @@ static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func) {
7979
return canBeBare;
8080
}
8181

82-
static Value getLaneId(ConversionPatternRewriter &rewriter, Location loc,
83-
const unsigned indexBitwidth) {
82+
static Value getLaneId(RewriterBase &rewriter, Location loc) {
8483
auto int32Type = IntegerType::get(rewriter.getContext(), 32);
8584
Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 32);
8685
Value minus1 = arith::ConstantIntOp::create(rewriter, loc, -1, 32);
87-
Value mbcntLo = ROCDL::MbcntLoOp::create(rewriter, loc, int32Type,
88-
ValueRange{minus1, zero});
89-
Value laneId = ROCDL::MbcntHiOp::create(rewriter, loc, int32Type,
90-
ValueRange{minus1, mbcntLo});
86+
NamedAttribute noundef = rewriter.getNamedAttr(
87+
LLVM::LLVMDialect::getNoUndefAttrName(), rewriter.getUnitAttr());
88+
NamedAttribute lowRange = rewriter.getNamedAttr(
89+
LLVM::LLVMDialect::getRangeAttrName(),
90+
LLVM::ConstantRangeAttr::get(rewriter.getContext(), APInt::getZero(32),
91+
APInt(32, 32)));
92+
NamedAttribute highRange = rewriter.getNamedAttr(
93+
LLVM::LLVMDialect::getRangeAttrName(),
94+
LLVM::ConstantRangeAttr::get(rewriter.getContext(), APInt::getZero(32),
95+
APInt(32, 64)));
96+
Value mbcntLo = ROCDL::MbcntLoOp::create(
97+
rewriter, loc, int32Type, minus1, zero, /*arg_attrs=*/{},
98+
/*res_attrs=*/
99+
rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, lowRange})));
100+
Value laneId = ROCDL::MbcntHiOp::create(
101+
rewriter, loc, int32Type, minus1, mbcntLo, /*arg_attrs=*/{},
102+
rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, highRange})));
91103
return laneId;
92104
}
105+
93106
static constexpr StringLiteral amdgcnDataLayout =
94107
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
95108
"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:"
@@ -104,18 +117,16 @@ struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
104117
LogicalResult
105118
matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
106119
ConversionPatternRewriter &rewriter) const override {
107-
auto loc = op->getLoc();
120+
Location loc = op.getLoc();
108121
MLIRContext *context = rewriter.getContext();
109-
// convert to: %mlo = call @llvm.amdgcn.mbcnt.lo(-1, 0)
110-
// followed by: %lid = call @llvm.amdgcn.mbcnt.hi(-1, %mlo)
111-
112-
Type intTy = IntegerType::get(context, 32);
113-
Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 32);
114-
Value minus1 = arith::ConstantIntOp::create(rewriter, loc, -1, 32);
115-
Value mbcntLo = ROCDL::MbcntLoOp::create(rewriter, loc, intTy,
116-
ValueRange{minus1, zero});
117-
Value laneId = ROCDL::MbcntHiOp::create(rewriter, loc, intTy,
118-
ValueRange{minus1, mbcntLo});
122+
// convert to:
123+
// %mlo = call noundef range(i32 0, 32)
124+
// @llvm.amdgcn.mbcnt.lo(-1, 0)
125+
// followed by:
126+
// %lid = call noundef range(i32 0, 64)
127+
// @llvm.amdgcn.mbcnt.hi(-1, %mlo)
128+
129+
Value laneId = getLaneId(rewriter, loc);
119130
// Truncate or extend the result depending on the index bitwidth specified
120131
// by the LLVMTypeConverter options.
121132
const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
@@ -185,8 +196,7 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
185196
Location loc = op->getLoc();
186197
Value initShflValue = adaptor.getValue();
187198

188-
const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
189-
Value srcLaneId = getLaneId(rewriter, loc, indexBitwidth);
199+
Value srcLaneId = getLaneId(rewriter, loc);
190200

191201
auto int32Type = IntegerType::get(rewriter.getContext(), 32);
192202
Value width = adaptor.getWidth();

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ gpu.module @test_module {
5454
// CHECK: = llvm.sext %{{.*}} : i32 to i64
5555
%gDimZ = gpu.grid_dim z
5656

57-
// CHECK: = rocdl.mbcnt.lo %{{.*}}, %{{.*}} : (i32, i32) -> i32
58-
// CHECK: = rocdl.mbcnt.hi %{{.*}}, %{{.*}} : (i32, i32) -> i32
57+
// CHECK: = rocdl.mbcnt.lo %{{.*}}, %{{.*}} {res_attrs = [{llvm.noundef, llvm.range = #llvm.constant_range<i32, 0, 32>}]} : (i32, i32) -> i32
58+
// CHECK: = rocdl.mbcnt.hi %{{.*}}, %{{.*}} {res_attrs = [{llvm.noundef, llvm.range = #llvm.constant_range<i32, 0, 64>}]} : (i32, i32) -> i32
5959
// CHECK: = llvm.sext %{{.*}} : i32 to i64
6060
%laneId = gpu.lane_id
6161

@@ -701,7 +701,7 @@ gpu.module @test_module {
701701
// CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32
702702
// CHECK: %[[#PERMUTE:]] = rocdl.ds_bpermute %[[#ALIGNED_DST_LANE]], %[[#CAST_VALUE]] : (i32, i32) -> i32
703703
// CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#PERMUTE]] : i32 to f32
704-
%shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32
704+
%shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32
705705
// *** UP mode shuffle ***
706706
// CHECK: %[[#LANE_ID:]] = rocdl.mbcnt.hi
707707
// CHECK: %[[#ZERO:]] = llvm.mlir.constant(0 : i32) : i32

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,12 @@ llvm.func @kernel_func_unsafe_fp_atomics()
8686
}
8787

8888
llvm.func @rocdl.lane_id() -> i32 {
89-
// CHECK: [[mbcntlo:%.+]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
90-
// CHECK-NEXT: call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[mbcntlo]])
89+
// CHECK: [[mbcntlo:%.+]] = call noundef range(i32 0, 32) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
90+
// CHECK-NEXT: call noundef range(i32 0, 64) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[mbcntlo]])
9191
%0 = llvm.mlir.constant(-1 : i32) : i32
9292
%1 = llvm.mlir.constant(0 : i32) : i32
93-
%2 = rocdl.mbcnt.lo %0, %1 : (i32, i32) -> i32
94-
%3 = rocdl.mbcnt.hi %0, %2 : (i32, i32) -> i32
93+
%2 = rocdl.mbcnt.lo %0, %1 {res_attrs = [{llvm.noundef, llvm.range = #llvm.constant_range<i32, 0, 32>}]} : (i32, i32) -> i32
94+
%3 = rocdl.mbcnt.hi %0, %2 {res_attrs = [{llvm.noundef, llvm.range = #llvm.constant_range<i32, 0, 64>}]} : (i32, i32) -> i32
9595
llvm.return %3 : i32
9696
}
9797

0 commit comments

Comments
 (0)