Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,22 @@ class ROCDL_IntrPure1Op<string mnemonic> :

// Base class for ROCDL intrinsic ops. Derives from LLVM_IntrOpBase for
// ROCDL_Dialect, passing "amdgcn_" followed by the mnemonic (with "."
// replaced by "_") as the third template argument.
// NOTE(review): this span is a diff rendering, so the parameter list and the
// base-class argument list each appear twice: once without and once with
// `requiresArgAndResultAttrs`. In the form that has it, the parameter sits
// between `requiresAliasAnalysis` and `immArgPositions`, defaults to 0, and is
// forwarded to LLVM_IntrOpBase in place of one of the literal 0 arguments.
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
list<int> overloadedOperands, list<Trait> traits, int numResults,
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0,
int requiresArgAndResultAttrs = 0,
list<int> immArgPositions = [],
list<string> immArgAttrNames = []> :
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
overloadedOperands, traits, numResults, requiresAccessGroup,
requiresAliasAnalysis, 0, 0, 0, immArgPositions, immArgAttrNames>;
requiresAliasAnalysis, 0, requiresArgAndResultAttrs, 0,
immArgPositions, immArgAttrNames>;

// Subclass to save typing and ease readability when there aren't overloaded
// operands or memory accesses. Passes empty overloaded-result and
// overloaded-operand lists to ROCDL_IntrOp and forwards the immediate-argument
// positions and names unchanged.
class ROCDL_ConcreteNonMemIntrOp<string mnemonic, list<Trait> traits,
int numResults, list<int> immArgPositions = [],
list<string> immArgNames = []>
: ROCDL_IntrOp<mnemonic, [], [], traits, numResults, 0, 0,
: ROCDL_IntrOp<mnemonic, [], [], traits, numResults, 0, 0, 0,
immArgPositions, immArgNames>;
//===----------------------------------------------------------------------===//
// ROCDL special register op definitions
Expand Down Expand Up @@ -148,8 +151,11 @@ class ROCDL_DimGetterFunctionOp<string mnemonic, string device_function,
//===----------------------------------------------------------------------===//

class ROCDL_MbcntOp<string mnemonic> :
ROCDL_IntrPure1Op<"mbcnt." # mnemonic>,
Arguments<(ins I32:$in0, I32:$in1)> {
ROCDL_IntrOp<"mbcnt." # mnemonic, [], [], [Pure], 1,
0, 0, /*requiresArgAndResultAttrs=*/1> {
dag args = (ins I32:$in0, I32:$in1);
let arguments = !con(args, baseArgs);
let results = (outs I32:$res);
let assemblyFormat = [{
$in0 `,` $in1 attr-dict `:` `(` type($in0) `,` type($in1) `)` `->` type($res)
}];
Expand Down Expand Up @@ -501,7 +507,7 @@ def ROCDL_ds_read_tr16_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr16.b64">;
//===---------------------------------------------------------------------===//

def ROCDL_LoadToLDSOp :
ROCDL_IntrOp<"load.to.lds", [], [0], [], 0, 0, 1, [2, 3, 4], ["size", "offset", "aux"]> {
ROCDL_IntrOp<"load.to.lds", [], [0], [], 0, 0, 1, 0, [2, 3, 4], ["size", "offset", "aux"]> {
dag args = (ins Arg<LLVM_AnyPointer, "", [MemRead]>:$globalPtr,
Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
I32Attr:$size,
Expand All @@ -520,7 +526,7 @@ def ROCDL_LoadToLDSOp :
}

def ROCDL_GlobalLoadLDSOp :
ROCDL_IntrOp<"global.load.lds", [], [], [], 0, 0, 1, [2, 3, 4], ["size", "offset", "aux"]> {
ROCDL_IntrOp<"global.load.lds", [], [], [], 0, 0, 1, 0, [2, 3, 4], ["size", "offset", "aux"]> {
dag args = (ins Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
I32Attr:$size,
Expand Down Expand Up @@ -734,7 +740,7 @@ def ROCDL_RawBufferAtomicUMinOp :

// DPP Update intrinsic
def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0],
[AllTypesMatch<["res", "src", "old"]>], 1, 0, 0,
[AllTypesMatch<["res", "src", "old"]>], 1, 0, 0, 0,
[2, 3, 4, 5], ["dppCtrl", "rowMask", "bankMask", "boundCtrl"]>,
Arguments<(ins LLVM_Type:$old, LLVM_Type:$src, I32Attr:$dppCtrl, I32Attr:$rowMask,
I32Attr:$bankMask, I1Attr:$boundCtrl)> {
Expand All @@ -746,7 +752,7 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0],

// PermLaneX16 intrinsic operation
def ROCDL_PermlaneX16Op : ROCDL_IntrOp<"permlanex16", [], [0],
[AllTypesMatch<["res", "old", "src0"]>, AllTypesMatch<["src1", "src2"]>], 1, 0, 0,
[AllTypesMatch<["res", "old", "src0"]>, AllTypesMatch<["src1", "src2"]>], 1, 0, 0, 0,
[4, 5], ["fi", "boundControl"]>,
Arguments<(ins LLVM_Type:$old, LLVM_Type:$src0, LLVM_Type:$src1, LLVM_Type:$src2,
I1Attr:$fi, I1Attr:$boundControl)> {
Expand Down
48 changes: 29 additions & 19 deletions mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,30 @@ static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func) {
return canBeBare;
}

// Emits the lane-id computation mbcnt.hi(-1, mbcnt.lo(-1, 0)) on i32 values at
// `loc` and returns the result of the mbcnt.hi op.
// NOTE(review): this span is a diff rendering, so two forms of the function
// are interleaved. One takes a ConversionPatternRewriter plus an
// `indexBitwidth` argument and creates the ops from a ValueRange. The other
// takes a RewriterBase and attaches result attributes to both ops.
static Value getLaneId(ConversionPatternRewriter &rewriter, Location loc,
const unsigned indexBitwidth) {
static Value getLaneId(RewriterBase &rewriter, Location loc) {
auto int32Type = IntegerType::get(rewriter.getContext(), 32);
Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 32);
Value minus1 = arith::ConstantIntOp::create(rewriter, loc, -1, 32);
Value mbcntLo = ROCDL::MbcntLoOp::create(rewriter, loc, int32Type,
ValueRange{minus1, zero});
Value laneId = ROCDL::MbcntHiOp::create(rewriter, loc, int32Type,
ValueRange{minus1, mbcntLo});
// Unit attribute keyed by the LLVM dialect's noundef attribute name; added to
// the result attributes of both mbcnt ops below.
NamedAttribute noundef = rewriter.getNamedAttr(
LLVM::LLVMDialect::getNoUndefAttrName(), rewriter.getUnitAttr());
// Range attribute for the mbcnt.lo result, built from the 32-bit bounds 0
// and 32.
NamedAttribute lowRange = rewriter.getNamedAttr(
LLVM::LLVMDialect::getRangeAttrName(),
LLVM::ConstantRangeAttr::get(rewriter.getContext(), APInt::getZero(32),
APInt(32, 32)));
// Range attribute for the mbcnt.hi result, built from the 32-bit bounds 0
// and 64.
NamedAttribute highRange = rewriter.getNamedAttr(
LLVM::LLVMDialect::getRangeAttrName(),
LLVM::ConstantRangeAttr::get(rewriter.getContext(), APInt::getZero(32),
APInt(32, 64)));
// No argument attributes are supplied; the single result gets one dictionary
// holding {noundef, range}.
Value mbcntLo = ROCDL::MbcntLoOp::create(
rewriter, loc, int32Type, minus1, zero, /*arg_attrs=*/{},
/*res_attrs=*/
rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, lowRange})));
Value laneId = ROCDL::MbcntHiOp::create(
rewriter, loc, int32Type, minus1, mbcntLo, /*arg_attrs=*/{},
rewriter.getArrayAttr(rewriter.getDictionaryAttr({noundef, highRange})));
return laneId;
}

static constexpr StringLiteral amdgcnDataLayout =
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:"
Expand All @@ -104,18 +117,16 @@ struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
LogicalResult
matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = op->getLoc();
Location loc = op.getLoc();
MLIRContext *context = rewriter.getContext();
// convert to: %mlo = call @llvm.amdgcn.mbcnt.lo(-1, 0)
// followed by: %lid = call @llvm.amdgcn.mbcnt.hi(-1, %mlo)

Type intTy = IntegerType::get(context, 32);
Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 32);
Value minus1 = arith::ConstantIntOp::create(rewriter, loc, -1, 32);
Value mbcntLo = ROCDL::MbcntLoOp::create(rewriter, loc, intTy,
ValueRange{minus1, zero});
Value laneId = ROCDL::MbcntHiOp::create(rewriter, loc, intTy,
ValueRange{minus1, mbcntLo});
// convert to:
// %mlo = call noundef range(i32 0, 32)
// @llvm.amdgcn.mbcnt.lo(-1, 0)
// followed by:
// %lid = call noundef range(i32 0, 64)
// @llvm.amdgcn.mbcnt.hi(-1, %mlo)

Value laneId = getLaneId(rewriter, loc);
// Truncate or extend the result depending on the index bitwidth specified
// by the LLVMTypeConverter options.
const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
Expand Down Expand Up @@ -185,8 +196,7 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
Location loc = op->getLoc();
Value initShflValue = adaptor.getValue();

const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
Value srcLaneId = getLaneId(rewriter, loc, indexBitwidth);
Value srcLaneId = getLaneId(rewriter, loc);

auto int32Type = IntegerType::get(rewriter.getContext(), 32);
Value width = adaptor.getWidth();
Expand Down
6 changes: 3 additions & 3 deletions mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ gpu.module @test_module {
// CHECK: = llvm.sext %{{.*}} : i32 to i64
%gDimZ = gpu.grid_dim z

// CHECK: = rocdl.mbcnt.lo %{{.*}}, %{{.*}} : (i32, i32) -> i32
// CHECK: = rocdl.mbcnt.hi %{{.*}}, %{{.*}} : (i32, i32) -> i32
// CHECK: = rocdl.mbcnt.lo %{{.*}}, %{{.*}} {res_attrs = [{llvm.noundef, llvm.range = #llvm.constant_range<i32, 0, 32>}]} : (i32, i32) -> i32
// CHECK: = rocdl.mbcnt.hi %{{.*}}, %{{.*}} {res_attrs = [{llvm.noundef, llvm.range = #llvm.constant_range<i32, 0, 64>}]} : (i32, i32) -> i32
// CHECK: = llvm.sext %{{.*}} : i32 to i64
%laneId = gpu.lane_id

Expand Down Expand Up @@ -701,7 +701,7 @@ gpu.module @test_module {
// CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32
// CHECK: %[[#PERMUTE:]] = rocdl.ds_bpermute %[[#ALIGNED_DST_LANE]], %[[#CAST_VALUE]] : (i32, i32) -> i32
// CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#PERMUTE]] : i32 to f32
%shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32
%shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32
// *** UP mode shuffle ***
// CHECK: %[[#LANE_ID:]] = rocdl.mbcnt.hi
// CHECK: %[[#ZERO:]] = llvm.mlir.constant(0 : i32) : i32
Expand Down
8 changes: 4 additions & 4 deletions mlir/test/Target/LLVMIR/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,12 @@ llvm.func @kernel_func_unsafe_fp_atomics()
}

// Translation test for the lane-id sequence: rocdl.mbcnt.lo feeding
// rocdl.mbcnt.hi, checked against the emitted @llvm.amdgcn.mbcnt.lo/hi calls.
// NOTE(review): this span is a diff rendering, so each CHECK line and each
// rocdl.mbcnt op appears twice: once without and once with the
// noundef/range result attributes.
llvm.func @rocdl.lane_id() -> i32 {
// CHECK: [[mbcntlo:%.+]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
// CHECK-NEXT: call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[mbcntlo]])
// CHECK: [[mbcntlo:%.+]] = call noundef range(i32 0, 32) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
// CHECK-NEXT: call noundef range(i32 0, 64) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[mbcntlo]])
%0 = llvm.mlir.constant(-1 : i32) : i32
%1 = llvm.mlir.constant(0 : i32) : i32
%2 = rocdl.mbcnt.lo %0, %1 : (i32, i32) -> i32
%3 = rocdl.mbcnt.hi %0, %2 : (i32, i32) -> i32
%2 = rocdl.mbcnt.lo %0, %1 {res_attrs = [{llvm.noundef, llvm.range = #llvm.constant_range<i32, 0, 32>}]} : (i32, i32) -> i32
%3 = rocdl.mbcnt.hi %0, %2 {res_attrs = [{llvm.noundef, llvm.range = #llvm.constant_range<i32, 0, 64>}]} : (i32, i32) -> i32
llvm.return %3 : i32
}

Expand Down