Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,38 @@ def ROCDL_GlobalLoadLDSOp :
}];
}

//===---------------------------------------------------------------------===//
// Async load to LDS intrinsic (available in GFX1250)
//===---------------------------------------------------------------------===//

// Stamps out one async global-to-LDS load op per transfer width listed below
// (8, 32, 64 and 128 bits).
foreach bitsVal = [8, 32, 64, 128] in {
// Width suffix reused in both the record name and the intrinsic name, e.g. "b8".
defvar bitsStr = "b" # !cast<string>(bitsVal);
// Record name becomes ROCDL_GlobalLoadAsyncToLDS{B8,B32,B64,B128}Op.
// NOTE(review): ROCDL_IntrOp's parameter list is declared outside this hunk;
// [2, 3] paired with ["offset", "aux"] presumably marks operands 2 and 3 as
// immediate arguments supplied by those attributes -- confirm against the
// ROCDL_IntrOp definition.
def ROCDL_GlobalLoadAsyncToLDS # !toupper(bitsStr) # Op :
ROCDL_IntrOp<"global.load.async.to.lds." # bitsStr, [], [], [], 0, 0, 1, 0, [2, 3], ["offset", "aux"]> {
// Op-specific arguments: the global pointer is annotated as read, the LDS
// pointer as written; `offset` and `aux` are 32-bit integer attributes.
dag args = (ins Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
I32Attr:$offset,
I32Attr:$aux);
// Append `baseArgs` (declared outside this hunk) after the op-specific arguments.
let arguments = !con(args, baseArgs);
// Custom syntax: both pointers, both attributes, the attribute dictionary,
// then the type of $globalPtr only (the type of $ldsPtr is not spelled out).
let assemblyFormat = [{
$globalPtr `,` $ldsPtr `,` $offset `,` $aux
attr-dict `:` type($globalPtr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd go for type($globalPtr), type($ldsPtr) if we're doing this sort of thing

}];
// The description is specialized per width by splicing in `bitsVal`.
let description = [{
Asynchronously loads }] # !cast<string>(bitsVal) # [{ bits of data from a global memory pointer
to a Local Data Share (LDS) pointer.

Available on gfx1250+.
}];

// Out-of-line definition of getAccessedOperands(): returns both pointer operands.
let extraClassDefinition = [{
::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
return {getGlobalPtr(), getLdsPtr()};
}
}];
}
}

//===---------------------------------------------------------------------===//
// Tensor load/store intrinsics (available in GFX1250)
//===---------------------------------------------------------------------===//
Expand Down
13 changes: 13 additions & 0 deletions mlir/test/Dialect/LLVMIR/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
llvm.return
}

llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
  // Round-trip test for the four async global-to-LDS load ops
  // (b8 / b32 / b64 / b128), each with offset = 0 and aux = 0.
  // The label directive below must end in a colon; without it the line is an
  // ordinary comment and the label is never enforced.
  // CHECK-LABEL: @rocdl.global.load.async.to.lds
  // CHECK: rocdl.global.load.async.to.lds.b8 %{{.*}}, %{{.*}}, 0, 0
  // CHECK: rocdl.global.load.async.to.lds.b32 %{{.*}}, %{{.*}}, 0, 0
  // CHECK: rocdl.global.load.async.to.lds.b64 %{{.*}}, %{{.*}}, 0, 0
  // CHECK: rocdl.global.load.async.to.lds.b128 %{{.*}}, %{{.*}}, 0, 0
  rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : <1>
  rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : <1>
  rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : <1>
  rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : <1>
  llvm.return
}

// CHECK-LABEL: @rocdl.tensor.load.to.lds
llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
%dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {
Expand Down
13 changes: 13 additions & 0 deletions mlir/test/Target/LLVMIR/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
llvm.return
}

// CHECK-LABEL: rocdl.global.load.async.to.lds
llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
  // Translation test: every width of the async global-to-LDS load is expected
  // to show up as a call to the matching llvm.amdgcn intrinsic. Expectations
  // are listed up front, in the same order as the ops that follow.
  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b8
  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b32
  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b64
  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b128
  rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : !llvm.ptr<1>
  rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : !llvm.ptr<1>
  rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : !llvm.ptr<1>
  rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : !llvm.ptr<1>
  llvm.return
}

// CHECK-LABEL: rocdl.tensor.load.to.lds
llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
%dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {
Expand Down