Skip to content

Commit 9f2581f

Browse files
committed
[mlir][rocdl] Add GlobalLoadAsyncToLDS operation
Adds `global.load.async.to.lds` op to rocdl, supporting `b8`, `b32`, `b64` and `b128`. The op is lowered to the appropriate `llvm.amdgcn.global.load.async.to.lds.bXX` intrinsic. This is available on gfx1250+.
1 parent b2da8ef commit 9f2581f

File tree

3 files changed

+70
-0
lines changed

3 files changed

+70
-0
lines changed

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,39 @@ def ROCDL_GlobalLoadLDSOp :
663663
}];
664664
}
665665

666+
//===---------------------------------------------------------------------===//
667+
// Async load to LDS intrinsics (available on gfx1250+)
668+
//===---------------------------------------------------------------------===//
669+
670+
class ROCDL_GlobalLoadAsyncToLDSOp<string mnemonic> :
671+
ROCDL_IntrOp<mnemonic, [], [], [], 0, 0, 1, 0, [2, 3], ["offset", "aux"]> {
672+
dag args = (ins Arg<LLVM_AnyPointer, "", [MemRead]>:$globalPtr,
673+
Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
674+
I32Attr:$offset,
675+
I32Attr:$aux);
676+
let arguments = !con(args, baseArgs);
677+
let assemblyFormat = [{
678+
$globalPtr `,` $ldsPtr `,` $offset `,` $aux
679+
attr-dict `:` type($globalPtr)
680+
}];
681+
let description = [{
682+
Loads data asynchronously from a global memory pointer to a local data
683+
share (LDS) pointer.
684+
685+
Available on gfx1250+.
686+
}];
687+
let extraClassDefinition = [{
688+
::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
689+
return {getGlobalPtr(), getLdsPtr()};
690+
}
691+
}];
692+
}
693+
694+
def ROCDL_GlobalLoadAsyncToLDSB8Op : ROCDL_GlobalLoadAsyncToLDSOp<"global.load.async.to.lds.b8">;
695+
def ROCDL_GlobalLoadAsyncToLDSB32Op : ROCDL_GlobalLoadAsyncToLDSOp<"global.load.async.to.lds.b32">;
696+
def ROCDL_GlobalLoadAsyncToLDSB64Op : ROCDL_GlobalLoadAsyncToLDSOp<"global.load.async.to.lds.b64">;
697+
def ROCDL_GlobalLoadAsyncToLDSB128Op : ROCDL_GlobalLoadAsyncToLDSOp<"global.load.async.to.lds.b128">;
698+
666699
//===---------------------------------------------------------------------===//
667700
// Operations on raw buffer resources (stride of 0, bounds checks either off or in
668701
// raw buffer mode).

mlir/test/Dialect/LLVMIR/rocdl.mlir

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
664664
llvm.return
665665
}
666666

667+
llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
668+
// CHECK-LABEL: @rocdl.global.load.async.to.lds
669+
// CHECK: rocdl.global.load.async.to.lds.b8 %{{.*}}, %{{.*}}, 0, 0
670+
// CHECK: rocdl.global.load.async.to.lds.b32 %{{.*}}, %{{.*}}, 0, 0
671+
// CHECK: rocdl.global.load.async.to.lds.b64 %{{.*}}, %{{.*}}, 0, 0
672+
// CHECK: rocdl.global.load.async.to.lds.b128 %{{.*}}, %{{.*}}, 0, 0
673+
rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : <1>
674+
rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : <1>
675+
rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : <1>
676+
rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : <1>
677+
llvm.return
678+
}
679+
667680
llvm.func @rocdl.make.buffer.rsrc(%ptr : !llvm.ptr,
668681
%stride : i16,
669682
%numRecords : i64,

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,30 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
10401040
llvm.return
10411041
}
10421042

1043+
llvm.func @rocdl.global.load.async.lds.b8(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
1044+
// CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b8
1045+
rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : !llvm.ptr<1>
1046+
llvm.return
1047+
}
1048+
1049+
llvm.func @rocdl.global.load.async.lds.b32(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
1050+
// CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b32
1051+
rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : !llvm.ptr<1>
1052+
llvm.return
1053+
}
1054+
1055+
llvm.func @rocdl.global.load.async.lds.b64(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
1056+
// CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b64
1057+
rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : !llvm.ptr<1>
1058+
llvm.return
1059+
}
1060+
1061+
llvm.func @rocdl.global.load.async.lds.b128(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
1062+
// CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b128
1063+
rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : !llvm.ptr<1>
1064+
llvm.return
1065+
}
1066+
10431067
llvm.func @rocdl.make.buffer.rsrc(%ptr : !llvm.ptr,
10441068
%stride : i16,
10451069
%numRecords : i64,

0 commit comments

Comments
 (0)