Skip to content

Commit fe4f87b

Browse files
committed
[mlir][rocdl] Add GlobalLoadAsyncToLDS operation
Adds the `global.load.async.to.lds` ops to rocdl, supporting `b8`, `b32`, `b64` and `b128`. Each op is lowered to the appropriate `llvm.amdgcn.global.load.async.to.lds.bXX` intrinsic. This is available on gfx1250+.
1 parent f4e77e9 commit fe4f87b

File tree

3 files changed

+70
-0
lines changed

3 files changed

+70
-0
lines changed

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,39 @@ def ROCDL_GlobalLoadLDSOp :
692692
}];
693693
}
694694

695+
//===---------------------------------------------------------------------===//
696+
// Async load to LDS intrinsics (available in GFX1250)
697+
//===---------------------------------------------------------------------===//
698+
699+
// Shared base class for the `global.load.async.to.lds.*` ops; `mnemonic`
// supplies the concrete op name. Each op takes a global pointer that is read,
// an LDS pointer that is written, and two I32 attributes (`offset`, `aux`).
// NOTE(review): the positional ROCDL_IntrOp arguments are defined outside this
// hunk; `[2, 3]` / `["offset", "aux"]` presumably mark operands 2 and 3 as
// immediate arguments taken from those attributes -- confirm against
// ROCDL_IntrOp.
class ROCDL_GlobalLoadAsyncToLDSOp<string mnemonic> :
700+
ROCDL_IntrOp<mnemonic, [], [], [], 0, 0, 1, 0, [2, 3], ["offset", "aux"]> {
701+
// The source pointer carries a MemRead effect and the destination pointer a
// MemWrite effect.
dag args = (ins Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
702+
Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
703+
I32Attr:$offset,
704+
I32Attr:$aux);
705+
// Append `baseArgs` (declared outside this hunk; presumably the extra
// arguments contributed by ROCDL_IntrOp -- TODO confirm) after the op's own
// arguments.
let arguments = !con(args, baseArgs);
706+
// Only the type of `$globalPtr` is spelled in the textual form; the type of
// `$ldsPtr` is not printed.
let assemblyFormat = [{
707+
$globalPtr `,` $ldsPtr `,` $offset `,` $aux
708+
attr-dict `:` type($globalPtr)
709+
}];
710+
let description = [{
711+
Loads data asynchronously from a global memory pointer to a local data
712+
store (LDS) pointer.
713+
714+
Available on gfx1250+.
715+
}];
716+
// Both pointer operands are reported as accessed operands.
let extraClassDefinition = [{
717+
::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
718+
return {getGlobalPtr(), getLdsPtr()};
719+
}
720+
}];
721+
}
722+
723+
// Concrete ops, one per mnemonic suffix (`b8`, `b32`, `b64`, `b128`). They
// differ only in the mnemonic passed to the shared base class.
def ROCDL_GlobalLoadAsyncToLDSB8Op : ROCDL_GlobalLoadAsyncToLDSOp<"global.load.async.to.lds.b8">;
724+
def ROCDL_GlobalLoadAsyncToLDSB32Op : ROCDL_GlobalLoadAsyncToLDSOp<"global.load.async.to.lds.b32">;
725+
def ROCDL_GlobalLoadAsyncToLDSB64Op : ROCDL_GlobalLoadAsyncToLDSOp<"global.load.async.to.lds.b64">;
726+
def ROCDL_GlobalLoadAsyncToLDSB128Op : ROCDL_GlobalLoadAsyncToLDSOp<"global.load.async.to.lds.b128">;
727+
695728
//===---------------------------------------------------------------------===//
696729
// Tensor load/store intrinsics (available in GFX1250)
697730
//===---------------------------------------------------------------------===//

mlir/test/Dialect/LLVMIR/rocdl.mlir

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
664664
llvm.return
665665
}
666666

667+
llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
668+
// Anchor the checks below to this function. The label directive needs its
// trailing colon; without it FileCheck treats the line as a plain comment.
// CHECK-LABEL: @rocdl.global.load.async.to.lds
669+
// CHECK: rocdl.global.load.async.to.lds.b8 %{{.*}}, %{{.*}}, 0, 0
670+
// CHECK: rocdl.global.load.async.to.lds.b32 %{{.*}}, %{{.*}}, 0, 0
671+
// CHECK: rocdl.global.load.async.to.lds.b64 %{{.*}}, %{{.*}}, 0, 0
672+
// CHECK: rocdl.global.load.async.to.lds.b128 %{{.*}}, %{{.*}}, 0, 0
673+
rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : <1>
674+
rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : <1>
675+
rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : <1>
676+
rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : <1>
677+
llvm.return
678+
}
679+
667680
// CHECK-LABEL: @rocdl.tensor.load.to.lds
668681
llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
669682
%dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,30 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
10401040
llvm.return
10411041
}
10421042

1043+
llvm.func @rocdl.global.load.async.lds.b8(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
// Scope the intrinsic-call check to this function so it cannot be satisfied
// by output belonging to another test.
// CHECK-LABEL: rocdl.global.load.async.lds.b8
1044+
// CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b8
1045+
rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : !llvm.ptr<1>
1046+
llvm.return
1047+
}
1048+
1049+
llvm.func @rocdl.global.load.async.lds.b32(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
// Scope the intrinsic-call check to this function so it cannot be satisfied
// by output belonging to another test.
// CHECK-LABEL: rocdl.global.load.async.lds.b32
1050+
// CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b32
1051+
rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : !llvm.ptr<1>
1052+
llvm.return
1053+
}
1054+
1055+
llvm.func @rocdl.global.load.async.lds.b64(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
// Scope the intrinsic-call check to this function so it cannot be satisfied
// by output belonging to another test.
// CHECK-LABEL: rocdl.global.load.async.lds.b64
1056+
// CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b64
1057+
rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : !llvm.ptr<1>
1058+
llvm.return
1059+
}
1060+
1061+
llvm.func @rocdl.global.load.async.lds.b128(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
// Scope the intrinsic-call check to this function so it cannot be satisfied
// by output belonging to another test.
// CHECK-LABEL: rocdl.global.load.async.lds.b128
1062+
// CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b128
1063+
rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : !llvm.ptr<1>
1064+
llvm.return
1065+
}
1066+
10431067
// CHECK-LABEL: rocdl.tensor.load.to.lds
10441068
llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
10451069
%dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {

0 commit comments

Comments
 (0)