Skip to content

Commit d09644a

Browse files
authored
[ROCDL] Added missing cluster.load.async.to.lds op (gfx1250) (llvm#169042)
* Added missing cluster.load ops with different sizes. Extended all rocdl tests
1 parent 66e18b8 commit d09644a

File tree

3 files changed

+60
-5
lines changed

3 files changed

+60
-5
lines changed

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,7 @@ foreach bitsVal = [8, 32, 64, 128] in {
924924
let arguments = !con(args, baseArgs);
925925
let assemblyFormat = [{
926926
$globalPtr `,` $ldsPtr `,` $offset `,` $aux
927-
attr-dict `:` type($globalPtr) `,` type($ldsPtr)
927+
attr-dict `:` qualified(type($globalPtr)) `,` qualified(type($ldsPtr))
928928
}];
929929
let description = [{
930930
Asynchronously loads }] # !cast<string>(bitsVal) # [{ bits of data from a global memory pointer
@@ -941,6 +941,34 @@ foreach bitsVal = [8, 32, 64, 128] in {
941941
}
942942
}
943943

944+
// Cluster async load-to-LDS ops: one op definition is generated per load
// width listed below (8, 32, 64 and 128 bits).
foreach bitsVal = [8, 32, 64, 128] in {
945+
// Width suffix shared by the record name and the mnemonic: "b8", "b32", ...
defvar bitsStr = "b" # !cast<string>(bitsVal);
946+
// Record names come out as ROCDL_ClusterLoadAsyncToLDSB8Op, ...B32Op, etc.
def ROCDL_ClusterLoadAsyncToLDS # !toupper(bitsStr) # Op :
947+
// The trailing lists pair positions 2, 3, 4 with the attribute names
// "offset", "cpol", "mask" -- presumably the immediate-argument positions of
// the intrinsic; confirm against the ROCDL_IntrOp class definition.
// NOTE(review): `mask` is modelled as a compile-time attribute here; confirm
// the underlying intrinsic really requires it to be an immediate.
ROCDL_IntrOp<"cluster.load.async.to.lds." # bitsStr, [], [], [], 0, 0, 1, 0, [2, 3, 4], ["offset", "cpol", "mask"]> {
948+
// Operands: the global pointer is annotated as read, the LDS pointer as
// written; offset, cpol and mask are 32-bit integer attributes.
dag args = (ins Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
949+
Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
950+
I32Attr:$offset,
951+
I32Attr:$cpol,
952+
I32Attr:$mask);
953+
// `baseArgs` is defined outside this view and is appended to the op-specific
// arguments above.
let arguments = !con(args, baseArgs);
954+
// Both pointer types are printed through `qualified(...)` in the custom
// assembly (the tests spell them as `!llvm.ptr<1>, !llvm.ptr<3>`).
let assemblyFormat = [{
955+
$globalPtr `,` $ldsPtr `,` $offset `,` $cpol `,` $mask
956+
attr-dict `:` qualified(type($globalPtr)) `,` qualified(type($ldsPtr))
957+
}];
958+
let description = [{
959+
Broadcasts memory load of }] # !cast<string>(bitsVal) # [{ bits of data for a cluster of workgroups.
960+
961+
Available on gfx1250+.
962+
}];
963+
964+
// Reports both pointer operands as the values this op accesses.
let extraClassDefinition = [{
965+
::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
966+
return {getGlobalPtr(), getLdsPtr()};
967+
}
968+
}];
969+
}
970+
}
971+
944972
//===---------------------------------------------------------------------===//
945973
// Tensor load/store intrinsics (available in GFX1250)
946974
//===---------------------------------------------------------------------===//

mlir/test/Dialect/LLVMIR/rocdl.mlir

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -709,13 +709,27 @@ llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3
709709
// CHECK: rocdl.global.load.async.to.lds.b32 %{{.*}}, %{{.*}}, 0, 0
710710
// CHECK: rocdl.global.load.async.to.lds.b64 %{{.*}}, %{{.*}}, 0, 0
711711
// CHECK: rocdl.global.load.async.to.lds.b128 %{{.*}}, %{{.*}}, 0, 0
712-
rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : <1>, <3>
713-
rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : <1>, <3>
714-
rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : <1>, <3>
715-
rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : <1>, <3>
712+
rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
713+
rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
714+
rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
715+
rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
716716
llvm.return
717717
}
718718

719+
llvm.func @rocdl.cluster.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
// Round-trip test: every cluster load width (b8/b32/b64/b128) must parse and
// re-print with its three integer attributes. The label directive below needs
// its trailing colon, otherwise FileCheck silently treats it as a comment.
720+
// CHECK-LABEL: @rocdl.cluster.load.async.to.lds
721+
// CHECK: rocdl.cluster.load.async.to.lds.b8 %{{.*}}, %{{.*}}, 0, 0, 0
722+
// CHECK: rocdl.cluster.load.async.to.lds.b32 %{{.*}}, %{{.*}}, 0, 0, 0
723+
// CHECK: rocdl.cluster.load.async.to.lds.b64 %{{.*}}, %{{.*}}, 0, 0, 0
724+
// CHECK: rocdl.cluster.load.async.to.lds.b128 %{{.*}}, %{{.*}}, 0, 0, 0
725+
rocdl.cluster.load.async.to.lds.b8 %src, %dst, 0, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
726+
rocdl.cluster.load.async.to.lds.b32 %src, %dst, 0, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
727+
rocdl.cluster.load.async.to.lds.b64 %src, %dst, 0, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
728+
rocdl.cluster.load.async.to.lds.b128 %src, %dst, 0, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
729+
llvm.return
730+
}
731+
732+
719733
// CHECK-LABEL: @rocdl.tensor.load.to.lds
720734
llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
721735
%dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,6 +1141,19 @@ llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3
11411141
llvm.return
11421142
}
11431143

1144+
// CHECK-LABEL: rocdl.cluster.load.async.to.lds
1145+
llvm.func @rocdl.cluster.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
// Translation test: each cluster load width is expected to become a call to
// the llvm.amdgcn.cluster.load.async.to.lds intrinsic with the same width
// suffix. Only the callee name is matched; the call arguments are not checked.
1146+
// CHECK: call void @llvm.amdgcn.cluster.load.async.to.lds.b8
1147+
rocdl.cluster.load.async.to.lds.b8 %src, %dst, 0, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
1148+
// CHECK: call void @llvm.amdgcn.cluster.load.async.to.lds.b32
1149+
rocdl.cluster.load.async.to.lds.b32 %src, %dst, 0, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
1150+
// CHECK: call void @llvm.amdgcn.cluster.load.async.to.lds.b64
1151+
rocdl.cluster.load.async.to.lds.b64 %src, %dst, 0, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
1152+
// CHECK: call void @llvm.amdgcn.cluster.load.async.to.lds.b128
1153+
rocdl.cluster.load.async.to.lds.b128 %src, %dst, 0, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
1154+
llvm.return
1155+
}
1156+
11441157
// CHECK-LABEL: rocdl.tensor.load.to.lds
11451158
llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
11461159
%dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {

0 commit comments

Comments
 (0)