@@ -321,6 +321,7 @@ def ROCDL_BarrierOp : ROCDL_Op<"barrier"> {
321321 let assemblyFormat = "attr-dict";
322322}
323323
// Pointer type constraints, each pinned to a single LLVM address space:
// ROCDLGlobalBuffer uses address space 1 and ROCDLBufferLDS uses address space 3.
324+ def ROCDLGlobalBuffer : LLVM_PointerInAddressSpace<1>;
324325def ROCDLBufferLDS : LLVM_PointerInAddressSpace<3>;
325326
326327def ROCDL_BarrierInitOp : ROCDL_IntrOp<"s.barrier.init", [], [], [], 0, 0, 0, 0, [1], ["id"]>,
@@ -631,8 +632,6 @@ def ROCDL_wmma_i32_16x16x64_iu8 : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x64.iu8", [1]
631632//===---------------------------------------------------------------------===//
632633// LDS transpose intrinsics (available in GFX950)
633634
634- def ROCDLGlobalBuffer : LLVM_PointerInAddressSpace<1>;
635-
636635class ROCDL_LDS_Read_Tr_IntrOp<string mnemonic> :
637636 ROCDL_IntrOp<mnemonic, [1], [], [], 1, 0, 1> {
638637 dag args = (ins Arg<ROCDLBufferLDS, "", [MemRead]>:$ptr);
@@ -650,6 +649,58 @@ def ROCDL_ds_read_tr8_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr8.b64">;
650649def ROCDL_ds_read_tr6_b96 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr6.b96">;
651650def ROCDL_ds_read_tr16_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr16.b64">;
652651
652+
653+
654+ //===---------------------------------------------------------------------===//
655+ // Global/DS load-transpose intrinsics (available in GFX1250+)
656+
// Describes where a transposing load reads from: `name` is the mnemonic
// prefix and `space` is the pointer address-space number.
// ROCDL_TrLoadOpMeta prepends `name` to the intrinsic mnemonic, and
// ROCDL_TrLoadOp passes `space` to LLVM_PointerInAddressSpace to constrain
// its pointer operand.
657+ class AddrKind<string n, int s> {
658+ string name = n;
659+ int space = s;
660+ }
// The two address kinds instantiated by the load-transpose ops in this section.
661+ def GlobalAddrKind : AddrKind<"global", 1>;
662+ def DSAddrKind : AddrKind<"ds", 3>;
663+
// Bundles the parameters of one load-transpose op and derives its mnemonic.
//   kind        - address kind (supplies the mnemonic prefix and address space)
//   inElemBits  - bit width of the source matrix elements
//   outElemBits - bit width of the result, used for the ".b<N>" suffix
// The resulting mnemonic is "<kind.name>.load.tr<inBitsEnc>.b<outBits>".
664+ class ROCDL_TrLoadOpMeta<AddrKind kind, int inElemBits, int outElemBits> {
665+ AddrKind addrKind = kind;
// Decimal string forms of the two widths, reused in the mnemonic and in the
// op description text.
666+ string inBits = !cast<string>(inElemBits);
667+ string outBits = !cast<string>(outElemBits);
// Element width as spelled in the mnemonic. For address space 1 with 8- or
// 16-bit elements the width is omitted (empty string); in every other case it
// is the same as inBits. E.g. <GlobalAddrKind, 8, 64> yields
// "global.load.tr.b64" while <DSAddrKind, 8, 64> yields "ds.load.tr8.b64".
668+ string inBitsEnc = !if(!eq(addrKind.space, 1),
669+ !if(!or(!eq(inElemBits, 8), !eq(inElemBits, 16)), "", inBits), inBits);
670+ string mnemonic = addrKind.name # ".load.tr" # inBitsEnc # ".b" # outBits;
671+ }
672+
// Intrinsic op for a load-and-transpose, parameterized by a ROCDL_TrLoadOpMeta
// record that supplies the mnemonic, the pointer address space and the bit
// widths quoted in the summary/description.
// NOTE(review): the meaning of the positional ROCDL_IntrOp arguments
// ([1], [], [], 1, 0, 1) is defined outside this view - confirm against the
// ROCDL_IntrOp declaration.
673+ class ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta meta> :
674+ ROCDL_IntrOp<meta.mnemonic, [1], [], [], 1, 0, 1> {
675+
// Single pointer operand, constrained to the address space of the meta's
// address kind and annotated with a MemRead effect.
676+ dag args = (ins Arg<LLVM_PointerInAddressSpace<meta.addrKind.space>, "", [MemRead]>:$ptr);
// Concatenate the pointer operand with baseArgs. baseArgs is not declared in
// this class - presumably inherited from ROCDL_IntrOp; TODO confirm.
677+ let arguments = !con(args, baseArgs);
678+ let summary = "Loads and transposes a matrix from " # meta.addrKind.name # " memory to registers (available in gfx1250+).";
679+ let description = [{
680+ Load a matrix of }] # meta.inBits # [{-bit data from the }] # meta.addrKind.name # [{ memory,
681+ transpose data between row-major and column-major order,
682+ and store the result into a }] # meta.outBits # [{-bit vector register.
683+
684+ Available in gfx1250+.
685+ }];
// Textual form: the pointer, the attribute dictionary, then
// ": <pointer type> -> <result type>".
686+ let assemblyFormat = "$ptr attr-dict `:` qualified(type($ptr)) `->` type($res)";
// Emits getAccessedOperands() for the generated class; it returns only the
// pointer operand.
687+ let extraClassDefinition = [{
688+ ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
689+ return {getPtr()};
690+ }
691+ }];
692+ }
693+
// Global-memory (address space 1) load-transpose ops. Mnemonics derived by
// ROCDL_TrLoadOpMeta, in order: "global.load.tr4.b64", "global.load.tr.b64",
// "global.load.tr6.b96", "global.load.tr.b128" (8- and 16-bit element widths
// are omitted from global mnemonics).
// NOTE(review): ROCDL_GlobalLoadTr8_B128 is instantiated with inElemBits = 16,
// so its description reads "16-bit data", while the def name says "Tr8"
// (compare ROCDL_DsLoadTr16_B128). Confirm whether the name is intentional;
// renaming would change the generated op class name.
694+ def ROCDL_GlobalLoadTr4_B64 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 4, 64>>;
695+ def ROCDL_GlobalLoadTr8_B64 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 8, 64>>;
696+ def ROCDL_GlobalLoadTr6_B96 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 6, 96>>;
697+ def ROCDL_GlobalLoadTr8_B128 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 16, 128>>;
698+
// DS (address space 3) load-transpose ops. Mnemonics derived by
// ROCDL_TrLoadOpMeta, in order: "ds.load.tr4.b64", "ds.load.tr8.b64",
// "ds.load.tr6.b96", "ds.load.tr16.b128" (the element width is always
// spelled out for the DS address kind).
699+ def ROCDL_DsLoadTr4_B64 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<DSAddrKind, 4, 64>>;
700+ def ROCDL_DsLoadTr8_B64 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<DSAddrKind, 8, 64>>;
701+ def ROCDL_DsLoadTr6_B96 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<DSAddrKind, 6, 96>>;
702+ def ROCDL_DsLoadTr16_B128 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<DSAddrKind, 16, 128>>;
703+
653704//===---------------------------------------------------------------------===//
654705// Load to LDS intrinsic (available in GFX9 and GFX10)
655706//===---------------------------------------------------------------------===//
0 commit comments