@@ -2019,3 +2019,97 @@ def : MIMGG16Mapping<IMAGE_SAMPLE_CD_O_nortn, IMAGE_SAMPLE_CD_O_G16_nortn>;
20192019def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_O_nortn, IMAGE_SAMPLE_CD_CL_O_G16_nortn>;
20202020def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_O_nortn, IMAGE_SAMPLE_C_CD_O_G16_nortn>;
20212021def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_O_nortn, IMAGE_SAMPLE_C_CD_CL_O_G16_nortn>;
2022+
//===----------------------------------------------------------------------===//
// VIMAGE Tensor Instructions
//===----------------------------------------------------------------------===//
2026+
// Pseudo instruction for the tensor load/store operations.
//
//   opName  - assembly mnemonic (stored in Mnemonic) and base of the name
//             handed to SIMCInstr.
//   _UpTo2D - when set, only $vaddr0 and $vaddr1 are explicit operands and
//             "_D2" is appended to the SIMCInstr name; when clear, $vaddr2 and
//             $vaddr3 are explicit operands as well.
//
// The operand list and asm string are left empty in the InstSI base:
// InOperandList is overridden below, and the operand part of the asm string is
// kept in AsmOperands for the real instruction to paste after its mnemonic.
class VIMAGE_TENSOR_Pseudo<string opName, bit _UpTo2D = 0> :
  InstSI<(outs), (ins), "", []>,
  SIMCInstr<opName # !if(_UpTo2D, "_D2", ""), SIEncodingFamily.NONE> {

  // Data carried along for the real (encoded) instruction.
  string Mnemonic = opName;
  bit UpTo2D = _UpTo2D;

  // Codegen-only pseudo with a named-operand table.
  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let UseNamedOperandTable = 1;

  // Instruction classification flags.
  let VALU = 1;
  let TENSOR_CNT = 1;
  let maybeAtomic = 0;

  // Treated as both a load and a store, with no other side effects.
  let mayLoad = 1;
  let mayStore = 1;
  let hasSideEffects = 0;

  // Reads EXEC and TENSORcnt; writes TENSORcnt.
  let Uses = [EXEC, TENSORcnt];
  let Defs = [TENSORcnt];
  let SchedRW = [WriteVMEM, WriteLDS];

  // The 2D-or-less form omits $vaddr2 and $vaddr3.
  let InOperandList = !if(UpTo2D,
    (ins SReg_128:$vaddr0, SReg_256:$vaddr1, R128A16:$r128, CPol:$cpol),
    (ins SReg_128:$vaddr0, SReg_256:$vaddr1, SReg_128:$vaddr2,
         SReg_128:$vaddr3, R128A16:$r128, CPol:$cpol));

  // $r128 and $cpol are pasted without a separator; presumably their operand
  // printers emit whatever leading text they need.
  string AsmOperands = " $vaddr0, $vaddr1"
                       # !if(UpTo2D, "", ", $vaddr2, $vaddr3")
                       # "$r128$cpol";
}
2052+
// Tensor pseudos: for each of the load-to-LDS and store-from-LDS mnemonics, a
// full form (all four $vaddr operands) and a "_D2" form (UpTo2D set, only
// $vaddr0 and $vaddr1). All four are predicated on isGFX1250Plus.
let SubtargetPredicate = isGFX1250Plus in {
  def TENSOR_LOAD_TO_LDS :
    VIMAGE_TENSOR_Pseudo<"tensor_load_to_lds", /*_UpTo2D=*/0>;
  def TENSOR_STORE_FROM_LDS :
    VIMAGE_TENSOR_Pseudo<"tensor_store_from_lds", /*_UpTo2D=*/0>;
  def TENSOR_LOAD_TO_LDS_D2 :
    VIMAGE_TENSOR_Pseudo<"tensor_load_to_lds", /*_UpTo2D=*/1>;
  def TENSOR_STORE_FROM_LDS_D2 :
    VIMAGE_TENSOR_Pseudo<"tensor_store_from_lds", /*_UpTo2D=*/1>;
} // End SubtargetPredicate = isGFX1250Plus.
2059+
// Selection pattern for the full tensor form.
//
// Matches `node` applied to a v4i32, a v8i32, two more v4i32 values and an
// i32 target-immediate cache policy, and emits `inst` with the same values.
// The literal 0 fills the operand slot between $vaddr3 and $cpol, which is
// $r128 in the non-UpTo2D operand list of VIMAGE_TENSOR_Pseudo.
class TensorPat<VIMAGE_TENSOR_Pseudo inst, SDPatternOperator node> : GCNPat<
  (node v4i32:$vaddr0,
        v8i32:$vaddr1,
        v4i32:$vaddr2,
        v4i32:$vaddr3,
        (i32 timm:$cpol)),
  (inst $vaddr0, $vaddr1, $vaddr2, $vaddr3, 0, $cpol)
>;
2064+
// Selection pattern for the "_D2" tensor form.
//
// Matches `node` applied to a v4i32, a v8i32 and an i32 target-immediate cache
// policy, and emits `inst` with the same values. The literal 0 fills the
// operand slot between $vaddr1 and $cpol, which is $r128 in the UpTo2D operand
// list of VIMAGE_TENSOR_Pseudo.
class TensorD2Pat<VIMAGE_TENSOR_Pseudo inst, SDPatternOperator node> : GCNPat<
  (node v4i32:$vaddr0,
        v8i32:$vaddr1,
        (i32 timm:$cpol)),
  (inst $vaddr0, $vaddr1, 0, $cpol)
>;
2069+
// Bind each tensor intrinsic to its pseudo: the four-address intrinsics go
// through TensorPat, the "_d2" intrinsics through TensorD2Pat.
let SubtargetPredicate = isGFX1250Plus in {
  def : TensorPat  <TENSOR_LOAD_TO_LDS,
                    int_amdgcn_tensor_load_to_lds>;
  def : TensorPat  <TENSOR_STORE_FROM_LDS,
                    int_amdgcn_tensor_store_from_lds>;
  def : TensorD2Pat<TENSOR_LOAD_TO_LDS_D2,
                    int_amdgcn_tensor_load_to_lds_d2>;
  def : TensorD2Pat<TENSOR_STORE_FROM_LDS_D2,
                    int_amdgcn_tensor_store_from_lds_d2>;
} // End SubtargetPredicate = isGFX1250Plus.
2076+
// Encoded ("real") counterpart of a VIMAGE_TENSOR_Pseudo.
//
//   op     - 8-bit opcode handed to the VIMAGEe encoding class.
//   ps     - pseudo whose operand lists, asm operand string and flags are
//            reused.
//   opName - mnemonic placed in front of ps.AsmOperands; defaults to
//            ps.Mnemonic.
//
// The vaddr*/rsrc/vdata/d16/a16/tfe/dmask/dim names assigned below are not
// declared in this class; presumably they are encoding fields of VIMAGEe.
class VIMAGE_TENSOR_Real<bits<8> op, VIMAGE_TENSOR_Pseudo ps,
                         string opName = ps.Mnemonic> :
  InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
  VIMAGEe<op> {

  // Properties mirrored from the pseudo.
  let TSFlags              = ps.TSFlags;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let SchedRW              = ps.SchedRW;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;

  // D# groups 2 and 3 are set to NULL for 2D or less: the UpTo2D form has no
  // $vaddr2/$vaddr3 operands, so both fields are fixed to the hardware
  // encoding of SGPR_NULL_gfx11plus. Otherwise they are left unset (?).
  let vaddr2 = !if(ps.UpTo2D, !cast<int>(SGPR_NULL_gfx11plus.HWEncoding), ?);
  let vaddr3 = !if(ps.UpTo2D, !cast<int>(SGPR_NULL_gfx11plus.HWEncoding), ?);

  // Fields fixed to 0 (original note: "based on SPG").
  let vaddr4 = 0;
  let rsrc   = 0;
  let vdata  = 0;
  let d16    = 0;
  let a16    = 0;
  let tfe    = 0;

  // Fields fixed to 1 (original note: "sp3").
  let dmask = 1;
  let dim   = 1;
}
2103+
// Defines the GFX1250 real instructions for one tensor opcode.
//
// Meant to be instantiated with `defm <PSEUDO_NAME> : ...<op>`: for each of
// the "_D2" and plain suffixes it looks up the already-defined pseudo named
// NAME # suffix and emits a real instruction named <NAME><suffix>_gfx1250.
// Both forms share the same opcode `op`.
multiclass VIMAGE_TENSOR_Real_gfx1250<bits<8> op> {
  let AssemblerPredicate = isGFX1250Plus in {
    let DecoderNamespace = "GFX1250" in {
      foreach DSuffix = ["_D2", ""] in {
        // The !cast requires the pseudo record to exist before this point.
        defvar ps = !cast<VIMAGE_TENSOR_Pseudo>(NAME # DSuffix);

        // The SIMCInstr name is taken from the pseudo (ps.PseudoInstr) and
        // paired with the GFX1250 encoding family.
        def DSuffix # _gfx1250 :
          VIMAGE_TENSOR_Real<op, ps, ps.Mnemonic>,
          SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX1250>;
      }
    } // End DecoderNamespace = "GFX1250".
  } // End AssemblerPredicate = isGFX1250Plus.
}
2113+
// GFX1250 encodings: opcode 0xc4 for tensor_load_to_lds and 0xc5 for
// tensor_store_from_lds. Each defm covers both the plain and the "_D2" pseudo
// of that name.
defm TENSOR_LOAD_TO_LDS    : VIMAGE_TENSOR_Real_gfx1250<0xc4>;
defm TENSOR_STORE_FROM_LDS : VIMAGE_TENSOR_Real_gfx1250<0xc5>;
0 commit comments