@@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
125125 bits<7> saddr;
126126 bits<10> vdst;
127127
128- bits<5 > cpol;
128+ bits<6 > cpol;
129129
130130 // Only valid on gfx9
131131 bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2693,7 +2693,28 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
26932693 !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
26942694}
26952695
// Real encoding class layered on FLAT_Real_vi for targets matched by
// isGFX9NotGFX90A. Parameters are forwarded unchanged to FLAT_Real_vi:
//   op       - 7-bit opcode
//   ps       - the FLAT pseudo this encoding realizes
//   has_sccb - defaults to ps.has_sccb
2696+ class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
2697+ FLAT_Real_vi <op, ps, has_sccb> {
2698+ let AssemblerPredicate = isGFX9NotGFX90A; // replaces any predicate inherited from FLAT_Real_vi
2699+ let Subtarget = SIEncodingFamily.GFX9; // NOTE(review): presumably replaces the family chosen by FLAT_Real_vi - its body is not visible here
2700+ let DecoderNamespace = "GFX9";
2701+ let Inst{55} = cpol{5}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
2702+ }
2703+
// For one pseudo, emits two real encodings: <NAME>_vi (FLAT_Real_vi) and
// <NAME>_gfx9 (FLAT_Real_gfx9). Both receive the same op/ps/has_sccb.
2704+ multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
2705+ let AssemblerPredicate = isGFX8orGFX9After908 in { // GFX8 or GFX9's starting with 90A
2706+ def _vi: FLAT_Real_vi<op, ps, has_sccb>;
2707+ }
2708+ def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>; // outside the let: keeps the predicate set in FLAT_Real_gfx9
2709+ }
2710+
// Instantiates FLAT_Real_mc_vi for the pseudo named NAME and for its
// NAME#"_SADDR" sibling, so each gets a _vi and a _gfx9 real encoding.
// has_sccb defaults to the value stored on the NAME pseudo and is applied to
// both the base and the _SADDR form.
26962711multiclass FLAT_Real_AllAddr_vi<bits<7> op,
2712+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2713+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; // empty defm name: no extra infix before _vi/_gfx9
2714+ defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
2715+ }
2716+
2717+ multiclass FLAT_Real_AllAddr_vi_only<bits<7> op,
26972718 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
26982719 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
26992720 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
@@ -2703,19 +2724,18 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
27032724 FLAT_Real <op, ps>,
27042725 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
27052726 let AssemblerPredicate = isGFX940Plus;
2706- let DecoderNamespace = "GFX9 ";
2727+ let DecoderNamespace = "GFX940 ";
27072728 let Inst{13} = ps.sve;
27082729 let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
27092730}
27102731
27112732multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
2712- def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
2713- let AssemblerPredicate = isGFX8GFX9NotGFX940;
2714- let OtherPredicates = [isGFX8GFX9NotGFX940];
2715- }
2716- def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
2717- let DecoderNamespace = "GFX9";
2733+ let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2734+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
27182735 }
2736+
2737+ defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
2738+
27192739 let AssemblerPredicate = isGFX940Plus in {
27202740 def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
27212741 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2728,11 +2748,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
27282748 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
27292749
27302750 let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2731- def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
2732- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" ;
2751+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
2752+ defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> ;
27332753 }
2734- def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
2735- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" ;
2754+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
2755+ defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> ;
27362756 }
27372757 }
27382758
@@ -2748,32 +2768,41 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
27482768 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
27492769}
27502770
// Removed side of the hunk: each FLAT load/store had a single FLAT_Real_vi def.
2751- def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
2752- def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
2753- def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
2754- def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
2755- def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
2756- def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
2757- def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
2758- def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
2759-
2760- def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
2761- def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2762- def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
2763- def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2764- def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
2765- def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
2766- def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
2767- def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
2768-
2769- def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2770- def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2771- def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2772- def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2773- def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
2774- def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
// Added side of the hunk: the same pseudos and opcodes, now instantiated through
// FLAT_Real_mc_vi so each one produces both a _vi and a _gfx9 real encoding.
// NOTE(review): FLAT_Real_mc_vi appends _vi/_gfx9 to the defm name, and these
// defm names already end in _vi - confirm the doubled suffix is intended.
2771+ defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
2772+ defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
2773+ defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
2774+ defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
2775+ defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
2776+ defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
2777+ defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
2778+ defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
2779+
2780+ defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
2781+ defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2782+ defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
2783+ defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2784+ defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
2785+ defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
2786+ defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
2787+ defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
2788+
2789+ defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2790+ defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2791+ defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2792+ defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2793+ defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
2794+ defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
27752795
// Real encodings for a FLAT atomic and its returning (_RTN) form. Each goes
// through FLAT_Real_mc_vi, so both a _vi and a _gfx9 record are produced.
// has_sccb defaults to the value stored on the pseudo named NAME.
27762796multiclass FLAT_Real_Atomics_vi <bits<7> op,
2797+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2798+ defvar ps = !cast<FLAT_Pseudo>(NAME);
2799+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; // pseudo is re-resolved by its PseudoInstr string
2800+ defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; // same opcode as the non-returning form
2801+ }
2802+
2803+ // FLAT_Real_mc_vi contains a vi variant and a gfx9 variant. In some cases, only the vi
2804+ // variant is needed.
2805+ multiclass FLAT_Real_Atomics_vi_only <bits<7> op,
27772806 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
27782807 defvar ps = !cast<FLAT_Pseudo>(NAME);
27792808 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
@@ -2784,6 +2813,13 @@ multiclass FLAT_Real_Atomics_vi <bits<7> op,
// Global atomic real encodings: inherits the base and _SADDR forms from
// FLAT_Real_AllAddr_vi and adds the returning _RTN and _SADDR_RTN forms.
// The added forms use FLAT_Real_mc_vi, so each yields a _vi and a _gfx9 record.
27842813multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
27852814 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
27862815 FLAT_Real_AllAddr_vi<op, has_sccb> {
2816+ defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
2817+ defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
2818+ }
2819+
2820+ multiclass FLAT_Global_Real_Atomics_vi_only<bits<7> op,
2821+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
2822+ FLAT_Real_AllAddr_vi_only<op, has_sccb> {
27872823 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
27882824 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
27892825
@@ -2950,10 +2986,10 @@ let AssemblerPredicate = isGFX940Plus in {
29502986 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
29512987 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
29522988 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
2953- defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi <0x4d>;
2954- defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi <0x4e>;
2955- defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi <0x52>;
2956- defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi <0x52>;
2989+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_only <0x4d>;
2990+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_only <0x4e>;
2991+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_only <0x52>;
2992+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_only <0x52>;
29572993} // End AssemblerPredicate = isGFX940Plus
29582994
29592995//===----------------------------------------------------------------------===//
0 commit comments