@@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
125125 bits<7> saddr;
126126 bits<10> vdst;
127127
128- bits<5 > cpol;
128+ bits<6 > cpol;
129129
130130 // Only valid on gfx9
131131 bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2581,7 +2581,28 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
25812581 !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
25822582}
25832583
2584+ class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : // Real-instruction class built on FLAT_Real_vi with GFX9-specific settings.
2585+ FLAT_Real_vi <op, ps, has_sccb> {
2586+ let AssemblerPredicate = isGFX9NotGFX90A; // Assembler accepts this variant only under isGFX9NotGFX90A.
2587+ let Subtarget = SIEncodingFamily.GFX9; // Files the record under the GFX9 encoding family.
2588+ let DecoderNamespace = "GFX9";
2589+ let Inst{55} = cpol{5}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
2590+ }
2591+
2592+ multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> { // Emits two records per use: a `_vi` one (FLAT_Real_vi) and a `_gfx9` one (FLAT_Real_gfx9).
2593+ let AssemblerPredicate = isGFX8orGFX9After908 in { // GFX8, or GFX9 starting with 90A
2594+ def _vi: FLAT_Real_vi<op, ps, has_sccb>;
2595+ }
2596+ def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>; // Outside the `let`: FLAT_Real_gfx9 sets its own AssemblerPredicate (isGFX9NotGFX90A).
2597+ }
2598+
25842599multiclass FLAT_Real_AllAddr_vi<bits<7> op, // Instantiates FLAT_Real_mc_vi for the pseudo named NAME and for its `_SADDR` form.
2600+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { // Default is read from the pseudo looked up by NAME.
2601+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; // Empty defm name: the inner `_vi`/`_gfx9` suffixes attach directly to NAME.
2602+ defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; // Same opcode, applied to the NAME#"_SADDR" pseudo.
2603+ }
2604+
2605+ multiclass FLAT_Real_AllAddr_vi_only<bits<7> op,
25852606 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
25862607 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
25872608 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
@@ -2591,19 +2612,18 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
25912612 FLAT_Real <op, ps>,
25922613 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
25932614 let AssemblerPredicate = isGFX940Plus;
2594- let DecoderNamespace = "GFX9 ";
2615+ let DecoderNamespace = "GFX940 "; // NOTE(review): the trailing space inside the string (also on the removed line) looks like a diff-rendering artifact; confirm the intended value is "GFX940".
25952616 let Inst{13} = ps.sve; // Bit 13 is taken from the pseudo's `sve` field.
25962617 let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); // Bit 25: the cpol SCC bit when the pseudo has sccb, otherwise the pseudo's fixed sccbValue.
25972618}
25982619
25992620multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
2600- def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
2601- let AssemblerPredicate = isGFX8GFX9NotGFX940;
2602- let OtherPredicates = [isGFX8GFX9NotGFX940];
2603- }
2604- def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
2605- let DecoderNamespace = "GFX9";
2621+ let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2622+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
26062623 }
2624+
2625+ defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
2626+
26072627 let AssemblerPredicate = isGFX940Plus in {
26082628 def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
26092629 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2616,11 +2636,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
26162636 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
26172637
26182638 let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2619- def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
2620- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" ;
2639+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
2640+ defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> ;
26212641 }
2622- def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
2623- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" ;
2642+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
2643+ defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> ;
26242644 }
26252645 }
26262646
@@ -2636,32 +2656,41 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
26362656 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
26372657}
26382658
2639- def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
2640- def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
2641- def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
2642- def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
2643- def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
2644- def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
2645- def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
2646- def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
2647-
2648- def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
2649- def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2650- def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
2651- def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2652- def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
2653- def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
2654- def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
2655- def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
2656-
2657- def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2658- def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2659- def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2660- def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2661- def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
2662- def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
2659+ defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>; // Each defm in this list expands FLAT_Real_mc_vi, i.e. one `_vi` and one `_gfx9` record per opcode.
2660+ defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
2661+ defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
2662+ defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
2663+ defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
2664+ defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
2665+ defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>; // Listed before DWORDX3; opcodes are 0x17 (X4) and 0x16 (X3).
2666+ defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
2667+
2668+ defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
2669+ defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2670+ defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
2671+ defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2672+ defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
2673+ defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
2674+ defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>; // Same ordering as the loads: 0x1f (X4) precedes 0x1e (X3).
2675+ defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
2676+
2677+ defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2678+ defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2679+ defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2680+ defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2681+ defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
2682+ defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
26632683
26642684multiclass FLAT_Real_Atomics_vi <bits<7> op, // Emits FLAT_Real_mc_vi records for an atomic pseudo and for its `_RTN` counterpart.
2685+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { // Default is read from the pseudo looked up by NAME.
2686+ defvar ps = !cast<FLAT_Pseudo>(NAME);
2687+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; // Re-resolves the pseudo through its PseudoInstr string rather than using NAME directly.
2688+ defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; // Same opcode; the pseudo is PseudoInstr with "_RTN" appended.
2689+ }
2690+
2691+ // FLAT_Real_mc_vi contains a vi variant and a gfx9 variant. In some cases, only the vi
2692+ // variant is needed.
2693+ multiclass FLAT_Real_Atomics_vi_only <bits<7> op,
26652694 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
26662695 defvar ps = !cast<FLAT_Pseudo>(NAME);
26672696 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
@@ -2672,6 +2701,13 @@ multiclass FLAT_Real_Atomics_vi <bits<7> op,
26722701multiclass FLAT_Global_Real_Atomics_vi<bits<7> op, // Extends FLAT_Real_AllAddr_vi with records for the `_RTN` and `_SADDR_RTN` pseudos.
26732702 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
26742703 FLAT_Real_AllAddr_vi<op, has_sccb> { // Inherits the records for NAME and NAME#"_SADDR".
2704+ defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; // Each defm yields a `_vi` and a `_gfx9` record via FLAT_Real_mc_vi.
2705+ defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
2706+ }
2707+
2708+ multiclass FLAT_Global_Real_Atomics_vi_only<bits<7> op,
2709+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
2710+ FLAT_Real_AllAddr_vi_only<op, has_sccb> {
26752711 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
26762712 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
26772713
@@ -2838,10 +2874,10 @@ let AssemblerPredicate = isGFX940Plus in {
28382874 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
28392875 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
28402876 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
2841- defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi <0x4d>;
2842- defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi <0x4e>;
2843- defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi <0x52>;
2844- defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi <0x52>;
2877+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_only <0x4d>;
2878+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_only <0x4e>;
2879+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_only <0x52>;
2880+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_only <0x52>;
28452881} // End AssemblerPredicate = isGFX940Plus
28462882
28472883//===----------------------------------------------------------------------===//
0 commit comments