@@ -123,7 +123,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
123
123
bits<7> saddr;
124
124
bits<10> vdst;
125
125
126
- bits<5 > cpol;
126
+ bits<6 > cpol;
127
127
128
128
// Only valid on gfx9
129
129
bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2481,7 +2481,28 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
2481
2481
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
2482
2482
}
2483
2483
2484
// Real (encoded) FLAT instruction variant guarded by isGFX9NotGFX90A.
// Derives from FLAT_Real_vi with the same opcode `op`, pseudo `ps` and
// `has_sccb` flag, then sets the assembler predicate, the encoding family
// (SIEncodingFamily.GFX9) and the decoder namespace ("GFX9") for this variant.
// Encoding bit 55 is driven by cpol{5}; cpol is declared as a 6-bit field in
// FLAT_Real elsewhere in this change (presumably inherited through
// FLAT_Real_vi - confirm against the full file).
+ class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
2485
+ FLAT_Real_vi <op, ps, has_sccb> {
2486
+ let AssemblerPredicate = isGFX9NotGFX90A;
2487
+ let Subtarget = SIEncodingFamily.GFX9;
2488
+ let DecoderNamespace = "GFX9";
2489
+ let Inst{55} = cpol{5}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
2490
+ }
2491
+
2492
// Emits two real-instruction records for one FLAT pseudo `ps` with opcode `op`:
//   * `_vi`   - a FLAT_Real_vi, with AssemblerPredicate isGFX8orGFX9After908.
//   * `_gfx9` - a FLAT_Real_gfx9, which sets its own AssemblerPredicate
//               (isGFX9NotGFX90A) in its class body, so it sits outside the
//               `let` below.
// `has_sccb` defaults to the pseudo's has_sccb field and is forwarded to both.
+ multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
2493
+ let AssemblerPredicate = isGFX8orGFX9After908 in { // GFX8, or GFX9 targets starting with 90A
2494
+ def _vi: FLAT_Real_vi<op, ps, has_sccb>;
2495
+ }
2496
+ def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>;
2497
+ }
2498
+
2484
2499
// Instantiates FLAT_Real_mc_vi (a `_vi` and a `_gfx9` record each) for two
// pseudos looked up by name: NAME itself and NAME#"_SADDR".
// `has_sccb` defaults to the has_sccb field of the NAME pseudo and the same
// value is passed for both instantiations.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
2500
+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2501
+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
2502
+ defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
2503
+ }
2504
+
2505
+ multiclass FLAT_Real_AllAddr_vi_only<bits<7> op,
2485
2506
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2486
2507
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
2487
2508
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
@@ -2491,19 +2512,18 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
2491
2512
FLAT_Real <op, ps>,
2492
2513
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
2493
2514
let AssemblerPredicate = isGFX940Plus;
2494
- let DecoderNamespace = "GFX9 ";
2515
+ let DecoderNamespace = "GFX940 ";
2495
2516
let Inst{13} = ps.sve;
2496
2517
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
2497
2518
}
2498
2519
2499
2520
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
2500
- def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
2501
- let AssemblerPredicate = isGFX8GFX9NotGFX940;
2502
- let OtherPredicates = [isGFX8GFX9NotGFX940];
2503
- }
2504
- def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
2505
- let DecoderNamespace = "GFX9";
2521
+ let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2522
+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
2506
2523
}
2524
+
2525
+ defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
2526
+
2507
2527
let AssemblerPredicate = isGFX940Plus in {
2508
2528
def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
2509
2529
def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2516,11 +2536,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
2516
2536
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2517
2537
2518
2538
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2519
- def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
2520
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" ;
2539
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
2540
+ defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> ;
2521
2541
}
2522
- def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
2523
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" ;
2542
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
2543
+ defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> ;
2524
2544
}
2525
2545
}
2526
2546
@@ -2536,32 +2556,41 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
2536
2556
def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
2537
2557
}
2538
2558
2539
- def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
2540
- def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
2541
- def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
2542
- def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
2543
- def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
2544
- def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
2545
- def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
2546
- def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
2547
-
2548
- def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
2549
- def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2550
- def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
2551
- def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2552
- def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
2553
- def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
2554
- def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
2555
- def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
2556
-
2557
- def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2558
- def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2559
- def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2560
- def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2561
- def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
2562
- def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
2559
// Real encodings for the FLAT load, store and D16 load pseudos. Each line
// expands FLAT_Real_mc_vi with the given 7-bit opcode and pseudo, producing
// both a `_vi` and a `_gfx9` record per instruction.
// NOTE(review): the defm names already end in `_vi`, so the generated records
// would be named e.g. FLAT_LOAD_UBYTE_vi_vi and FLAT_LOAD_UBYTE_vi_gfx9 -
// confirm this naming is intended.
// The DWORDX4 entries (0x17 / 0x1f) are listed before the DWORDX3 entries
// (0x16 / 0x1e), so the opcodes are not in strictly ascending order here.
+ defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
2560
+ defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
2561
+ defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
2562
+ defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
2563
+ defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
2564
+ defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
2565
+ defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
2566
+ defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
2567
+
2568
+ defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
2569
+ defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2570
+ defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
2571
+ defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2572
+ defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
2573
+ defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
2574
+ defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
2575
+ defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
2576
+
2577
+ defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2578
+ defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2579
+ defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2580
+ defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2581
+ defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
2582
+ defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
2563
2583
2564
2584
// Real encodings for a FLAT atomic and its returning form.
// Looks up the pseudo named NAME, then instantiates FLAT_Real_mc_vi (a `_vi`
// and a `_gfx9` record each) for the pseudo named by ps.PseudoInstr and for
// the one named ps.PseudoInstr # "_RTN".
// `has_sccb` defaults to the has_sccb field of the NAME pseudo and is used
// for both instantiations.
multiclass FLAT_Real_Atomics_vi <bits<7> op,
2585
+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2586
+ defvar ps = !cast<FLAT_Pseudo>(NAME);
2587
+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
2588
+ defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
2589
+ }
2590
+
2591
+ // FLAT_Real_mc_vi contains a vi variant and a gfx9 variant. In some cases, only the vi
2592
+ // variant is needed.
2593
+ multiclass FLAT_Real_Atomics_vi_only <bits<7> op,
2565
2594
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2566
2595
defvar ps = !cast<FLAT_Pseudo>(NAME);
2567
2596
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
@@ -2571,11 +2600,17 @@ multiclass FLAT_Real_Atomics_vi <bits<7> op,
2571
2600
// Real encodings for a global atomic in all four forms.
// Inherits FLAT_Real_AllAddr_vi, which covers NAME and NAME#"_SADDR", and adds
// FLAT_Real_mc_vi instantiations for NAME#"_RTN" and NAME#"_SADDR_RTN".
// Every form therefore gets both a `_vi` and a `_gfx9` record.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
2572
2601
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
2573
2602
FLAT_Real_AllAddr_vi<op, has_sccb> {
2603
+ defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
2604
+ defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
2605
+ }
2606
+
2607
// Variant of FLAT_Global_Real_Atomics_vi that emits only FLAT_Real_vi records
// (no FLAT_Real_gfx9 counterpart).
// Inherits FLAT_Real_AllAddr_vi_only and adds `_RTN_vi` and `_SADDR_RTN_vi`
// records for the pseudos named NAME#"_RTN" and NAME#"_SADDR_RTN".
+ multiclass FLAT_Global_Real_Atomics_vi_only<bits<7> op,
2608
+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
2609
+ FLAT_Real_AllAddr_vi_only<op, has_sccb> {
2574
2610
def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
2575
2611
def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
2576
2612
}
2577
2613
2578
-
2579
2614
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>;
2580
2615
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41>;
2581
2616
defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42>;
@@ -2735,10 +2770,10 @@ let AssemblerPredicate = isGFX940Plus in {
2735
2770
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
2736
2771
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
2737
2772
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
2738
- defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi <0x4d>;
2739
- defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi <0x4e>;
2740
- defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi <0x52>;
2741
- defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi <0x52>;
2773
+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_only <0x4d>;
2774
+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_only <0x4e>;
2775
+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_only <0x52>;
2776
+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_only <0x52>;
2742
2777
} // End AssemblerPredicate = isGFX940Plus
2743
2778
2744
2779
//===----------------------------------------------------------------------===//
0 commit comments