Skip to content

Commit 87b1908

Browse files
committed
Define new real instructions in TableGen files so that there's
no need to do the encoding/decoding in C++ code.
1 parent 23457ee commit 87b1908

File tree

5 files changed

+88
-64
lines changed

5 files changed

+88
-64
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,6 +2329,16 @@ def isGFX8GFX9NotGFX90A :
23292329
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
23302330
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
23312331

2332+
// Predicate for GFX9 subtargets that do not have the GFX90A instructions.
//  - Predicate condition: hasGFX90AInsts() is false and the subtarget
//    generation is GFX9.
//  - Assembler condition: FeatureGFX9Insts is set while FeatureGFX90AInsts
//    and FeatureGFX10Insts are both clear.
// The condition string is kept as a balanced expression (no trailing ')'),
// since it is presumably emitted verbatim into generated C++ code.
def isGFX9NotGFX90A :
2333+
Predicate<"!Subtarget->hasGFX90AInsts() &&"
2334+
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
2335+
AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>;
2336+
2337+
// Predicate for GFX8 (VOLCANIC_ISLANDS generation), or GFX9-generation
// subtargets that have the GFX90A instructions.
//  - Predicate condition: generation is VOLCANIC_ISLANDS, or generation is
//    GFX9 and hasGFX90AInsts() is true.
//  - Assembler condition: either FeatureVolcanicIslands or FeatureGFX90AInsts.
// Note: despite "908" in the name, the test is on the GFX90A feature.
def isGFX8orGFX9After908 :
2338+
Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||"
2339+
" ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">,
2340+
AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>;
2341+
23322342
// Predicate for subtargets that have the GFX90A instructions but not the
// GFX940 instructions (checked via hasGFX90AInsts()/hasGFX940Insts() for the
// Predicate condition and via FeatureGFX90AInsts/FeatureGFX940Insts for the
// assembler).
def isGFX90AOnly :
23332343
Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
23342344
AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -788,19 +788,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
788788
}
789789
}
790790

791-
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::FLAT) {
792-
if (isGFX9() && !isGFX90A()) {
793-
// Pre-GFX90A GFX9's use bit 55 as NV.
794-
assert(Bytes_.size() >= 8);
795-
if (Bytes_[6] & 0x80) { // check bit 55
796-
int CPolIdx =
797-
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
798-
MI.getOperand(CPolIdx).setImm(MI.getOperand(CPolIdx).getImm() |
799-
AMDGPU::CPol::NV);
800-
}
801-
}
802-
}
803-
804791
if ((MCII->get(MI.getOpcode()).TSFlags &
805792
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
806793
(STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 76 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
125125
bits<7> saddr;
126126
bits<10> vdst;
127127

128-
bits<5> cpol;
128+
bits<6> cpol;
129129

130130
// Only valid on gfx9
131131
bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2581,7 +2581,28 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
25812581
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
25822582
}
25832583

2584+
// Real FLAT instruction variant for GFX9 subtargets without GFX90A.
// Derives from FLAT_Real_vi with the same <op, ps, has_sccb> arguments and
// overrides:
//  - AssemblerPredicate: isGFX9NotGFX90A
//  - Subtarget:          SIEncodingFamily.GFX9
//  - DecoderNamespace:   "GFX9"
//  - Inst{55}:           taken from cpol{5}, so the bit is encoded/decoded
//                        through the generated tables.
class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
2585+
FLAT_Real_vi <op, ps, has_sccb> {
2586+
let AssemblerPredicate = isGFX9NotGFX90A;
2587+
let Subtarget = SIEncodingFamily.GFX9;
2588+
let DecoderNamespace = "GFX9";
2589+
let Inst{55} = cpol{5}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
2590+
}
2591+
2592+
// Defines two real-instruction records for the pseudo `ps` with opcode `op`:
//  - `_vi`:   a FLAT_Real_vi whose AssemblerPredicate is overridden to
//             isGFX8orGFX9After908.
//  - `_gfx9`: a FLAT_Real_gfx9.
// `has_sccb` defaults to ps.has_sccb and is forwarded to both records.
multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
2593+
let AssemblerPredicate = isGFX8orGFX9After908 in { // GFX8 or GFX9's starting with 90A
2594+
def _vi: FLAT_Real_vi<op, ps, has_sccb>;
2595+
}
2596+
def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>;
2597+
}
2598+
25842599
// Instantiates FLAT_Real_mc_vi for the pseudo named NAME and for the pseudo
// named NAME#"_SADDR", both with opcode `op`.
// `has_sccb` defaults to the has_sccb field of the NAME pseudo and is used for
// both instantiations.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
2600+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2601+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
2602+
defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
2603+
}
2604+
2605+
multiclass FLAT_Real_AllAddr_vi_only<bits<7> op,
25852606
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
25862607
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
25872608
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
@@ -2591,19 +2612,18 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
25912612
FLAT_Real <op, ps>,
25922613
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
25932614
let AssemblerPredicate = isGFX940Plus;
2594-
let DecoderNamespace = "GFX9";
2615+
let DecoderNamespace = "GFX940";
25952616
let Inst{13} = ps.sve;
25962617
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
25972618
}
25982619

25992620
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
2600-
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
2601-
let AssemblerPredicate = isGFX8GFX9NotGFX940;
2602-
let OtherPredicates = [isGFX8GFX9NotGFX940];
2603-
}
2604-
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
2605-
let DecoderNamespace = "GFX9";
2621+
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2622+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
26062623
}
2624+
2625+
defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
2626+
26072627
let AssemblerPredicate = isGFX940Plus in {
26082628
def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
26092629
def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2616,11 +2636,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
26162636
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
26172637

26182638
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2619-
def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
2620-
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
2639+
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
2640+
defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
26212641
}
2622-
def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
2623-
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
2642+
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
2643+
defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
26242644
}
26252645
}
26262646

@@ -2636,32 +2656,41 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
26362656
def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
26372657
}
26382658

2639-
def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
2640-
def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
2641-
def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
2642-
def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
2643-
def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
2644-
def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
2645-
def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
2646-
def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
2647-
2648-
def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
2649-
def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2650-
def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
2651-
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2652-
def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
2653-
def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
2654-
def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
2655-
def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
2656-
2657-
def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2658-
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2659-
def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2660-
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2661-
def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
2662-
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
2659+
// Real encodings for the FLAT load/store pseudos, opcodes 0x10-0x25.
// Each line instantiates FLAT_Real_mc_vi <opcode, pseudo>.
// NOTE(review): the defm names here already end in "_vi"; if FLAT_Real_mc_vi
// appends its own "_vi"/"_gfx9" suffixes, the resulting record names would end
// in "_vi_vi" and "_vi_gfx9" -- confirm this naming is intended.
defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
2660+
defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
2661+
defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
2662+
defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
2663+
defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
2664+
defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
2665+
defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
2666+
defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
2667+
2668+
defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
2669+
defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2670+
defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
2671+
defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2672+
defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
2673+
defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
2674+
defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
2675+
defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
2676+
2677+
defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2678+
defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2679+
defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2680+
defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2681+
defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
2682+
defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
26632683

26642684
// Real encodings for a FLAT atomic and its returning form.
// Looks up the pseudo named NAME as `ps`, then instantiates FLAT_Real_mc_vi
// with opcode `op` for the pseudo named by ps.PseudoInstr and for the pseudo
// named ps.PseudoInstr # "_RTN".
// `has_sccb` defaults to the has_sccb field of the NAME pseudo.
multiclass FLAT_Real_Atomics_vi <bits<7> op,
2685+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2686+
defvar ps = !cast<FLAT_Pseudo>(NAME);
2687+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
2688+
defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
2689+
}
2690+
2691+
// FLAT_Real_mc_vi contains a vi variant and a gfx9 variant. In some cases, only the vi
2692+
// variant is needed.
2693+
multiclass FLAT_Real_Atomics_vi_only <bits<7> op,
26652694
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
26662695
defvar ps = !cast<FLAT_Pseudo>(NAME);
26672696
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
@@ -2672,6 +2701,13 @@ multiclass FLAT_Real_Atomics_vi <bits<7> op,
26722701
// Real encodings for a global atomic in all four forms.
// Inherits the records produced by FLAT_Real_AllAddr_vi<op, has_sccb> and adds
// FLAT_Real_mc_vi instantiations for the pseudos named NAME#"_RTN" and
// NAME#"_SADDR_RTN".
// `has_sccb` defaults to the has_sccb field of the NAME pseudo.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
26732702
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
26742703
FLAT_Real_AllAddr_vi<op, has_sccb> {
2704+
defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
2705+
defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
2706+
}
2707+
2708+
multiclass FLAT_Global_Real_Atomics_vi_only<bits<7> op,
2709+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
2710+
FLAT_Real_AllAddr_vi_only<op, has_sccb> {
26752711
def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
26762712
def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
26772713

@@ -2838,10 +2874,10 @@ let AssemblerPredicate = isGFX940Plus in {
28382874
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
28392875
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
28402876
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
2841-
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
2842-
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
2843-
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
2844-
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
2877+
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_only<0x4d>;
2878+
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_only<0x4e>;
2879+
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_only<0x52>;
2880+
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_only<0x52>;
28452881
} // End AssemblerPredicate = isGFX940Plus
28462882

28472883
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,11 +176,12 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
176176
O << " dlc";
177177
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
178178
O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
179-
if (Imm & ~CPol::ALL_pregfx12)
179+
if (Imm & ~CPol::ALL_pregfx12) {
180180
if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
181181
O << " nv";
182182
else
183183
O << " /* unexpected cache policy bit */";
184+
}
184185
}
185186

186187
void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -394,16 +394,6 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
394394
Encoding |= getImplicitOpSelHiEncoding(Opcode);
395395
}
396396

397-
// For GFX90A+ targets, bit 55 of the FLAT instructions is the ACC bit
398-
// indicating the use of AGPRs. However, pre-GFX90A, the same bit is for NV.
399-
if ((Desc.TSFlags & SIInstrFlags::FLAT) && AMDGPU::isGFX9(STI) &&
400-
!AMDGPU::isGFX90A(STI)) {
401-
int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
402-
unsigned Cpol = MI.getOperand(Idx).getImm();
403-
if (Cpol & AMDGPU::CPol::NV)
404-
Encoding |= (UINT64_C(1) << 55);
405-
}
406-
407397
// GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
408398
// Documentation requires dst to be encoded as EXEC (0x7E),
409399
// but it looks like the actual value encoded for dst operand

0 commit comments

Comments
 (0)