Skip to content

Commit 7cfdcdb

Browse files
committed
Define separate real FLAT instructions for pre-GFX90A GFX9 in the TableGen files so that
the NV bit (bit 55) no longer needs to be encoded/decoded by hand in C++ code.
1 parent 40c04b7 commit 7cfdcdb

File tree

5 files changed

+88
-64
lines changed

5 files changed

+88
-64
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2344,6 +2344,16 @@ def isGFX8GFX9NotGFX90A :
23442344
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
23452345
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
23462346

2347+
// Matches subtargets whose generation is GFX9 and that do not have the GFX90A
// instructions. The assembler-side form additionally excludes GFX10 features.
// The C++ condition string must be a balanced expression: the trailing ')'
// that used to end it had no matching '(' and has been dropped.
def isGFX9NotGFX90A :
2348+
Predicate<"!Subtarget->hasGFX90AInsts() &&"
2349+
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
2350+
AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>;
2351+
2352+
// Codegen form: true when the subtarget generation is VOLCANIC_ISLANDS, or when
// it is GFX9 and the subtarget also has the GFX90A instructions.
// Assembler form: true when either FeatureVolcanicIslands or FeatureGFX90AInsts
// is enabled.
def isGFX8orGFX9After908 :
2353+
Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||"
2354+
" ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">,
2355+
AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>;
2356+
23472357
def isGFX90AOnly :
23482358
Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
23492359
AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -791,19 +791,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
791791
}
792792
}
793793

794-
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::FLAT) {
795-
if (isGFX9() && !isGFX90A()) {
796-
// Pre-GFX90A GFX9's use bit 55 as NV.
797-
assert(Bytes_.size() >= 8);
798-
if (Bytes_[6] & 0x80) { // check bit 55
799-
int CPolIdx =
800-
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
801-
MI.getOperand(CPolIdx).setImm(MI.getOperand(CPolIdx).getImm() |
802-
AMDGPU::CPol::NV);
803-
}
804-
}
805-
}
806-
807794
if ((MCII->get(MI.getOpcode()).TSFlags &
808795
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
809796
(STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 76 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
125125
bits<7> saddr;
126126
bits<10> vdst;
127127

128-
bits<5> cpol;
128+
bits<6> cpol;
129129

130130
// Only valid on gfx9
131131
bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2693,7 +2693,28 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
26932693
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
26942694
}
26952695

2696+
// Real FLAT instruction for GFX9 subtargets selected by isGFX9NotGFX90A.
// Derives from FLAT_Real_vi, overrides the encoding family and decoder
// namespace to GFX9, and additionally wires bit 5 of the cpol operand into
// instruction bit 55.
class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
2697+
FLAT_Real_vi <op, ps, has_sccb> {
2698+
let AssemblerPredicate = isGFX9NotGFX90A;
2699+
let Subtarget = SIEncodingFamily.GFX9;
2700+
let DecoderNamespace = "GFX9";
2701+
let Inst{55} = cpol{5}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
2702+
}
2703+
2704+
// Emits two real instructions for one FLAT pseudo `ps` with opcode `op`:
//   _vi   - a FLAT_Real_vi whose AssemblerPredicate is isGFX8orGFX9After908.
//   _gfx9 - a FLAT_Real_gfx9 (which sets its own AssemblerPredicate).
// `has_sccb` defaults to the pseudo's has_sccb and is forwarded to both.
multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
2705+
let AssemblerPredicate = isGFX8orGFX9After908 in { // GFX8 or GFX9's starting with 90A
2706+
def _vi: FLAT_Real_vi<op, ps, has_sccb>;
2707+
}
2708+
def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>;
2709+
}
2710+
26962711
// Instantiates FLAT_Real_mc_vi twice with the same opcode: once for the pseudo
// named NAME and once for the pseudo named NAME#"_SADDR".
// `has_sccb` defaults to the has_sccb field of the NAME pseudo and is used for
// both instantiations.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
2712+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2713+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
2714+
defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
2715+
}
2716+
2717+
multiclass FLAT_Real_AllAddr_vi_only<bits<7> op,
26972718
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
26982719
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
26992720
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
@@ -2703,19 +2724,18 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
27032724
FLAT_Real <op, ps>,
27042725
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
27052726
let AssemblerPredicate = isGFX940Plus;
2706-
let DecoderNamespace = "GFX9";
2727+
let DecoderNamespace = "GFX940";
27072728
let Inst{13} = ps.sve;
27082729
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
27092730
}
27102731

27112732
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
2712-
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
2713-
let AssemblerPredicate = isGFX8GFX9NotGFX940;
2714-
let OtherPredicates = [isGFX8GFX9NotGFX940];
2715-
}
2716-
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
2717-
let DecoderNamespace = "GFX9";
2733+
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2734+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
27182735
}
2736+
2737+
defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
2738+
27192739
let AssemblerPredicate = isGFX940Plus in {
27202740
def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
27212741
def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2728,11 +2748,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
27282748
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
27292749

27302750
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2731-
def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
2732-
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
2751+
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
2752+
defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
27332753
}
2734-
def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
2735-
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
2754+
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
2755+
defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
27362756
}
27372757
}
27382758

@@ -2748,32 +2768,41 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
27482768
def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
27492769
}
27502770

2751-
def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
2752-
def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
2753-
def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
2754-
def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
2755-
def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
2756-
def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
2757-
def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
2758-
def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
2759-
2760-
def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
2761-
def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2762-
def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
2763-
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2764-
def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
2765-
def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
2766-
def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
2767-
def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
2768-
2769-
def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2770-
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2771-
def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2772-
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2773-
def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
2774-
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
2771+
// Real-instruction definitions for the FLAT load/store pseudos. Each defm
// expands FLAT_Real_mc_vi with the given 7-bit opcode and the named pseudo.
defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
2772+
defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
2773+
defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
2774+
defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
2775+
defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
2776+
defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
2777+
defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
2778+
defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
2779+
2780+
defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
2781+
defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2782+
defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
2783+
defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2784+
defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
2785+
defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
2786+
defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
2787+
defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
2788+
2789+
defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2790+
defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2791+
defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2792+
defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2793+
defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
2794+
defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
27752795

27762796
// Instantiates FLAT_Real_mc_vi for an atomic pseudo and for its "_RTN"
// counterpart, both with opcode `op`. The pseudos are looked up by name from
// the PseudoInstr field of the pseudo called NAME (with "_RTN" appended for
// the second one). `has_sccb` defaults to the has_sccb field of the NAME pseudo.
multiclass FLAT_Real_Atomics_vi <bits<7> op,
2797+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2798+
defvar ps = !cast<FLAT_Pseudo>(NAME);
2799+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
2800+
defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
2801+
}
2802+
2803+
// FLAT_Real_mc_vi contains a vi variant and a gfx9 variant. In some cases, only the vi
2804+
// variant is needed.
2805+
multiclass FLAT_Real_Atomics_vi_only <bits<7> op,
27772806
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
27782807
defvar ps = !cast<FLAT_Pseudo>(NAME);
27792808
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
@@ -2784,6 +2813,13 @@ multiclass FLAT_Real_Atomics_vi <bits<7> op,
27842813
// Inherits the definitions of FLAT_Real_AllAddr_vi<op, has_sccb> and adds
// FLAT_Real_mc_vi instantiations for the pseudos named NAME#"_RTN" and
// NAME#"_SADDR_RTN", using the same opcode and has_sccb value.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
27852814
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
27862815
FLAT_Real_AllAddr_vi<op, has_sccb> {
2816+
defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
2817+
defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
2818+
}
2819+
2820+
multiclass FLAT_Global_Real_Atomics_vi_only<bits<7> op,
2821+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
2822+
FLAT_Real_AllAddr_vi_only<op, has_sccb> {
27872823
def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
27882824
def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
27892825

@@ -2950,10 +2986,10 @@ let AssemblerPredicate = isGFX940Plus in {
29502986
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
29512987
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
29522988
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
2953-
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
2954-
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
2955-
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
2956-
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
2989+
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_only<0x4d>;
2990+
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_only<0x4e>;
2991+
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_only<0x52>;
2992+
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_only<0x52>;
29572993
} // End AssemblerPredicate = isGFX940Plus
29582994

29592995
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,12 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
186186
O << " dlc";
187187
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
188188
O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
189-
if (Imm & ~CPol::ALL_pregfx12)
189+
if (Imm & ~CPol::ALL_pregfx12) {
190190
if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
191191
O << " nv";
192192
else
193193
O << " /* unexpected cache policy bit */";
194+
}
194195
}
195196

196197
void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -403,16 +403,6 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
403403
Encoding |= getImplicitOpSelHiEncoding(Opcode);
404404
}
405405

406-
// For GFX90A+ targets, bit 55 of the FLAT instructions is the ACC bit
407-
// indicating the use of AGPRs. However, pre-GFX90A, the same bit is for NV.
408-
if ((Desc.TSFlags & SIInstrFlags::FLAT) && AMDGPU::isGFX9(STI) &&
409-
!AMDGPU::isGFX90A(STI)) {
410-
int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
411-
unsigned Cpol = MI.getOperand(Idx).getImm();
412-
if (Cpol & AMDGPU::CPol::NV)
413-
Encoding |= (UINT64_C(1) << 55);
414-
}
415-
416406
// GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
417407
// Documentation requires dst to be encoded as EXEC (0x7E),
418408
// but it looks like the actual value encoded for dst operand

0 commit comments

Comments
 (0)