Skip to content

Commit ea20bff

Browse files
committed
Define the new real instructions in TableGen (.td) files so that there is
no need to do the encoding/decoding in C++ code.
1 parent be33095 commit ea20bff

File tree

5 files changed

+88
-65
lines changed

5 files changed

+88
-65
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2309,6 +2309,16 @@ def isGFX8GFX9NotGFX90A :
23092309
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
23102310
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
23112311

2312+
// Predicate for GFX9-generation subtargets that do not have the GFX90A
// instructions. The assembler form additionally excludes FeatureGFX10Insts.
// The condition string has to be a well-formed C++ expression, so it must not
// carry an unmatched closing parenthesis.
def isGFX9NotGFX90A :
2313+
Predicate<"!Subtarget->hasGFX90AInsts() &&"
2314+
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
2315+
AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>;
2316+
2317+
// Predicate that holds for the VOLCANIC_ISLANDS (GFX8) generation, or for
// GFX9-generation subtargets that have the GFX90A instructions.
// The assembler form accepts either FeatureVolcanicIslands or
// FeatureGFX90AInsts.
def isGFX8orGFX9After908 :
2318+
Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||"
2319+
" ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">,
2320+
AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>;
2321+
23122322
// Predicate for subtargets that have the GFX90A instructions but not the
// GFX940 instructions.
def isGFX90AOnly :
23132323
Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
23142324
AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -823,19 +823,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
823823
}
824824
}
825825

826-
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::FLAT) {
827-
if (isGFX9() && !isGFX90A()) {
828-
// Pre-GFX90A GFX9's use bit 55 as NV.
829-
assert(Bytes_.size() >= 8);
830-
if (Bytes_[6] & 0x80) { // check bit 55
831-
int CPolIdx =
832-
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
833-
MI.getOperand(CPolIdx).setImm(MI.getOperand(CPolIdx).getImm() |
834-
AMDGPU::CPol::NV);
835-
}
836-
}
837-
}
838-
839826
if ((MCII->get(MI.getOpcode()).TSFlags &
840827
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
841828
(STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 76 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
123123
bits<7> saddr;
124124
bits<10> vdst;
125125

126-
bits<5> cpol;
126+
bits<6> cpol;
127127

128128
// Only valid on gfx9
129129
bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2481,7 +2481,28 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
24812481
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
24822482
}
24832483

2484+
// Real FLAT instruction for GFX9 subtargets without the GFX90A instructions.
// Derives from FLAT_Real_vi and overrides the assembler predicate, the
// encoding family (SIEncodingFamily.GFX9) and the decoder namespace ("GFX9").
// It also places cpol bit 5 into instruction bit 55.
//   op       - 7-bit opcode forwarded to FLAT_Real_vi.
//   ps       - the FLAT pseudo this real instruction is derived from.
//   has_sccb - forwarded to FLAT_Real_vi; defaults to ps.has_sccb.
class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
2485+
FLAT_Real_vi <op, ps, has_sccb> {
2486+
let AssemblerPredicate = isGFX9NotGFX90A;
2487+
let Subtarget = SIEncodingFamily.GFX9;
2488+
let DecoderNamespace = "GFX9";
2489+
let Inst{55} = cpol{5}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
2490+
}
2491+
2492+
// Emits two real instructions for the pseudo `ps`, both with opcode `op`:
//   <prefix>_vi   - a FLAT_Real_vi whose assembler predicate is set to
//                   isGFX8orGFX9After908.
//   <prefix>_gfx9 - a FLAT_Real_gfx9.
// Because the suffixes are appended here, the defm prefix used by callers
// should not already end in "_vi".
multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
2493+
let AssemblerPredicate = isGFX8orGFX9After908 in { // GFX8 or GFX9's starting with 90A
2494+
def _vi: FLAT_Real_vi<op, ps, has_sccb>;
2495+
}
2496+
def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>;
2497+
}
2498+
24842499
// Instantiates FLAT_Real_mc_vi (the _vi and _gfx9 reals) for the pseudo named
// NAME and for its NAME#"_SADDR" counterpart, using the same opcode `op` for
// both. has_sccb defaults to the has_sccb field of the NAME pseudo.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
2500+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2501+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
2502+
defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
2503+
}
2504+
2505+
multiclass FLAT_Real_AllAddr_vi_only<bits<7> op,
24852506
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
24862507
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
24872508
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
@@ -2491,19 +2512,18 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
24912512
FLAT_Real <op, ps>,
24922513
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
24932514
let AssemblerPredicate = isGFX940Plus;
2494-
let DecoderNamespace = "GFX9";
2515+
let DecoderNamespace = "GFX940";
24952516
let Inst{13} = ps.sve;
24962517
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
24972518
}
24982519

24992520
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
2500-
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
2501-
let AssemblerPredicate = isGFX8GFX9NotGFX940;
2502-
let OtherPredicates = [isGFX8GFX9NotGFX940];
2503-
}
2504-
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
2505-
let DecoderNamespace = "GFX9";
2521+
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2522+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
25062523
}
2524+
2525+
defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
2526+
25072527
let AssemblerPredicate = isGFX940Plus in {
25082528
def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
25092529
def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2516,11 +2536,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
25162536
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
25172537

25182538
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
2519-
def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
2520-
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
2539+
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
2540+
defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
25212541
}
2522-
def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
2523-
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
2542+
let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
2543+
defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
25242544
}
25252545
}
25262546

@@ -2536,32 +2556,41 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
25362556
def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
25372557
}
25382558

2539-
def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
2540-
def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
2541-
def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
2542-
def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
2543-
def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
2544-
def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
2545-
def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
2546-
def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
2547-
2548-
def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
2549-
def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2550-
def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
2551-
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2552-
def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
2553-
def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
2554-
def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
2555-
def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
2556-
2557-
def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2558-
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2559-
def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2560-
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2561-
def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
2562-
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
2559+
// FLAT_Real_mc_vi appends the "_vi" and "_gfx9" suffixes itself, so the defm
// prefix is the bare pseudo name. This yields e.g. FLAT_LOAD_UBYTE_vi and
// FLAT_LOAD_UBYTE_gfx9 rather than FLAT_LOAD_UBYTE_vi_vi / _vi_gfx9.
defm FLAT_LOAD_UBYTE : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
2560+
defm FLAT_LOAD_SBYTE : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
2561+
defm FLAT_LOAD_USHORT : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
2562+
defm FLAT_LOAD_SSHORT : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
2563+
defm FLAT_LOAD_DWORD : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
2564+
defm FLAT_LOAD_DWORDX2 : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
2565+
defm FLAT_LOAD_DWORDX4 : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
2566+
defm FLAT_LOAD_DWORDX3 : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
2567+
2568+
defm FLAT_STORE_BYTE : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
2569+
defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
2570+
defm FLAT_STORE_SHORT : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
2571+
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
2572+
defm FLAT_STORE_DWORD : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
2573+
defm FLAT_STORE_DWORDX2 : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
2574+
defm FLAT_STORE_DWORDX4 : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
2575+
defm FLAT_STORE_DWORDX3 : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
2576+
2577+
defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
2578+
defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
2579+
defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
2580+
defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
2581+
defm FLAT_LOAD_SHORT_D16 : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
2582+
defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
25632583

25642584
// Emits the _vi and _gfx9 real instructions (through FLAT_Real_mc_vi) for a
// FLAT atomic and for its returning "_RTN" form, both with opcode `op`.
// The pseudos are looked up by name from the PseudoInstr field of the pseudo
// called NAME; has_sccb defaults to that pseudo's has_sccb field.
multiclass FLAT_Real_Atomics_vi <bits<7> op,
2585+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
2586+
defvar ps = !cast<FLAT_Pseudo>(NAME);
2587+
defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
2588+
defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
2589+
}
2590+
2591+
// FLAT_Real_mc_vi contains a vi variant and a gfx9 variant. In some cases, only the vi
2592+
// variant is needed.
2593+
multiclass FLAT_Real_Atomics_vi_only <bits<7> op,
25652594
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
25662595
defvar ps = !cast<FLAT_Pseudo>(NAME);
25672596
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
@@ -2571,11 +2600,17 @@ multiclass FLAT_Real_Atomics_vi <bits<7> op,
25712600
// Real instructions for a global atomic: inherits the plain and _SADDR forms
// from FLAT_Real_AllAddr_vi and adds the returning forms (_RTN and
// _SADDR_RTN). Each form is expanded through FLAT_Real_mc_vi, i.e. into a _vi
// and a _gfx9 real instruction with opcode `op`.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
25722601
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
25732602
FLAT_Real_AllAddr_vi<op, has_sccb> {
2603+
defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
2604+
defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
2605+
}
2606+
2607+
// Variant of FLAT_Global_Real_Atomics_vi that defines only the _vi real
// instructions (no _gfx9 counterpart): inherits from FLAT_Real_AllAddr_vi_only
// and adds the _RTN_vi and _SADDR_RTN_vi forms directly as FLAT_Real_vi defs.
multiclass FLAT_Global_Real_Atomics_vi_only<bits<7> op,
2608+
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
2609+
FLAT_Real_AllAddr_vi_only<op, has_sccb> {
25742610
def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
25752611
def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
25762612
}
25772613

2578-
25792614
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>;
25802615
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41>;
25812616
defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42>;
@@ -2735,10 +2770,10 @@ let AssemblerPredicate = isGFX940Plus in {
27352770
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
27362771
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
27372772
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
2738-
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
2739-
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
2740-
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
2741-
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
2773+
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_only<0x4d>;
2774+
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_only<0x4e>;
2775+
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_only<0x52>;
2776+
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_only<0x52>;
27422777
} // End AssemblerPredicate = isGFX940Plus
27432778

27442779
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,12 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
179179
O << " dlc";
180180
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
181181
O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
182-
if (Imm & ~CPol::ALL_pregfx12)
182+
if (Imm & ~CPol::ALL_pregfx12) {
183183
if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
184184
O << " nv";
185185
else
186186
O << " /* unexpected cache policy bit */";
187+
}
187188
}
188189

189190
void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -396,16 +396,6 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
396396
Encoding |= getImplicitOpSelHiEncoding(Opcode);
397397
}
398398

399-
// For GFX90A+ targets, bit 55 of the FLAT instructions is the ACC bit
400-
// indicating the use of AGPRs. However, pre-GFX90A, the same bit is for NV.
401-
if ((Desc.TSFlags & SIInstrFlags::FLAT) && AMDGPU::isGFX9(STI) &&
402-
!AMDGPU::isGFX90A(STI)) {
403-
int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
404-
unsigned Cpol = MI.getOperand(Idx).getImm();
405-
if (Cpol & AMDGPU::CPol::NV)
406-
Encoding |= (UINT64_C(1) << 55);
407-
}
408-
409399
// GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
410400
// Documentation requires dst to be encoded as EXEC (0x7E),
411401
// but it looks like the actual value encoded for dst operand

0 commit comments

Comments
 (0)