Skip to content

Commit 40c04b7

Browse files
committed
[AMDGPU][MC] GFX9 - Support NV bit in FLAT instructions in pre-GFX90A targets
This patch enables support of the NV (non-volatile) bit in FLAT instructions on GFX9 (pre-GFX90A) targets.
1 parent 85265a9 commit 40c04b7

File tree

6 files changed

+1758
-2
lines changed

6 files changed

+1758
-2
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5371,7 +5371,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
53715371
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
53725372
Error(S, "scale_offset is not supported on this GPU");
53735373
}
5374-
if (CPol & CPol::NV) {
5374+
if ((CPol & CPol::NV) && (!isGFX9() || isGFX90A())) {
5375+
// nv is only supported on GFX9 targets prior to GFX90A; reject it elsewhere.
53755376
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
53765377
StringRef CStr(S.getPointer());
53775378
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
@@ -7165,6 +7166,13 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
71657166
unsigned Enabled = 0, Seen = 0;
71667167
for (;;) {
71677168
SMLoc S = getLoc();
7169+
7170+
if (isGFX9() && trySkipId("nv")) {
7171+
Enabled |= CPol::NV;
7172+
Seen |= CPol::NV;
7173+
continue;
7174+
}
7175+
71687176
bool Disabling;
71697177
unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
71707178
if (!CPol)

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,19 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
791791
}
792792
}
793793

794+
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::FLAT) {
795+
if (isGFX9() && !isGFX90A()) {
796+
// GFX9 targets prior to GFX90A use bit 55 as the NV bit.
797+
assert(Bytes_.size() >= 8);
798+
if (Bytes_[6] & 0x80) { // check bit 55
799+
int CPolIdx =
800+
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
801+
MI.getOperand(CPolIdx).setImm(MI.getOperand(CPolIdx).getImm() |
802+
AMDGPU::CPol::NV);
803+
}
804+
}
805+
}
806+
794807
if ((MCII->get(MI.getOpcode()).TSFlags &
795808
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
796809
(STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,10 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
187187
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
188188
O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
189189
if (Imm & ~CPol::ALL_pregfx12)
190-
O << " /* unexpected cache policy bit */";
190+
if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
191+
O << " nv";
192+
else
193+
O << " /* unexpected cache policy bit */";
191194
}
192195

193196
void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,16 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
403403
Encoding |= getImplicitOpSelHiEncoding(Opcode);
404404
}
405405

406+
// For GFX90A+ targets, bit 55 of the FLAT instructions is the ACC bit
407+
// indicating the use of AGPRs. However, pre-GFX90A, the same bit is for NV.
408+
if ((Desc.TSFlags & SIInstrFlags::FLAT) && AMDGPU::isGFX9(STI) &&
409+
!AMDGPU::isGFX90A(STI)) {
410+
int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
411+
unsigned Cpol = MI.getOperand(Idx).getImm();
412+
if (Cpol & AMDGPU::CPol::NV)
413+
Encoding |= (UINT64_C(1) << 55);
414+
}
415+
406416
// GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
407417
// Documentation requires dst to be encoded as EXEC (0x7E),
408418
// but it looks like the actual value encoded for dst operand

0 commit comments

Comments
 (0)