[AMDGPU][MC] GFX9 - Support NV bit in FLAT instructions in pre-GFX90A

jwanggit86 · jwanggit86 · commit 40c04b7b965c · 2025-10-16T13:22:01.000-07:00
targets

This patch enables support of the NV (non-volatile) bit in FLAT
instructions in GFX9 (pre-GFX90A) targets.
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5371,7 +5371,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
       Error(S, "scale_offset is not supported on this GPU");
     }
-    if (CPol & CPol::NV) {
+    if ((CPol & CPol::NV) && (!isGFX9() || isGFX90A())) {
+      // nv not supported on GFX90A+
       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
       StringRef CStr(S.getPointer());
       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
@@ -7165,6 +7166,13 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
   unsigned Enabled = 0, Seen = 0;
   for (;;) {
     SMLoc S = getLoc();
+
+    if (isGFX9() && trySkipId("nv")) {
+      Enabled |= CPol::NV;
+      Seen |= CPol::NV;
+      continue;
+    }
+
     bool Disabling;
     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
     if (!CPol)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -791,6 +791,19 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     }
   }
 
+  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::FLAT) {
+    if (isGFX9() && !isGFX90A()) {
+      // Pre-GFX90A GFX9's use bit 55 as NV.
+      assert(Bytes_.size() >= 8);
+      if (Bytes_[6] & 0x80) { // check bit 55
+        int CPolIdx =
+            AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
+        MI.getOperand(CPolIdx).setImm(MI.getOperand(CPolIdx).getImm() |
+                                      AMDGPU::CPol::NV);
+      }
+    }
+  }
+
   if ((MCII->get(MI.getOpcode()).TSFlags &
        (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
       (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -187,7 +187,10 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
     O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
   if (Imm & ~CPol::ALL_pregfx12)
-    O << " /* unexpected cache policy bit */";
+    if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
+      O << " nv";
+    else
+      O << " /* unexpected cache policy bit */";
 }
 
 void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -403,6 +403,16 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
     Encoding |= getImplicitOpSelHiEncoding(Opcode);
   }
 
+  // For GFX90A+ targets, bit 55 of the FLAT instructions is the ACC bit
+  // indicating the use of AGPRs. However, pre-GFX90A, the same bit is for NV.
+  if ((Desc.TSFlags & SIInstrFlags::FLAT) && AMDGPU::isGFX9(STI) &&
+      !AMDGPU::isGFX90A(STI)) {
+    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol);
+    unsigned Cpol = MI.getOperand(Idx).getImm();
+    if (Cpol & AMDGPU::CPol::NV)
+      Encoding |= (UINT64_C(1) << 55);
+  }
+
   // GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
   // Documentation requires dst to be encoded as EXEC (0x7E),
   // but it looks like the actual value encoded for dst operand
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt