Skip to content

Conversation

@rampitec
Copy link
Collaborator

No description provided.

Copy link
Collaborator Author

rampitec commented Jul 21, 2025

@rampitec rampitec requested a review from changpeng July 21, 2025 19:33
@rampitec rampitec marked this pull request as ready for review July 21, 2025 19:33
@llvmbot llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels Jul 21, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 21, 2025

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Patch is 67.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149881.diff

17 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+27-1)
  • (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+2)
  • (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+3)
  • (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3)
  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+2)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+1)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+13)
  • (modified) llvm/lib/Target/AMDGPU/SMInstructions.td (+1)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+19)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+3)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_smem.s (+27)
  • (added) llvm/test/MC/AMDGPU/gfx1250_asm_smem_err.s (+16)
  • (added) llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf_err.s (+6)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s (+272)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s (+55)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_smem.txt (+12)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt (+189)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5bb0e3648d2ec..421fc429048ff 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5324,6 +5324,12 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
   unsigned CPol = Inst.getOperand(CPolPos).getImm();
 
   if (!isGFX1250()) {
+    if (CPol & CPol::SCAL) {
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+      StringRef CStr(S.getPointer());
+      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
+      Error(S, "scale_offset is not supported on this GPU");
+    }
     if (CPol & CPol::NV) {
       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
       StringRef CStr(S.getPointer());
@@ -5332,6 +5338,13 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
     }
   }
 
+  if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
+    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+    StringRef CStr(S.getPointer());
+    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
+    Error(S, "scale_offset is not supported for this instruction");
+  }
+
   if (isGFX12Plus())
     return validateTHAndScopeBits(Inst, Operands, CPol);
 
@@ -7003,6 +7016,7 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
     ParseStatus ResTH = ParseStatus::NoMatch;
     ParseStatus ResScope = ParseStatus::NoMatch;
     ParseStatus ResNV = ParseStatus::NoMatch;
+    ParseStatus ResScal = ParseStatus::NoMatch;
 
     for (;;) {
       if (ResTH.isNoMatch()) {
@@ -7041,10 +7055,22 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
         }
       }
 
+      if (ResScal.isNoMatch()) {
+        if (trySkipId("scale_offset")) {
+          ResScal = ParseStatus::Success;
+          CPolVal |= CPol::SCAL;
+          continue;
+        } else if (trySkipId("no", "scale_offset")) {
+          ResScal = ParseStatus::Success;
+          continue;
+        }
+      }
+
       break;
     }
 
-    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch())
+    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
+        ResScal.isNoMatch())
       return ParseStatus::NoMatch;
 
     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index f7f29f17f9d0e..1cc717bb437de 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -2941,6 +2941,7 @@ multiclass VFLAT_Real_gfx12 <bits<8> op, string name = get_FLAT_ps<NAME>.Mnemoni
     let DecoderNamespace = "GFX12";
 
     let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch};
+    let Inst{48} = cpol{CPolBit.SCAL}; // scale offset
   }
 }
 
@@ -3170,6 +3171,7 @@ multiclass VFLAT_Real_gfx1250<bits<8> op,
     let DecoderNamespace = "GFX1250";
 
     let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch};
+    let Inst{48} = cpol{CPolBit.SCAL}; // scale offset
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 268162bcada47..cbc517d3da680 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1162,6 +1162,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
 
+  // Subtarget has the scale_offset bit (per-opcode: supportsScaleOffset()).
+  bool hasScaleOffset() const { return GFX1250Insts; }
+
   bool hasFlatGVSMode() const { return FlatGVSMode; }
 
   bool enableSIScheduler() const {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 197bb3f0b569b..11b072e05a6e1 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -157,6 +157,9 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
     const int64_t TH = Imm & CPol::TH;
     const int64_t Scope = Imm & CPol::SCOPE;
 
+    if (Imm & CPol::SCAL)
+      O << " scale_offset";
+
     printTH(MI, TH, Scope, O);
     printScope(Scope, O);
 
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index d3790880532a8..3902d4c3b1027 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -402,6 +402,8 @@ enum CPol {
 
   SWZ = 1 << 6, // Swizzle bit
 
+  SCAL = 1 << 11, // Scale offset bit
+
   ALL = TH | SCOPE,
 
   // Helper bits
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 6b419347c01d9..89d9b0d32b25b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -318,6 +318,7 @@ def CPolBit {
   int DLC = 2;
   int SCC = 4;
   int NV = 5;
+  int SCAL = 11;
 }
 
 class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e2a2525d909bd..571f3efd68260 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5482,6 +5482,19 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     }
   }
 
+  if (const MachineOperand *CPol = getNamedOperand(MI, AMDGPU::OpName::cpol)) {
+    if (CPol->getImm() & AMDGPU::CPol::SCAL) {
+      if (!ST.hasScaleOffset()) {
+        ErrInfo = "Subtarget does not support offset scaling";
+        return false;
+      }
+      if (!AMDGPU::supportsScaleOffset(*this, MI.getOpcode())) {
+        ErrInfo = "Instruction does not support offset scaling";
+        return false;
+      }
+    }
+  }
+
   return true;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index d8b52d271a964..0850c41c933de 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -1488,6 +1488,7 @@ class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offs
   let Inst{20} = cpol{CPolBit.NV}; // non-volatile
   let Inst{22-21} = cpol{4-3}; // scope
   let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
+  let Inst{56} = cpol{CPolBit.SCAL}; // scale offset
 }
 
 multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 9c6c3746f9007..b5b3cc97569ed 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3228,6 +3228,25 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                           : getGfx9BufferFormatInfo(Format);
 }
 
+// Returns true if \p Opcode accepts the scale_offset modifier, based on its
+// TSFlags (SMRD / FLAT / FlatScratch) and on its vaddr/saddr named operands.
+bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
+  uint64_t TSFlags = MII.get(Opcode).TSFlags;
+
+  if (TSFlags & SIInstrFlags::SMRD)
+    return !getSMEMIsBuffer(Opcode);
+  if (!(TSFlags & SIInstrFlags::FLAT))
+    return false;
+
+  // Only SV and SVS modes are supported.
+  if (TSFlags & SIInstrFlags::FlatScratch)
+    return hasNamedOperand(Opcode, OpName::vaddr);
+
+  // Only GVS mode is supported.
+  return hasNamedOperand(Opcode, OpName::vaddr) &&
+         hasNamedOperand(Opcode, OpName::saddr);
+}
+
 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
   for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
     int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index bde951b43d105..c09a9d694f3d8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1757,6 +1757,9 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID);
 /// \returns true if the intrinsic is uniform
 bool isIntrinsicAlwaysUniform(unsigned IntrID);
 
+/// \returns true if a memory instruction supports scale_offset modifier.
+bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
+
 /// \returns lds block size in terms of dwords. \p
 /// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
 /// must be defined in terms of bytes.
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
index 899c4c7aca0ba..800f66221114e 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_smem.s
@@ -12,3 +12,30 @@ s_buffer_load_i8 s5, s[4:7], s0 nv
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
 // GFX12-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 nv
 // GFX12-ERR-NEXT:{{^}}                                ^
+
+s_load_b32 s4, s[2:3], 0xa scale_offset
+// GFX1250: s_load_b32 s4, s[2:3], 0xa scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x0a,0x00,0x00,0xf9]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 0xa scale_offset
+// GFX12-ERR-NEXT:{{^}}                           ^
+
+s_load_b32 s4, s[2:3], 0xa scale_offset nv
+// GFX1250: s_load_b32 s4, s[2:3], 0xa scale_offset nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf9]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 0xa scale_offset nv
+// GFX12-ERR-NEXT:{{^}}                           ^
+// GFX12-ERR-NEXT: error: nv is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 0xa scale_offset nv
+// GFX12-ERR-NEXT:{{^}}                                        ^
+
+s_load_b32 s4, s[2:3], s5 offset:32 scale_offset
+// GFX1250: s_load_b32 s4, s[2:3], s5 offset:0x20 scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x20,0x00,0x00,0x0b]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], s5 offset:32 scale_offset
+// GFX12-ERR-NEXT:{{^}}                                    ^
+
+s_load_b32 s4, s[2:3], m0 offset:32 scale_offset
+// GFX1250: s_load_b32 s4, s[2:3], m0 offset:0x20 scale_offset ; encoding: [0x01,0x01,0x00,0xf4,0x20,0x00,0x00,0xfb]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], m0 offset:32 scale_offset
+// GFX12-ERR-NEXT:{{^}}                                    ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_smem_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_smem_err.s
new file mode 100644
index 0000000000000..e57d4fc760309
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_smem_err.s
@@ -0,0 +1,16 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
+
+s_buffer_load_i8 s5, s[4:7], s0 scale_offset
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction
+// GFX1250-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 scale_offset
+// GFX1250-ERR-NEXT:{{^}}                                ^
+
+s_prefetch_data s[18:19], 100, s10, 7 nv
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX1250-ERR-NEXT:{{^}}s_prefetch_data s[18:19], 100, s10, 7 nv
+// GFX1250-ERR-NEXT:{{^}}                                      ^
+
+s_prefetch_data s[18:19], 100, s10, 7 scale_offset
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX1250-ERR-NEXT:{{^}}s_prefetch_data s[18:19], 100, s10, 7 scale_offset
+// GFX1250-ERR-NEXT:{{^}}                                      ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf_err.s
new file mode 100644
index 0000000000000..731eb6770ceed
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf_err.s
@@ -0,0 +1,6 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
+
+buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 scale_offset
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction
+// GFX1250-ERR-NEXT:{{^}}buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 scale_offset
+// GFX1250-ERR-NEXT:{{^}}                                                      ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
index 488040e1b5390..d3a49f2eb25fa 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -61,6 +61,54 @@ scratch_load_b32 v5, v2, off nv
 // GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off nv
 // GFX12-ERR-NEXT:{{^}}                             ^
 
+global_load_b32 v5, v1, s[2:3] offset:32 scale_offset
+// GFX1250: global_load_b32 v5, v1, s[2:3] offset:32 scale_offset ; encoding: [0x02,0x00,0x05,0xee,0x05,0x00,0x01,0x00,0x01,0x20,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_load_b32 v5, v1, s[2:3] offset:32 scale_offset
+// GFX12-ERR-NEXT:{{^}}                                         ^
+
+global_store_b32 v5, v1, s[2:3] offset:32 scale_offset
+// GFX1250: global_store_b32 v5, v1, s[2:3] offset:32 scale_offset ; encoding: [0x02,0x80,0x06,0xee,0x00,0x00,0x81,0x00,0x05,0x20,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_store_b32 v5, v1, s[2:3] offset:32 scale_offset
+// GFX12-ERR-NEXT:{{^}}                                          ^
+
+global_atomic_add_u32 v2, v5, s[2:3] scale_offset
+// GFX1250: global_atomic_add_u32 v2, v5, s[2:3] scale_offset ; encoding: [0x02,0x40,0x0d,0xee,0x00,0x00,0x81,0x02,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}global_atomic_add_u32 v2, v5, s[2:3] scale_offset
+// GFX12-ERR-NEXT:{{^}}                                     ^
+
+scratch_load_b32 v5, v2, off scale_offset
+// GFX1250: scratch_load_b32 v5, v2, off scale_offset ; encoding: [0x7c,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off scale_offset
+// GFX12-ERR-NEXT:{{^}}                             ^
+
+scratch_load_b32 v5, v2, off offset:32 scale_offset
+// GFX1250: scratch_load_b32 v5, v2, off offset:32 scale_offset ; encoding: [0x7c,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x20,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off offset:32 scale_offset
+// GFX12-ERR-NEXT:{{^}}                                       ^
+
+scratch_load_b32 v5, v2, s1 offset:32 scale_offset
+// GFX1250: scratch_load_b32 v5, v2, s1 offset:32 scale_offset ; encoding: [0x01,0x00,0x05,0xed,0x05,0x00,0x03,0x00,0x02,0x20,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, s1 offset:32 scale_offset
+// GFX12-ERR-NEXT:{{^}}                                      ^
+
+scratch_store_b32 v2, v5, off scale_offset
+// GFX1250: scratch_store_b32 v2, v5, off scale_offset ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v5, off scale_offset
+// GFX12-ERR-NEXT:{{^}}                              ^
+
+scratch_store_b32 v2, v5, s1 scale_offset
+// GFX1250: scratch_store_b32 v2, v5, s1 scale_offset ; encoding: [0x01,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: scale_offset is not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v5, s1 scale_offset
+// GFX12-ERR-NEXT:{{^}}                             ^
+
 tensor_save s[0:1]
 // GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
@@ -81,10 +129,18 @@ tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS
 // GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
+flat_atomic_add_f32 v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN
+// GFX1250: flat_atomic_add_f32 v1, v2, s[2:3] offset:8000000 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x15,0xec,0x00,0x00,0x11,0x01,0x01,0x00,0x12,0x7a]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 flat_atomic_add_f32 v2, v3, s[2:3] offset:64
 // GFX1250: flat_atomic_add_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x15,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
+flat_atomic_add_u32 v1, v2, s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN
+// GFX1250: flat_atomic_add_u32 v1, v2, s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x0d,0xec,0x00,0x00,0x11,0x01,0x01,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 flat_atomic_add_u32 v2, v3, s[2:3] offset:-64
 // GFX1250: flat_atomic_add_u32 v2, v3, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0xc0,0xff,0xff]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -93,6 +149,14 @@ flat_atomic_add_u64 v2, v[2:3], s[2:3] offset:64
 // GFX1250: flat_atomic_add_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
+flat_atomic_add_u64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN
+// GFX1250: flat_atomic_add_u64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x10,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+
+flat_atomic_and_b32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN
+// GFX1250: flat_atomic_and_b32 v1, v2, s[2:3] offset:64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0f,0xec,0x00,0x00,0x11,0x01,0x01,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 flat_atomic_and_b32 v2, v3, s[2:3] offset:64
 // GFX1250: flat_atomic_and_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -101,18 +165,38 @@ flat_atomic_and_b64 v2, v[2:3], s[2:3] offset:64
 // GFX1250: flat_atomic_and_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
+flat_atomic_and_b64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN
+// GFX1250: flat_atomic_and_b64 v[0:1], v2, v[2:3], s[2:3] offset:-64 scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x12,0xec,0x00,0x00,0x11,0x01,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
+
+flat_atomic_cmpswap_b32 v0, v2, v[2:3], s[2:3] scale_offset th:TH_ATOMIC_RETURN
+// GFX1250: flat_atomic_cmpswap_b32 v0, v2, v[2:3], s[2:3] scale_offset th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0d,0xec,0x00,0x00,0x11,0x01,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 flat_atomic_cmpswap_b32 v2, v[2:3], s[2:3] offset:64
 // GFX1250: flat_atomic_cmpswap_b32 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: e...
[truncated]

Base automatically changed from users/rampitec/07-21-_amdgpu_verify_asm_vgpr_alignment_on_gfx1250 to main July 21, 2025 21:23
@rampitec rampitec force-pushed the users/rampitec/07-21-_amdgpu_mc_support_for_gfx1250_scale_offset_modifier branch from 7b416c7 to d25d6ba Compare July 21, 2025 21:24
@rampitec rampitec merged commit a0b854d into main Jul 21, 2025
9 checks passed
@rampitec rampitec deleted the users/rampitec/07-21-_amdgpu_mc_support_for_gfx1250_scale_offset_modifier branch July 21, 2025 22:05
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Jul 28, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:mc Machine (object) code

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants