Skip to content

Conversation

@rampitec
Copy link
Collaborator

@rampitec rampitec commented Aug 6, 2025

Co-authored-by: Pierre Vanhoutryve [email protected]

Copy link
Collaborator Author

rampitec commented Aug 6, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@rampitec rampitec marked this pull request as ready for review August 6, 2025 20:17
@llvmbot llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels Aug 6, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 6, 2025

@llvm/pr-subscribers-mc

Author: Stanislav Mekhanoshin (rampitec)

Changes

Co-authored-by: Pierre Vanhoutryve <[email protected]>


Full diff: https://github.com/llvm/llvm-project/pull/152374.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+4)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp (+4)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_operands.s (+25)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt (+12)
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index deadb7aed0f69..2d0102fffe5ea 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -536,6 +536,10 @@ enum Id { // HwRegCode, (6) [5:0]
   ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
   ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
   ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
+
+  // GFX1250
+  ID_XNACK_STATE_PRIV = 33,
+  ID_XNACK_MASK_gfx1250 = 34,
 };
 
 enum Offset : unsigned { // Offset, (5) [10:6]
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index e433b85489e6e..3d9455fc51a39 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -223,6 +223,10 @@ static constexpr CustomOperand Operands[] = {
   {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
   {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
 
+  // GFX1250
+  {{"HW_REG_XNACK_STATE_PRIV"},       ID_XNACK_STATE_PRIV,       isGFX1250},
+  {{"HW_REG_XNACK_MASK"},             ID_XNACK_MASK_gfx1250,     isGFX1250},
+
   // Aliases
   {{"HW_REG_HW_ID"},                  ID_HW_ID1,                 isGFX10},
 };
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s b/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
index 8b7465b5df574..100fc981c4f81 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
@@ -27,3 +27,28 @@ s_mov_b64 s[0:1], src_shared_limit
 
 s_getreg_b32 s1, hwreg(33)
 // GFX1250: encoding: [0x21,0xf8,0x81,0xb8]
+
+s_getreg_b32 s1, hwreg(HW_REG_XNACK_STATE_PRIV)
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU
+// GFX1250: encoding: [0x21,0xf8,0x81,0xb8]
+
+s_getreg_b32 s1, hwreg(34)
+// GFX1250: encoding: [0x22,0xf8,0x81,0xb8]
+
+s_getreg_b32 s1, hwreg(HW_REG_XNACK_MASK)
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU
+// GFX1250: encoding: [0x22,0xf8,0x81,0xb8]
+
+s_setreg_b32 hwreg(33), s1
+// GFX1250: encoding: [0x21,0xf8,0x01,0xb9]
+
+s_setreg_b32 hwreg(HW_REG_XNACK_STATE_PRIV), s1
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU
+// GFX1250: encoding: [0x21,0xf8,0x01,0xb9]
+
+s_setreg_b32 hwreg(34), s1
+// GFX1250: encoding: [0x22,0xf8,0x01,0xb9]
+
+s_setreg_b32 hwreg(HW_REG_XNACK_MASK), s1
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU
+// GFX1250: encoding: [0x22,0xf8,0x01,0xb9]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
index a3e7e570ab9b6..d72009bc017f4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
@@ -20,3 +20,15 @@
 
 # GFX1250: s_mov_b64 s[0:1], src_shared_limit      ; encoding: [0xec,0x01,0x80,0xbe]
 0xec,0x01,0x80,0xbe
+
+# GFX1250: s_getreg_b32 s1, hwreg(HW_REG_XNACK_STATE_PRIV) ; encoding: [0x21,0xf8,0x81,0xb8]
+0x21,0xf8,0x81,0xb8
+
+# GFX1250: s_getreg_b32 s1, hwreg(HW_REG_XNACK_MASK) ; encoding: [0x22,0xf8,0x81,0xb8]
+0x22,0xf8,0x81,0xb8
+
+# GFX1250: s_setreg_b32 hwreg(HW_REG_XNACK_STATE_PRIV), s1 ; encoding: [0x21,0xf8,0x01,0xb9]
+0x21,0xf8,0x01,0xb9
+
+# GFX1250: s_setreg_b32 hwreg(HW_REG_XNACK_MASK), s1 ; encoding: [0x22,0xf8,0x01,0xb9]
+0x22,0xf8,0x01,0xb9

@llvmbot
Copy link
Member

llvmbot commented Aug 6, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Co-authored-by: Pierre Vanhoutryve <[email protected]>


Full diff: https://github.com/llvm/llvm-project/pull/152374.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+4)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp (+4)
  • (modified) llvm/test/MC/AMDGPU/gfx1250_asm_operands.s (+25)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt (+12)
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index deadb7aed0f69..2d0102fffe5ea 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -536,6 +536,10 @@ enum Id { // HwRegCode, (6) [5:0]
   ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
   ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
   ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
+
+  // GFX1250
+  ID_XNACK_STATE_PRIV = 33,
+  ID_XNACK_MASK_gfx1250 = 34,
 };
 
 enum Offset : unsigned { // Offset, (5) [10:6]
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index e433b85489e6e..3d9455fc51a39 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -223,6 +223,10 @@ static constexpr CustomOperand Operands[] = {
   {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
   {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
 
+  // GFX1250
+  {{"HW_REG_XNACK_STATE_PRIV"},       ID_XNACK_STATE_PRIV,       isGFX1250},
+  {{"HW_REG_XNACK_MASK"},             ID_XNACK_MASK_gfx1250,     isGFX1250},
+
   // Aliases
   {{"HW_REG_HW_ID"},                  ID_HW_ID1,                 isGFX10},
 };
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s b/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
index 8b7465b5df574..100fc981c4f81 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_operands.s
@@ -27,3 +27,28 @@ s_mov_b64 s[0:1], src_shared_limit
 
 s_getreg_b32 s1, hwreg(33)
 // GFX1250: encoding: [0x21,0xf8,0x81,0xb8]
+
+s_getreg_b32 s1, hwreg(HW_REG_XNACK_STATE_PRIV)
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU
+// GFX1250: encoding: [0x21,0xf8,0x81,0xb8]
+
+s_getreg_b32 s1, hwreg(34)
+// GFX1250: encoding: [0x22,0xf8,0x81,0xb8]
+
+s_getreg_b32 s1, hwreg(HW_REG_XNACK_MASK)
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU
+// GFX1250: encoding: [0x22,0xf8,0x81,0xb8]
+
+s_setreg_b32 hwreg(33), s1
+// GFX1250: encoding: [0x21,0xf8,0x01,0xb9]
+
+s_setreg_b32 hwreg(HW_REG_XNACK_STATE_PRIV), s1
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU
+// GFX1250: encoding: [0x21,0xf8,0x01,0xb9]
+
+s_setreg_b32 hwreg(34), s1
+// GFX1250: encoding: [0x22,0xf8,0x01,0xb9]
+
+s_setreg_b32 hwreg(HW_REG_XNACK_MASK), s1
+// GFX1200-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU
+// GFX1250: encoding: [0x22,0xf8,0x01,0xb9]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
index a3e7e570ab9b6..d72009bc017f4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_operands.txt
@@ -20,3 +20,15 @@
 
 # GFX1250: s_mov_b64 s[0:1], src_shared_limit      ; encoding: [0xec,0x01,0x80,0xbe]
 0xec,0x01,0x80,0xbe
+
+# GFX1250: s_getreg_b32 s1, hwreg(HW_REG_XNACK_STATE_PRIV) ; encoding: [0x21,0xf8,0x81,0xb8]
+0x21,0xf8,0x81,0xb8
+
+# GFX1250: s_getreg_b32 s1, hwreg(HW_REG_XNACK_MASK) ; encoding: [0x22,0xf8,0x81,0xb8]
+0x22,0xf8,0x81,0xb8
+
+# GFX1250: s_setreg_b32 hwreg(HW_REG_XNACK_STATE_PRIV), s1 ; encoding: [0x21,0xf8,0x01,0xb9]
+0x21,0xf8,0x01,0xb9
+
+# GFX1250: s_setreg_b32 hwreg(HW_REG_XNACK_MASK), s1 ; encoding: [0x22,0xf8,0x01,0xb9]
+0x22,0xf8,0x01,0xb9

@rampitec rampitec merged commit 66392a8 into main Aug 6, 2025
9 checks passed
@rampitec rampitec deleted the users/rampitec/08-06-_amdgpu_add_xnack_state_priv_and__mask_gfx1250_registers branch August 6, 2025 21:44
@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 6, 2025

LLVM Buildbot has detected a new failure on builder mlir-nvidia running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/17201

Here is the relevant piece of the build log for the reference
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x573834048e00 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x573834048e00 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:mc Machine (object) code

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants