From 43f1a87f103ac0ccfc016989e9936cc405cd3d31 Mon Sep 17 00:00:00 2001 From: Aleksandar Spasojevic Date: Mon, 16 Jun 2025 14:42:25 +0200 Subject: [PATCH 1/4] [AMDGPU] Add GFX12 wave register names with WAVE_ prefix Rename canonical register names with WAVE_ prefix for GFX12 - Maintain backward compatibility through aliases --- .../Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 45 ++++++++++++------ .../dynamic-vgpr-reserve-stack-for-cwsr.ll | 28 +++++------ llvm/test/MC/AMDGPU/gfx12_asm_sopk.s | 28 +++++------ llvm/test/MC/AMDGPU/gfx12_asm_sopk_alias.s | 44 +++++++++++++++++- .../Disassembler/AMDGPU/gfx12_dasm_sopk.txt | 46 +++++++++---------- 5 files changed, 124 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index 14ebbf8e9c929..8803b62b7ebb0 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -169,7 +169,14 @@ namespace Hwreg { // NOLINTBEGIN // clang-format off static constexpr CustomOperand Operands[] = { - {{""}}, + // GFX12+ renamed registers + {{"HW_REG_WAVE_MODE"}, ID_MODE, isGFX12Plus}, + {{"HW_REG_WAVE_STATUS"}, ID_STATUS, isGFX12Plus}, + {{"HW_REG_WAVE_GPR_ALLOC"}, ID_GPR_ALLOC, isGFX12Plus}, + {{"HW_REG_WAVE_LDS_ALLOC"}, ID_LDS_ALLOC, isGFX12Plus}, + {{"HW_REG_WAVE_HW_ID1"}, ID_HW_ID1, isGFX12Plus}, + {{"HW_REG_WAVE_HW_ID2"}, ID_HW_ID2, isGFX12Plus}, + {{"HW_REG_MODE"}, ID_MODE}, {{"HW_REG_STATUS"}, ID_STATUS}, {{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus}, @@ -198,25 +205,25 @@ static constexpr CustomOperand Operands[] = { {{""}}, {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11}, {{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250}, - {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11}, - {{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus}, - {{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus}, - {{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus}, + {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11}, + {{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus}, + {{"HW_REG_WAVE_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus}, + {{"HW_REG_WAVE_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus}, // Register numbers reused in GFX11 {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11}, {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11}, // Register numbers reused in GFX12+ - {{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus}, - {{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus}, - {{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus}, - {{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus}, - {{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus}, - {{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus}, - {{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus}, - {{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, - {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus}, + {{"HW_REG_WAVE_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus}, + {{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus}, + {{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus}, + {{"HW_REG_WAVE_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus}, + {{"HW_REG_WAVE_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus}, + {{"HW_REG_WAVE_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus}, + {{"HW_REG_WAVE_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus}, + {{"HW_REG_WAVE_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, + {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus}, // GFX942 specific registers {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, @@ -230,7 +237,15 @@ static constexpr CustomOperand Operands[] = { {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK_gfx1250, isGFX1250}, // Aliases - {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, + {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, + {{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus}, + {{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus}, + {{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus}, + {{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus}, + {{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus}, + {{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, + {{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus}, + {{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus}, }; // clang-format on // NOLINTEND diff --git a/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll b/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll index ac30297770807..bcccf50e3805c 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll @@ -7,7 +7,7 @@ define amdgpu_cs void @amdgpu_cs() #0 { ; CHECK-LABEL: amdgpu_cs: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; CHECK-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0 @@ -19,7 +19,7 @@ define amdgpu_cs void @amdgpu_cs() #0 { define amdgpu_kernel void @kernel() #0 { ; CHECK-LABEL: kernel: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; CHECK-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0 @@ -31,7 +31,7 @@ define amdgpu_kernel void @kernel() #0 { define amdgpu_cs void @with_local() #0 { ; CHECK-TRUE16-LABEL: with_local: ; CHECK-TRUE16: ; %bb.0: -; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0 @@ -42,7 +42,7 @@ define amdgpu_cs void @with_local() #0 { ; ; CHECK-FAKE16-LABEL: with_local: ; CHECK-FAKE16: ; %bb.0: -; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13 ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0 @@ -60,7 +60,7 @@ define amdgpu_cs void @with_local() #0 { define amdgpu_cs void @with_calls_inline_const() #0 { ; CHECK-TRUE16-LABEL: with_calls_inline_const: ; CHECK-TRUE16: ; %bb.0: -; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi @@ -76,7 +76,7 @@ define amdgpu_cs void @with_calls_inline_const() #0 { ; ; CHECK-FAKE16-LABEL: with_calls_inline_const: ; CHECK-FAKE16: ; %bb.0: -; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi @@ -100,7 +100,7 @@ define amdgpu_cs void @with_calls_inline_const() #0 { define amdgpu_cs void @with_calls_no_inline_const() #0 { ; CHECK-TRUE16-LABEL: with_calls_no_inline_const: ; CHECK-TRUE16: ; %bb.0: -; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi @@ -117,7 +117,7 @@ define amdgpu_cs void @with_calls_no_inline_const() #0 { ; ; CHECK-FAKE16-LABEL: with_calls_no_inline_const: ; CHECK-FAKE16: ; %bb.0: -; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi @@ -140,7 +140,7 @@ define amdgpu_cs void @with_calls_no_inline_const() #0 { define amdgpu_cs void @with_spills() #0 { ; CHECK-LABEL: with_spills: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; CHECK-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0 @@ -153,7 +153,7 @@ define amdgpu_cs void @with_spills() #0 { define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 { ; CHECK-LABEL: realign_stack: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-NEXT: v_mov_b32_e32 v32, 0 ; CHECK-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-NEXT: s_mov_b32 s1, callee@abs32@hi @@ -187,7 +187,7 @@ define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 { define amdgpu_cs void @frame_pointer_none() #1 { ; CHECK-TRUE16-LABEL: frame_pointer_none: ; CHECK-TRUE16: ; %bb.0: -; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0 @@ -198,7 +198,7 @@ define amdgpu_cs void @frame_pointer_none() #1 { ; ; CHECK-FAKE16-LABEL: frame_pointer_none: ; CHECK-FAKE16: ; %bb.0: -; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13 ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0 @@ -214,7 +214,7 @@ define amdgpu_cs void @frame_pointer_none() #1 { define amdgpu_cs void @frame_pointer_all() #2 { ; CHECK-TRUE16-LABEL: frame_pointer_all: ; CHECK-TRUE16: ; %bb.0: -; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0 @@ -225,7 +225,7 @@ define amdgpu_cs void @frame_pointer_all() #2 { ; ; CHECK-FAKE16-LABEL: frame_pointer_all: ; CHECK-FAKE16: ; %bb.0: -; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) +; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_WAVE_HW_ID2, 8, 2) ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13 ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 ; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0 diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s index 4e3e725a00556..819ecb866c5ae 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopk.s @@ -193,19 +193,19 @@ s_call_b64 vcc, 0x1234 s_call_b64 null, 0x1234 // GFX12: encoding: [0x34,0x12,0x7c,0xba] -s_getreg_b32 s0, hwreg(HW_REG_MODE) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_MODE) // GFX12: encoding: [0x01,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_STATUS) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_STATUS) // GFX12: encoding: [0x02,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_STATE_PRIV) // GFX12: encoding: [0x04,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_GPR_ALLOC) // GFX12: encoding: [0x05,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_LDS_ALLOC) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_LDS_ALLOC) // GFX12: encoding: [0x06,0xf8,0x80,0xb8] s_getreg_b32 s0, hwreg(HW_REG_IB_STS) @@ -226,31 +226,31 @@ s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1) s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2) // GFX12: encoding: [0x10,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) // GFX12: encoding: [0x11,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) // GFX12: encoding: [0x12,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_TRAP_CTRL) // GFX12: encoding: [0x13,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) // GFX12: encoding: [0x14,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) // GFX12: encoding: [0x15,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_HW_ID1) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_HW_ID1) // GFX12: encoding: [0x17,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_HW_ID2) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_HW_ID2) // GFX12: encoding: [0x18,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_DVGPR_ALLOC_LO) // GFX12: encoding: [0x1f,0xf8,0x80,0xb8] -s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) +s_getreg_b32 s0, hwreg(HW_REG_WAVE_DVGPR_ALLOC_HI) // GFX12: encoding: [0x20,0xf8,0x80,0xb8] s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopk_alias.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopk_alias.s index 4a25922f956d3..bd265938170f1 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sopk_alias.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopk_alias.s @@ -1,4 +1,46 @@ // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s s_addk_i32 s0, 0x1234 -// GFX12: s_addk_co_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb7] +// GFX12: s_addk_co_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb7] + +s_getreg_b32 s0, hwreg(HW_REG_MODE) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_MODE) ; encoding: [0x01,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_STATUS) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_STATUS) ; encoding: [0x02,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_STATE_PRIV) ; encoding: [0x04,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_GPR_ALLOC) ; encoding: [0x05,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_LDS_ALLOC) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_LDS_ALLOC) ; encoding: [0x06,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) ; encoding: [0x11,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) ; encoding: [0x12,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_TRAP_CTRL) ; encoding: [0x13,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) ; encoding: [0x14,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) ; encoding: [0x15,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_HW_ID1) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_HW_ID1) ; encoding: [0x17,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_HW_ID2) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_HW_ID2) ; encoding: [0x18,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_DVGPR_ALLOC_LO) ; encoding: [0x1f,0xf8,0x80,0xb8] + +s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) +// GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_DVGPR_ALLOC_HI) ; encoding: [0x20,0xf8,0x80,0xb8] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt index 49fa263f6bbf8..41c5724a596f9 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt @@ -82,7 +82,7 @@ # GFX12: s_getreg_b32 s0, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0x80,0xb8] 0x34,0x12,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25) ; encoding: [0xd1,0xc1,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 7, 25) ; encoding: [0xd1,0xc1,0x80,0xb8] 0xd1,0xc1,0x80,0xb8 # GFX12: s_getreg_b32 s105, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xe9,0xb8] @@ -163,7 +163,7 @@ # GFX12: s_setreg_b32 hwreg(52, 8, 3), vcc_lo ; encoding: [0x34,0x12,0x6a,0xb9] 0x34,0x12,0x6a,0xb9 -# GFX12: s_setreg_b32 hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25), s0 ; encoding: [0xd1,0xc1,0x00,0xb9] +# GFX12: s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 7, 25), s0 ; encoding: [0xd1,0xc1,0x00,0xb9] 0xd1,0xc1,0x00,0xb9 # GFX12: s_version 0x1234 ; encoding: [0x34,0x12,0x80,0xb0] @@ -187,43 +187,43 @@ # GFX12: s_version ((128|UC_VERSION_W64_BIT)|UC_VERSION_W32_BIT)|UC_VERSION_MDP_BIT ; encoding: [0x80,0xe0,0x80,0xb0] 0x80,0xe0,0x80,0xb0 -# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0xaf123456 ; encoding: [0x01,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] +# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0xaf123456 ; encoding: [0x01,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] 0x01,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf -# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 31, 1), 0xaf123456 ; encoding: [0xc1,0x07,0x80,0xb9,0x56,0x34,0x12,0xaf] +# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 31, 1), 0xaf123456 ; encoding: [0xc1,0x07,0x80,0xb9,0x56,0x34,0x12,0xaf] 0xc1,0x07,0x80,0xb9,0x56,0x34,0x12,0xaf -# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_STATUS), 0xaf123456 ; encoding: [0x02,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] +# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_STATUS), 0xaf123456 ; encoding: [0x02,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] 0x02,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf -# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC), 0xaf123456 ; encoding: [0x05,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] +# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_GPR_ALLOC), 0xaf123456 ; encoding: [0x05,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] 0x05,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf -# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_LDS_ALLOC), 0xaf123456 ; encoding: [0x06,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] +# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_LDS_ALLOC), 0xaf123456 ; encoding: [0x06,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] 0x06,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf # GFX12: s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0xaf123456 ; encoding: [0x07,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] 0x07,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf -# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_HW_ID1), 0xaf123456 ; encoding: [0x17,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] +# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_HW_ID1), 0xaf123456 ; encoding: [0x17,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] 0x17,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf -# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_HW_ID2), 0xaf123456 ; encoding: [0x18,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] +# GFX12: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_HW_ID2), 0xaf123456 ; encoding: [0x18,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf] 0x18,0xf8,0x80,0xb9,0x56,0x34,0x12,0xaf -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_MODE) ; encoding: [0x01,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_MODE) ; encoding: [0x01,0xf8,0x80,0xb8] 0x01,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATUS) ; encoding: [0x02,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_STATUS) ; encoding: [0x02,0xf8,0x80,0xb8] 0x02,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) ; encoding: [0x04,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_STATE_PRIV) ; encoding: [0x04,0xf8,0x80,0xb8] 0x04,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) ; encoding: [0x05,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_GPR_ALLOC) ; encoding: [0x05,0xf8,0x80,0xb8] 0x05,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_LDS_ALLOC) ; encoding: [0x06,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_LDS_ALLOC) ; encoding: [0x06,0xf8,0x80,0xb8] 0x06,0xf8,0x80,0xb8 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_IB_STS) ; encoding: [0x07,0xf8,0x80,0xb8] @@ -244,31 +244,31 @@ # GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2) ; encoding: [0x10,0xf8,0x80,0xb8] 0x10,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) ; encoding: [0x11,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) ; encoding: [0x11,0xf8,0x80,0xb8] 0x11,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) ; encoding: [0x12,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) ; encoding: [0x12,0xf8,0x80,0xb8] 0x12,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) ; encoding: [0x13,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_TRAP_CTRL) ; encoding: [0x13,0xf8,0x80,0xb8] 0x13,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) ; encoding: [0x14,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) ; encoding: [0x14,0xf8,0x80,0xb8] 0x14,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) ; encoding: [0x15,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) ; encoding: [0x15,0xf8,0x80,0xb8] 0x15,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID1) ; encoding: [0x17,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_HW_ID1) ; encoding: [0x17,0xf8,0x80,0xb8] 0x17,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID2) ; encoding: [0x18,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_HW_ID2) ; encoding: [0x18,0xf8,0x80,0xb8] 0x18,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) ; encoding: [0x1f,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_DVGPR_ALLOC_LO) ; encoding: [0x1f,0xf8,0x80,0xb8] 0x1f,0xf8,0x80,0xb8 -# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) ; encoding: [0x20,0xf8,0x80,0xb8] +# GFX12: s_getreg_b32 s0, hwreg(HW_REG_WAVE_DVGPR_ALLOC_HI) ; encoding: [0x20,0xf8,0x80,0xb8] 0x20,0xf8,0x80,0xb8 # GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) ; encoding: [0x1d,0xf8,0x80,0xb8] From 3f52226b44f4d6520125d3cc5b89d725c9f6d34b Mon Sep 17 00:00:00 2001 From: Aleksandar Spasojevic Date: Fri, 18 Jul 2025 16:05:28 +0200 Subject: [PATCH 2/4] [AMDGPU] Resolving comments 1 --- .../Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 179 ++++++++---------- 1 file changed, 80 insertions(+), 99 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index 8803b62b7ebb0..19b0056c176ab 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -14,11 +14,9 @@ namespace llvm::AMDGPU { //===----------------------------------------------------------------------===// // Custom Operands. // -// A table of custom operands shall describe "primary" operand names first -// followed by aliases if any. It is not required but recommended to arrange -// operands so that operand encoding match operand position in the table. This -// will make getNameFromOperandTable() a bit more efficient. Unused slots in the -// table shall have an empty name. +// A table of custom operands must be ordered by Encoding in ascending order +// to enable binary search lookup. Within entries that share the same encoding, +// "primary" operand names should be listed first followed by aliases if any. // //===----------------------------------------------------------------------===// @@ -27,21 +25,22 @@ template static StringRef getNameFromOperandTable(const CustomOperand (&Table)[N], unsigned Encoding, const MCSubtargetInfo &STI) { - auto isValidIndexForEncoding = [&](size_t Idx) { - return Idx < N && Table[Idx].Encoding == Encoding && - !Table[Idx].Name.empty() && - (!Table[Idx].Cond || Table[Idx].Cond(STI)); + auto IsValid = [&](const CustomOperand &Entry) { + return Entry.Encoding == Encoding && !Entry.Name.empty() && + (!Entry.Cond || Entry.Cond(STI)); }; - // This is an optimization that should work in most cases. As a side effect, - // it may cause selection of an alias instead of a primary operand name in - // case of sparse tables. - if (isValidIndexForEncoding(Encoding)) - return Table[Encoding].Name; + // Find the first entry with the target encoding + auto First = + std::lower_bound(Table, Table + N, Encoding, + [](const CustomOperand &Entry, unsigned TargetEncoding) { + return Entry.Encoding < TargetEncoding; + }); - for (size_t Idx = 0; Idx != N; ++Idx) - if (isValidIndexForEncoding(Idx)) - return Table[Idx].Name; + // Search through entries with the same encoding to find the first valid one + for (auto It = First; It != Table + N && It->Encoding == Encoding; ++It) + if (IsValid(*It)) + return It->Name; return ""; } @@ -92,10 +91,11 @@ namespace SendMsg { // clang-format off static constexpr CustomOperand MsgOperands[] = { - {{""}}, {{"MSG_INTERRUPT"}, ID_INTERRUPT}, {{"MSG_GS"}, ID_GS_PreGFX11, isNotGFX11Plus}, + {{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus}, {{"MSG_GS_DONE"}, ID_GS_DONE_PreGFX11, isNotGFX11Plus}, + {{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus}, {{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10}, {{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9_GFX10_GFX11}, {{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9_GFX10_GFX11}, @@ -103,10 +103,8 @@ static constexpr CustomOperand MsgOperands[] = { {{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10}, {{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus}, {{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10}, - {{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10}, - {{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus}, - {{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus}, {{"MSG_SAVEWAVE_HAS_TDM"}, ID_SAVEWAVE_HAS_TDM, isGFX1250}, + {{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10}, {{"MSG_SYSMSG"}, ID_SYSMSG}, {{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, isGFX11Plus}, {{"MSG_RTN_GET_DDID"}, ID_RTN_GET_DDID, isGFX11Plus}, @@ -121,7 +119,6 @@ static constexpr CustomOperand MsgOperands[] = { }; static constexpr CustomOperand SysMsgOperands[] = { - {{""}}, {{"SYSMSG_OP_ECC_ERR_INTERRUPT"}, OP_SYS_ECC_ERR_INTERRUPT}, {{"SYSMSG_OP_REG_RD"}, OP_SYS_REG_RD}, {{"SYSMSG_OP_HOST_TRAP_ACK"}, OP_SYS_HOST_TRAP_ACK, isNotGFX9Plus}, @@ -169,83 +166,67 @@ namespace Hwreg { // NOLINTBEGIN // clang-format off static constexpr CustomOperand Operands[] = { - // GFX12+ renamed registers - {{"HW_REG_WAVE_MODE"}, ID_MODE, isGFX12Plus}, - {{"HW_REG_WAVE_STATUS"}, ID_STATUS, isGFX12Plus}, - {{"HW_REG_WAVE_GPR_ALLOC"}, ID_GPR_ALLOC, isGFX12Plus}, - {{"HW_REG_WAVE_LDS_ALLOC"}, ID_LDS_ALLOC, isGFX12Plus}, - {{"HW_REG_WAVE_HW_ID1"}, ID_HW_ID1, isGFX12Plus}, - {{"HW_REG_WAVE_HW_ID2"}, ID_HW_ID2, isGFX12Plus}, - - {{"HW_REG_MODE"}, ID_MODE}, - {{"HW_REG_STATUS"}, ID_STATUS}, - {{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus}, - {{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus}, - {{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC}, - {{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC}, - {{"HW_REG_IB_STS"}, ID_IB_STS}, - {{""}}, - {{""}}, - {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus}, - {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus}, - {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus}, - {{""}}, - {{""}}, - {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9_GFX10_GFX11}, - {{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10}, - {{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10}, - {{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10}, - {{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10}, - {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10_GFX11}, - {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10_GFX11}, - {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030}, - {{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus}, - {{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus}, - {{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10}, - {{""}}, - {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11}, - {{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250}, - {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11}, - {{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus}, - {{"HW_REG_WAVE_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus}, - {{"HW_REG_WAVE_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus}, - - // Register numbers reused in GFX11 - {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11}, - {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11}, - - // Register numbers reused in GFX12+ - {{"HW_REG_WAVE_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus}, - {{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus}, - {{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus}, - {{"HW_REG_WAVE_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus}, - {{"HW_REG_WAVE_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus}, - {{"HW_REG_WAVE_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus}, - {{"HW_REG_WAVE_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus}, - {{"HW_REG_WAVE_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, - {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus}, - - // GFX942 specific registers - {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, - {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940}, - {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940}, - {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940}, - {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940}, - - // GFX1250 - {{"HW_REG_XNACK_STATE_PRIV"}, ID_XNACK_STATE_PRIV, isGFX1250}, - {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK_gfx1250, isGFX1250}, - - // Aliases - {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, - {{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus}, - {{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus}, - {{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus}, - {{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus}, - {{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus}, - {{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, - {{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus}, - {{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus}, + {{"HW_REG_WAVE_MODE"}, ID_MODE, isGFX12Plus}, + {{"HW_REG_MODE"}, ID_MODE}, + {{"HW_REG_WAVE_STATUS"}, ID_STATUS, isGFX12Plus}, + {{"HW_REG_STATUS"}, ID_STATUS}, + {{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus}, + {{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus}, + {{"HW_REG_WAVE_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus}, + {{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus}, + {{"HW_REG_WAVE_GPR_ALLOC"}, ID_GPR_ALLOC, isGFX12Plus}, + {{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC}, + {{"HW_REG_WAVE_LDS_ALLOC"}, ID_LDS_ALLOC, isGFX12Plus}, + {{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC}, + {{"HW_REG_IB_STS"}, ID_IB_STS}, + {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus}, + {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus}, + {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus}, + {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9_GFX10_GFX11}, + {{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus}, + {{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10}, + {{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus}, + {{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10}, + {{"HW_REG_WAVE_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus}, + {{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus}, + {{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10}, + {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11}, + {{"HW_REG_WAVE_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus}, + {{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus}, + {{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10}, + {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11}, + {{"HW_REG_WAVE_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus}, + {{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus}, + {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10_GFX11}, + {{"HW_REG_WAVE_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus}, + {{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus}, + {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, + {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10_GFX11}, + {{"HW_REG_WAVE_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, + {{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940}, + {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940}, + {{"HW_REG_WAVE_HW_ID1"}, ID_HW_ID1, isGFX12Plus}, + {{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus}, + {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940}, + {{"HW_REG_WAVE_HW_ID2"}, ID_HW_ID2, isGFX12Plus}, + {{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940}, + {{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10}, + {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11}, + {{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250}, + {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11}, + {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus}, + {{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus}, + {{"HW_REG_WAVE_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus}, + {{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus}, + {{"HW_REG_WAVE_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus}, + {{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus}, + {{"HW_REG_XNACK_STATE_PRIV"}, ID_XNACK_STATE_PRIV, isGFX1250}, + {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK_gfx1250, isGFX1250}, + }; // clang-format on // NOLINTEND From 23e5c3863e8f9932bf8f39d1086c8cc72aef1c6f Mon Sep 17 00:00:00 2001 From: Aleksandar Spasojevic Date: Thu, 25 Sep 2025 13:04:15 +0200 Subject: [PATCH 3/4] [AMDGPU] Resolving comments 2 --- llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index 19b0056c176ab..a034425eb18e5 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -25,11 +25,6 @@ template static StringRef getNameFromOperandTable(const CustomOperand (&Table)[N], unsigned Encoding, const MCSubtargetInfo &STI) { - auto IsValid = [&](const CustomOperand &Entry) { - return Entry.Encoding == Encoding && !Entry.Name.empty() && - (!Entry.Cond || Entry.Cond(STI)); - }; - // Find the first entry with the target encoding auto First = std::lower_bound(Table, Table + N, Encoding, @@ -39,7 +34,7 @@ static StringRef getNameFromOperandTable(const CustomOperand (&Table)[N], // Search through entries with the same encoding to find the first valid one for (auto It = First; It != Table + N && It->Encoding == Encoding; ++It) - if (IsValid(*It)) + if (It->Encoding == Encoding && (!It->Cond || It->Cond(STI))) return It->Name; return ""; From 5e85570ece885802f096a287b6cea876c0e0a30f Mon Sep 17 00:00:00 2001 From: Aleksandar Spasojevic Date: Thu, 25 Sep 2025 16:56:27 +0200 Subject: [PATCH 4/4] [AMDGPU] Resolving comments 3 --- llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index a034425eb18e5..6489e63d4f6b8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -33,9 +33,10 @@ static StringRef getNameFromOperandTable(const CustomOperand (&Table)[N], }); // Search through entries with the same encoding to find the first valid one - for (auto It = First; It != Table + N && It->Encoding == Encoding; ++It) + for (auto It = First; It != Table + N && It->Encoding == Encoding; ++It) { if (It->Encoding == Encoding && (!It->Cond || It->Cond(STI))) return It->Name; + } return ""; }