Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Sep 18, 2025

No description provided.

@RKSimon RKSimon enabled auto-merge (squash) September 18, 2025 14:40
@llvmbot
Copy link
Member

llvmbot commented Sep 18, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Simon Pilgrim (RKSimon)

Changes

Patch is 153.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159587.diff

2 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll (+1657-315)
  • (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll (+1042-339)
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
index d05424ffe773d..fccee3da6d77e 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
@@ -1,53 +1,94 @@
-; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -mcpu=gfx90a -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7,UNPACKED-TID %s
-; RUN: opt -passes=amdgpu-attributor -mcpu=gfx90a -mattr=-xnack < %s | llc -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,PACKED-TID %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -mcpu=gfx90a -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7 %s
+; RUN: opt -passes=amdgpu-attributor -mcpu=gfx90a -mattr=-xnack < %s | llc -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A %s
 
 target triple = "amdgcn-amd-amdhsa"
 
-; GCN-LABEL: {{^}}use_workitem_id_x:
-; GCN: s_waitcnt
-; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_x() #1 {
+; GFX7-LABEL: use_workitem_id_x:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_x:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val = call i32 @llvm.amdgcn.workitem.id.x()
   store volatile i32 %val, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_y:
-; GCN: s_waitcnt
-; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_y() #1 {
+; GFX7-LABEL: use_workitem_id_y:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_y:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val = call i32 @llvm.amdgcn.workitem.id.y()
   store volatile i32 %val, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_z:
-; GCN: s_waitcnt
-; GCN: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
-; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_z() #1 {
+; GFX7-LABEL: use_workitem_id_z:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_z:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val = call i32 @llvm.amdgcn.workitem.id.z()
   store volatile i32 %val, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_xy:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_xy() #1 {
+; GFX7-LABEL: use_workitem_id_xy:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xy:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
   %val1 = call i32 @llvm.amdgcn.workitem.id.y()
   store volatile i32 %val0, ptr addrspace(1) poison
@@ -55,17 +96,34 @@ define void @use_workitem_id_xy() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_xyz:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_xyz() #1 {
+; GFX7-LABEL: use_workitem_id_xyz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xyz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
   %val1 = call i32 @llvm.amdgcn.workitem.id.y()
   %val2 = call i32 @llvm.amdgcn.workitem.id.z()
@@ -75,15 +133,28 @@ define void @use_workitem_id_xyz() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_xz:
-; GCN: s_waitcnt
-; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_xz() #1 {
+; GFX7-LABEL: use_workitem_id_xz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_xz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
   %val1 = call i32 @llvm.amdgcn.workitem.id.z()
   store volatile i32 %val0, ptr addrspace(1) poison
@@ -91,15 +162,28 @@ define void @use_workitem_id_xz() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_yz:
-; GCN: s_waitcnt
-; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
-; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
-; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
-; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_setpc_b64
 define void @use_workitem_id_yz() #1 {
+; GFX7-LABEL: use_workitem_id_yz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX7-NEXT:    flat_store_dword v[0:1], v0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: use_workitem_id_yz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 10, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_bfe_u32 v0, v31, 20, 10
+; GFX90A-NEXT:    global_store_dword v[0:1], v0, off
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
   %val0 = call i32 @llvm.amdgcn.workitem.id.y()
   %val1 = call i32 @llvm.amdgcn.workitem.id.z()
   store volatile i32 %val0, ptr addrspace(1) poison
@@ -107,229 +191,639 @@ define void @use_workitem_id_yz() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
-; GCN: v_mov_b32_e32 v31, v0
-; GCN: s_swappc_b64
-; GCN-NOT: v31
-
-; GCN: .amdhsa_system_vgpr_workitem_id 0
 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
+; GCN-LABEL: kern_indirect_use_workitem_id_x:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_add_u32 s0, s0, s5
+; GCN-NEXT:    s_addc_u32 s1, s1, 0
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    v_mov_b32_e32 v31, v0
+; GCN-NEXT:    s_mov_b32 s32, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    s_endpgm
   call void @use_workitem_id_x()
   ret void
 }
+; GCN: .amdhsa_system_vgpr_workitem_id 0
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
-
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v31
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 v31, 10, v1
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 1
 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_y:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v31, 10, v1
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_y:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_y()
   ret void
 }
+; GCN: .amdhsa_system_vgpr_workitem_id 1
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
-
-; GCN-NOT: v0
-; GCN-NOT: v2
-; GCN-NOT: v31
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 v31, 20, v2
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
-
-; GCN: .amdhsa_system_vgpr_workitem_id 2
 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_z:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v31, 20, v2
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_z:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_z()
   ret void
 }
+; GCN: .amdhsa_system_vgpr_workitem_id 2
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
-; GCN-NOT: v0
-; GCN-NOT: v1
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDY]]
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xy:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT:    v_or_b32_e32 v31, v0, v1
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_xy:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_xy()
   ret void
 }
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
-; GCN-NOT: v0
-; GCN-NOT: v2
-
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID: v_or_b32_e32 v31, v0, [[IDZ]]
-; GCN-NOT: v0
-; GCN-NOT: v2
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
+; GFX7-NEXT:    v_or_b32_e32 v31, v0, v1
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_xz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_xz()
   ret void
 }
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
-; GCN-NOT: v1
-; GCN-NOT: v2
-; PACKED-TID: v_mov_b32_e32 v31, v0
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID: v_or_b32_e32 v31, [[IDY]], [[IDZ]]
-; GCN-NOT: v1
-; GCN-NOT: v2
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_yz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 20, v2
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT:    v_or_b32_e32 v31, v1, v0
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: kern_indirect_use_workitem_id_yz:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_add_u32 s0, s0, s5
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_getpc_b64 s[4:5]
+; GFX90A-NEXT:    s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX90A-NEXT:    s_endpgm
   call void @use_workitem_id_yz()
   ret void
 }
 
-; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v2
-
-; PACKED-TID: v_mov_b32_e32 v31, v0
-
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
-; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
-; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDY]]
-; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, [[IDZ]]
-; GCN-NOT: v0
-; GCN-NOT: v1
-; GCN-NOT: v2
-; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
+; GFX7-LABEL: kern_indirect_use_workitem_id_xyz:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_u32 s0, s0, s5
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    s_getpc_b64 s[4:5]
+; GFX7-NEXT:    s_add_u32 s4, s4, use_workitem_id_xyz@gotpcrel32@lo+4
+; GFX7-NEXT:    s_addc_u32 s5, s5, use_workitem_id_xyz@gotpcrel32@hi+12
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7-NEXT:    s_mov_b32 s32, 0
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GFX7-NEXT:    s_endpgm
+;
+; GFX90A-LABEL:...
[truncated]

@RKSimon RKSimon merged commit 6e47bff into llvm:main Sep 18, 2025
11 checks passed
@RKSimon RKSimon deleted the amdgpu-callee-vgprs-regenerate branch September 18, 2025 16:08
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants