Skip to content

Commit 18331e0

Browse files
committed
[AMDGPU] Initialize FrameOffsetReg for amdgpu_cs_chain functions
Functions with the amdgpu_cs_chain calling convention were not initializing FrameOffsetReg, leaving it as FP_REG. This caused machine-verifier failures, because SCRATCH_STORE_DWORD_SADDR instructions require the saddr operand to be in the SReg_32_XEXEC_HI register class.
1 parent 2ec01e4 commit 18331e0

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
9898
// set one up. For now, we can use s32 to match what amdgpu_gfx functions
9999
// would use if called, but this can be revisited.
100100
// FIXME: Only reserve this if we actually need it.
101+
FrameOffsetReg = AMDGPU::SGPR33;
101102
StackPtrOffsetReg = AMDGPU::SGPR32;
102103

103104
ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -O0 -verify-machineinstrs < %s | FileCheck %s
3+
4+
; Regression test for an amdgpu_cs_chain function that makes an ordinary call
; (here to itself, passing a null pointer) and then transfers control through
; the llvm.amdgcn.cs.chain intrinsic. The RUN line compiles it at -O0 with
; GlobalISel and -verify-machineinstrs, so the VGPR spills/reloads around the
; call are checked by the machine verifier.
;
; The CHECK lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; NOTE(review): %callee is not used in the body -- presumably intentional for
; a minimal reproducer; confirm.
; NOTE(review): in the expected output the prologue spills are emitted with
; no SGPR base ("off"), while the epilogue reloads are based on s33, and s33
; is then restored from lane 0 of v41 even though the only v_writelane shown
; targets v40. Verify that this asymmetry is the intended codegen.
define amdgpu_cs_chain void @indirect(ptr %callee) {
5+
; CHECK-LABEL: indirect:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; CHECK-NEXT: s_mov_b32 s32, 16
9+
; CHECK-NEXT: s_xor_saveexec_b64 s[0:1], -1
10+
; CHECK-NEXT: scratch_store_dword off, v40, off ; 4-byte Folded Spill
11+
; CHECK-NEXT: scratch_store_dword off, v41, off offset:4 ; 4-byte Folded Spill
12+
; CHECK-NEXT: s_mov_b64 exec, s[0:1]
13+
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
14+
; CHECK-NEXT: ; implicit-def: $sgpr10_sgpr11
15+
; CHECK-NEXT: ; implicit-def: $sgpr0
16+
; CHECK-NEXT: s_mov_b32 s1, 0
17+
; CHECK-NEXT: ; implicit-def: $vgpr40 : SGPR spill to VGPR lane
18+
; CHECK-NEXT: v_writelane_b32 v40, s1, 0
19+
; CHECK-NEXT: v_mov_b32_e32 v0, s1
20+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
21+
; CHECK-NEXT: s_mov_b64 s[4:5], s[6:7]
22+
; CHECK-NEXT: s_mov_b64 s[8:9], 36
23+
; CHECK-NEXT: s_mov_b32 s12, s0
24+
; CHECK-NEXT: s_mov_b32 s13, s0
25+
; CHECK-NEXT: s_mov_b32 s14, s0
26+
; CHECK-NEXT: s_mov_b32 s15, s0
27+
; CHECK-NEXT: v_mov_b32_e32 v31, s0
28+
; CHECK-NEXT: s_getpc_b64 s[0:1]
29+
; CHECK-NEXT: s_add_u32 s0, s0, indirect@gotpcrel32@lo+4
30+
; CHECK-NEXT: s_addc_u32 s1, s1, indirect@gotpcrel32@hi+12
31+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
32+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
33+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
34+
; CHECK-NEXT: v_readlane_b32 s3, v40, 0
35+
; CHECK-NEXT: s_nop 1
36+
; CHECK-NEXT: v_mov_b32_e32 v0, s3
37+
; CHECK-NEXT: s_nop 0
38+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
39+
; CHECK-NEXT: v_mov_b32_e32 v0, s3
40+
; CHECK-NEXT: s_nop 0
41+
; CHECK-NEXT: v_readfirstlane_b32 s1, v0
42+
; CHECK-NEXT: v_mov_b32_e32 v0, s3
43+
; CHECK-NEXT: s_nop 0
44+
; CHECK-NEXT: v_readfirstlane_b32 s2, v0
45+
; CHECK-NEXT: v_mov_b32_e32 v8, s3
46+
; CHECK-NEXT: s_mov_b32 s4, 0
47+
; CHECK-NEXT: v_mov_b32_e32 v9, s4
48+
; CHECK-NEXT: v_mov_b32_e32 v10, s3
49+
; CHECK-NEXT: v_mov_b32_e32 v11, s3
50+
; CHECK-NEXT: s_mov_b64 s[4:5], 0
51+
; CHECK-NEXT: v_readlane_b32 s3, v41, 0
52+
; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1
53+
; CHECK-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
54+
; CHECK-NEXT: scratch_load_dword v41, off, s33 offset:4 ; 4-byte Folded Reload
55+
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
56+
; CHECK-NEXT: s_mov_b32 s33, s3
57+
; CHECK-NEXT: s_mov_b64 exec, 0
58+
; CHECK-NEXT: s_setpc_b64 s[4:5]
59+
call void @indirect(ptr null)
60+
call void (ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i64.v3i32.sl_i32p5i32i32s(ptr null, i64 0, <3 x i32> inreg zeroinitializer, { i32, ptr addrspace(5), i32, i32 } zeroinitializer, i32 0)
61+
unreachable
62+
}
63+
64+
declare void @llvm.amdgcn.cs.chain.p0.i64.v3i32.sl_i32p5i32i32s(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32 immarg, ...)

0 commit comments

Comments
 (0)