Skip to content

Commit 9f5b2fd

Browse files
committed
Update conditions for setting up base pointer
1 parent 14248c2 commit 9f5b2fd

File tree

7 files changed: +46 -133 lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
529529
// off the base pointer in the epilog to restore the stack frame.
530530
const MachineFrameInfo &MFI = MF.getFrameInfo();
531531
return (MFI.getNumFixedObjects() && shouldRealignStack(MF)) ||
532-
MFI.hasVarSizedObjects();
532+
(MFI.hasVarSizedObjects() && shouldRealignStack(MF));
533533
}
534534

535535
Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }

llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
6969
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7070
; GFX9-NEXT: s_mov_b32 s7, s33
7171
; GFX9-NEXT: s_mov_b32 s33, s32
72-
; GFX9-NEXT: s_mov_b32 s8, s34
73-
; GFX9-NEXT: s_mov_b32 s34, s32
7472
; GFX9-NEXT: s_addk_i32 s32, 0x400
7573
; GFX9-NEXT: s_getpc_b64 s[4:5]
7674
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -82,7 +80,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
8280
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
8381
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8482
; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
85-
; GFX9-NEXT: s_mov_b32 s34, s8
8683
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8784
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
8885
; GFX9-NEXT: s_and_b32 s4, s4, -16
@@ -99,8 +96,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
9996
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10097
; GFX10-NEXT: s_mov_b32 s7, s33
10198
; GFX10-NEXT: s_mov_b32 s33, s32
102-
; GFX10-NEXT: s_mov_b32 s8, s34
103-
; GFX10-NEXT: s_mov_b32 s34, s32
10499
; GFX10-NEXT: s_addk_i32 s32, 0x200
105100
; GFX10-NEXT: s_getpc_b64 s[4:5]
106101
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -109,7 +104,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
109104
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
110105
; GFX10-NEXT: v_mov_b32_e32 v0, 0
111106
; GFX10-NEXT: v_mov_b32_e32 v1, s6
112-
; GFX10-NEXT: s_mov_b32 s34, s8
113107
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
114108
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
115109
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
@@ -128,16 +122,13 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
128122
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129123
; GFX11-NEXT: s_mov_b32 s3, s33
130124
; GFX11-NEXT: s_mov_b32 s33, s32
131-
; GFX11-NEXT: s_mov_b32 s4, s34
132-
; GFX11-NEXT: s_mov_b32 s34, s32
133125
; GFX11-NEXT: s_add_i32 s32, s32, 16
134126
; GFX11-NEXT: s_getpc_b64 s[0:1]
135127
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
136128
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
137129
; GFX11-NEXT: s_mov_b32 s2, s32
138130
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
139131
; GFX11-NEXT: v_mov_b32_e32 v0, 0
140-
; GFX11-NEXT: s_mov_b32 s34, s4
141132
; GFX11-NEXT: scratch_store_b32 off, v0, s2
142133
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
143134
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -222,8 +213,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
222213
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223214
; GFX9-NEXT: s_mov_b32 s7, s33
224215
; GFX9-NEXT: s_mov_b32 s33, s32
225-
; GFX9-NEXT: s_mov_b32 s8, s34
226-
; GFX9-NEXT: s_mov_b32 s34, s32
227216
; GFX9-NEXT: s_addk_i32 s32, 0x400
228217
; GFX9-NEXT: s_getpc_b64 s[4:5]
229218
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -235,7 +224,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
235224
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
236225
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
237226
; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
238-
; GFX9-NEXT: s_mov_b32 s34, s8
239227
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
240228
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
241229
; GFX9-NEXT: s_and_b32 s4, s4, -16
@@ -252,8 +240,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
252240
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253241
; GFX10-NEXT: s_mov_b32 s7, s33
254242
; GFX10-NEXT: s_mov_b32 s33, s32
255-
; GFX10-NEXT: s_mov_b32 s8, s34
256-
; GFX10-NEXT: s_mov_b32 s34, s32
257243
; GFX10-NEXT: s_addk_i32 s32, 0x200
258244
; GFX10-NEXT: s_getpc_b64 s[4:5]
259245
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -262,7 +248,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
262248
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
263249
; GFX10-NEXT: v_mov_b32_e32 v0, 0
264250
; GFX10-NEXT: v_mov_b32_e32 v1, s6
265-
; GFX10-NEXT: s_mov_b32 s34, s8
266251
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
267252
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
268253
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
@@ -281,16 +266,13 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
281266
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282267
; GFX11-NEXT: s_mov_b32 s3, s33
283268
; GFX11-NEXT: s_mov_b32 s33, s32
284-
; GFX11-NEXT: s_mov_b32 s4, s34
285-
; GFX11-NEXT: s_mov_b32 s34, s32
286269
; GFX11-NEXT: s_add_i32 s32, s32, 16
287270
; GFX11-NEXT: s_getpc_b64 s[0:1]
288271
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
289272
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
290273
; GFX11-NEXT: s_mov_b32 s2, s32
291274
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
292275
; GFX11-NEXT: v_mov_b32_e32 v0, 0
293-
; GFX11-NEXT: s_mov_b32 s34, s4
294276
; GFX11-NEXT: scratch_store_b32 off, v0, s2
295277
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
296278
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0

llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,8 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3
151151
; GCN: ; %bb.0: ; %entry
152152
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153153
; GCN-NEXT: s_mov_b32 s7, s33
154-
; GCN-NEXT: s_mov_b32 s8, s34
155154
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
156155
; GCN-NEXT: s_mov_b32 s33, s32
157-
; GCN-NEXT: s_mov_b32 s34, s32
158156
; GCN-NEXT: s_addk_i32 s32, 0x400
159157
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
160158
; GCN-NEXT: s_cbranch_execz .LBB2_3
@@ -183,7 +181,6 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3
183181
; GCN-NEXT: s_add_i32 s32, s33, 0x400
184182
; GCN-NEXT: global_store_dword v[0:1], v0, off
185183
; GCN-NEXT: s_waitcnt vmcnt(0)
186-
; GCN-NEXT: s_mov_b32 s34, s8
187184
; GCN-NEXT: s_addk_i32 s32, 0xfc00
188185
; GCN-NEXT: s_mov_b32 s33, s7
189186
; GCN-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/amdpal-callable.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -149,15 +149,15 @@ attributes #0 = { nounwind }
149149
; GCN-NEXT: dynamic_stack:
150150
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
151151
; GCN-NEXT: .lds_size: 0{{$}}
152-
; GCN-NEXT: .sgpr_count: 0x2a{{$}}
152+
; GCN-NEXT: .sgpr_count: 0x28{{$}}
153153
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
154154
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
155155
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
156156
; GCN-NEXT: dynamic_stack_loop:
157157
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
158158
; GCN-NEXT: .lds_size: 0{{$}}
159-
; SDAG-NEXT: .sgpr_count: 0x27{{$}}
160-
; GISEL-NEXT: .sgpr_count: 0x28{{$}}
159+
; SDAG-NEXT: .sgpr_count: 0x25{{$}}
160+
; GISEL-NEXT: .sgpr_count: 0x26{{$}}
161161
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
162162
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
163163
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
@@ -182,22 +182,22 @@ attributes #0 = { nounwind }
182182
; GCN-NEXT: no_stack_extern_call:
183183
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
184184
; GCN-NEXT: .lds_size: 0{{$}}
185-
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
186-
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
185+
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
186+
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
187187
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
188188
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
189189
; GCN-NEXT: no_stack_extern_call_many_args:
190190
; GCN-NEXT: .backend_stack_size: 0x90{{$}}
191191
; GCN-NEXT: .lds_size: 0{{$}}
192-
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
193-
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
192+
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
193+
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
194194
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
195195
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
196196
; GCN-NEXT: no_stack_indirect_call:
197197
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
198198
; GCN-NEXT: .lds_size: 0{{$}}
199-
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
200-
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
199+
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
200+
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
201201
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
202202
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
203203
; GCN-NEXT: simple_lds:
@@ -227,15 +227,15 @@ attributes #0 = { nounwind }
227227
; GCN-NEXT: simple_stack_extern_call:
228228
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
229229
; GCN-NEXT: .lds_size: 0{{$}}
230-
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
231-
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
230+
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
231+
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
232232
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
233233
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
234234
; GCN-NEXT: simple_stack_indirect_call:
235235
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
236236
; GCN-NEXT: .lds_size: 0{{$}}
237-
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
238-
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
237+
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
238+
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
239239
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
240240
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
241241
; GCN-NEXT: simple_stack_recurse:

0 commit comments

Comments (0)