Skip to content

Commit 0c47e9d

Browse files
committed
Restore SP in functions with dynamic allocas
1 parent 52c338d commit 0c47e9d

File tree

8 files changed

+293
-78
lines changed

8 files changed

+293
-78
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
12591259
Register FramePtrRegScratchCopy;
12601260
Register SGPRForFPSaveRestoreCopy =
12611261
FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1262+
1263+
if (MFI.hasVarSizedObjects()) {
1264+
assert(TRI.hasBasePointer(MF) &&
1265+
"Variable sized objects require base pointer to be setup!");
1266+
Register BasePtrReg = TRI.getBaseRegister();
1267+
// Restore SP to the base pointer plus the (scaled) fixed frame size
1268+
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1269+
.addReg(BasePtrReg)
1270+
.addImm(RoundedSize * getScratchScaleFactor(ST))
1271+
.setMIFlag(MachineInstr::FrameDestroy);
1272+
}
12621273
if (FPSaved) {
12631274
// CSR spill restores should use FP as base register. If
12641275
// SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,11 @@ Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
525525
bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
526526
// When we need stack realignment, we can't reference off of the
527527
// stack pointer, so we reserve a base pointer.
528+
// For functions with dynamically sized stack objects, we need to reference
529+
// off the base pointer in the epilog to restore the stack frame.
528530
const MachineFrameInfo &MFI = MF.getFrameInfo();
529-
return MFI.getNumFixedObjects() && shouldRealignStack(MF);
531+
return (MFI.getNumFixedObjects() && shouldRealignStack(MF)) ||
532+
MFI.hasVarSizedObjects();
530533
}
531534

532535
Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }

llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
6969
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7070
; GFX9-NEXT: s_mov_b32 s7, s33
7171
; GFX9-NEXT: s_mov_b32 s33, s32
72+
; GFX9-NEXT: s_mov_b32 s8, s34
73+
; GFX9-NEXT: s_mov_b32 s34, s32
7274
; GFX9-NEXT: s_addk_i32 s32, 0x400
7375
; GFX9-NEXT: s_getpc_b64 s[4:5]
7476
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -86,6 +88,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
8688
; GFX9-NEXT: s_and_b32 s4, s4, -16
8789
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
8890
; GFX9-NEXT: s_add_u32 s32, s6, s4
91+
; GFX9-NEXT: s_add_i32 s32, s34, 0x400
92+
; GFX9-NEXT: s_mov_b32 s34, s8
8993
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9094
; GFX9-NEXT: s_waitcnt vmcnt(0)
9195
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -95,6 +99,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
9599
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96100
; GFX10-NEXT: s_mov_b32 s7, s33
97101
; GFX10-NEXT: s_mov_b32 s33, s32
102+
; GFX10-NEXT: s_mov_b32 s8, s34
103+
; GFX10-NEXT: s_mov_b32 s34, s32
98104
; GFX10-NEXT: s_addk_i32 s32, 0x200
99105
; GFX10-NEXT: s_getpc_b64 s[4:5]
100106
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -112,6 +118,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
112118
; GFX10-NEXT: s_and_b32 s4, s4, -16
113119
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
114120
; GFX10-NEXT: s_add_u32 s32, s6, s4
121+
; GFX10-NEXT: s_add_i32 s32, s34, 0x200
122+
; GFX10-NEXT: s_mov_b32 s34, s8
115123
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
116124
; GFX10-NEXT: s_setpc_b64 s[30:31]
117125
;
@@ -120,13 +128,15 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
120128
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121129
; GFX11-NEXT: s_mov_b32 s3, s33
122130
; GFX11-NEXT: s_mov_b32 s33, s32
131+
; GFX11-NEXT: s_mov_b32 s4, s34
132+
; GFX11-NEXT: s_mov_b32 s34, s32
123133
; GFX11-NEXT: s_add_i32 s32, s32, 16
124134
; GFX11-NEXT: s_getpc_b64 s[0:1]
125135
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
126136
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
127-
; GFX11-NEXT: v_mov_b32_e32 v0, 0
128-
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129137
; GFX11-NEXT: s_mov_b32 s2, s32
138+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
139+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
130140
; GFX11-NEXT: s_mov_b32 s33, s3
131141
; GFX11-NEXT: scratch_store_b32 off, v0, s2
132142
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -136,8 +146,10 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
136146
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
137147
; GFX11-NEXT: s_and_b32 s0, s0, -16
138148
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
139-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
149+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
140150
; GFX11-NEXT: s_add_u32 s32, s2, s0
151+
; GFX11-NEXT: s_add_i32 s32, s34, 16
152+
; GFX11-NEXT: s_mov_b32 s34, s4
141153
; GFX11-NEXT: s_add_i32 s32, s32, -16
142154
; GFX11-NEXT: s_setpc_b64 s[30:31]
143155
%n = load i32, ptr addrspace(4) @gv, align 4
@@ -210,6 +222,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
210222
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211223
; GFX9-NEXT: s_mov_b32 s7, s33
212224
; GFX9-NEXT: s_mov_b32 s33, s32
225+
; GFX9-NEXT: s_mov_b32 s8, s34
226+
; GFX9-NEXT: s_mov_b32 s34, s32
213227
; GFX9-NEXT: s_addk_i32 s32, 0x400
214228
; GFX9-NEXT: s_getpc_b64 s[4:5]
215229
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -227,6 +241,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
227241
; GFX9-NEXT: s_and_b32 s4, s4, -16
228242
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
229243
; GFX9-NEXT: s_add_u32 s32, s6, s4
244+
; GFX9-NEXT: s_add_i32 s32, s34, 0x400
245+
; GFX9-NEXT: s_mov_b32 s34, s8
230246
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
231247
; GFX9-NEXT: s_waitcnt vmcnt(0)
232248
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -236,6 +252,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
236252
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237253
; GFX10-NEXT: s_mov_b32 s7, s33
238254
; GFX10-NEXT: s_mov_b32 s33, s32
255+
; GFX10-NEXT: s_mov_b32 s8, s34
256+
; GFX10-NEXT: s_mov_b32 s34, s32
239257
; GFX10-NEXT: s_addk_i32 s32, 0x200
240258
; GFX10-NEXT: s_getpc_b64 s[4:5]
241259
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -253,6 +271,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
253271
; GFX10-NEXT: s_and_b32 s4, s4, -16
254272
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
255273
; GFX10-NEXT: s_add_u32 s32, s6, s4
274+
; GFX10-NEXT: s_add_i32 s32, s34, 0x200
275+
; GFX10-NEXT: s_mov_b32 s34, s8
256276
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
257277
; GFX10-NEXT: s_setpc_b64 s[30:31]
258278
;
@@ -261,13 +281,15 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
261281
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262282
; GFX11-NEXT: s_mov_b32 s3, s33
263283
; GFX11-NEXT: s_mov_b32 s33, s32
284+
; GFX11-NEXT: s_mov_b32 s4, s34
285+
; GFX11-NEXT: s_mov_b32 s34, s32
264286
; GFX11-NEXT: s_add_i32 s32, s32, 16
265287
; GFX11-NEXT: s_getpc_b64 s[0:1]
266288
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
267289
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
268-
; GFX11-NEXT: v_mov_b32_e32 v0, 0
269-
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
270290
; GFX11-NEXT: s_mov_b32 s2, s32
291+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
292+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
271293
; GFX11-NEXT: s_mov_b32 s33, s3
272294
; GFX11-NEXT: scratch_store_b32 off, v0, s2
273295
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -277,8 +299,10 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
277299
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
278300
; GFX11-NEXT: s_and_b32 s0, s0, -16
279301
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
280-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
302+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
281303
; GFX11-NEXT: s_add_u32 s32, s2, s0
304+
; GFX11-NEXT: s_add_i32 s32, s34, 16
305+
; GFX11-NEXT: s_mov_b32 s34, s4
282306
; GFX11-NEXT: s_add_i32 s32, s32, -16
283307
; GFX11-NEXT: s_setpc_b64 s[30:31]
284308
%n = load i32, ptr addrspace(4) @gv, align 16
@@ -355,6 +379,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
355379
; GFX9-NEXT: s_mov_b32 s6, s33
356380
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
357381
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
382+
; GFX9-NEXT: s_mov_b32 s7, s34
383+
; GFX9-NEXT: s_mov_b32 s34, s32
358384
; GFX9-NEXT: s_addk_i32 s32, 0x1000
359385
; GFX9-NEXT: s_getpc_b64 s[4:5]
360386
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -373,6 +399,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
373399
; GFX9-NEXT: s_and_b32 s4, s4, -16
374400
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
375401
; GFX9-NEXT: s_add_u32 s32, s5, s4
402+
; GFX9-NEXT: s_add_i32 s32, s34, 0x1000
403+
; GFX9-NEXT: s_mov_b32 s34, s7
376404
; GFX9-NEXT: s_addk_i32 s32, 0xf000
377405
; GFX9-NEXT: s_waitcnt vmcnt(0)
378406
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -382,8 +410,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
382410
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383411
; GFX10-NEXT: s_mov_b32 s6, s33
384412
; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
385-
; GFX10-NEXT: s_addk_i32 s32, 0x800
413+
; GFX10-NEXT: s_mov_b32 s7, s34
386414
; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
415+
; GFX10-NEXT: s_mov_b32 s34, s32
416+
; GFX10-NEXT: s_addk_i32 s32, 0x800
387417
; GFX10-NEXT: s_getpc_b64 s[4:5]
388418
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
389419
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@@ -401,6 +431,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
401431
; GFX10-NEXT: s_and_b32 s4, s4, -16
402432
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
403433
; GFX10-NEXT: s_add_u32 s32, s5, s4
434+
; GFX10-NEXT: s_add_i32 s32, s34, 0x800
435+
; GFX10-NEXT: s_mov_b32 s34, s7
404436
; GFX10-NEXT: s_addk_i32 s32, 0xf800
405437
; GFX10-NEXT: s_setpc_b64 s[30:31]
406438
;
@@ -409,8 +441,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
409441
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410442
; GFX11-NEXT: s_mov_b32 s2, s33
411443
; GFX11-NEXT: s_add_i32 s33, s32, 31
412-
; GFX11-NEXT: s_add_i32 s32, s32, 64
444+
; GFX11-NEXT: s_mov_b32 s3, s34
413445
; GFX11-NEXT: s_and_not1_b32 s33, s33, 31
446+
; GFX11-NEXT: s_mov_b32 s34, s32
447+
; GFX11-NEXT: s_add_i32 s32, s32, 64
414448
; GFX11-NEXT: s_getpc_b64 s[0:1]
415449
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
416450
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
@@ -429,7 +463,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
429463
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
430464
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
431465
; GFX11-NEXT: s_add_u32 s32, s1, s0
432-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
466+
; GFX11-NEXT: s_add_i32 s32, s34, 64
467+
; GFX11-NEXT: s_mov_b32 s34, s3
433468
; GFX11-NEXT: s_addk_i32 s32, 0xffc0
434469
; GFX11-NEXT: s_setpc_b64 s[30:31]
435470
%n = load i32, ptr addrspace(4) @gv

llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,10 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3
151151
; GCN: ; %bb.0: ; %entry
152152
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153153
; GCN-NEXT: s_mov_b32 s7, s33
154+
; GCN-NEXT: s_mov_b32 s8, s34
154155
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
155156
; GCN-NEXT: s_mov_b32 s33, s32
157+
; GCN-NEXT: s_mov_b32 s34, s32
156158
; GCN-NEXT: s_addk_i32 s32, 0x400
157159
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
158160
; GCN-NEXT: s_cbranch_execz .LBB2_3
@@ -178,8 +180,10 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3
178180
; GCN-NEXT: .LBB2_3: ; %bb.2
179181
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
180182
; GCN-NEXT: v_mov_b32_e32 v0, 0
183+
; GCN-NEXT: s_add_i32 s32, s34, 0x400
181184
; GCN-NEXT: global_store_dword v[0:1], v0, off
182185
; GCN-NEXT: s_waitcnt vmcnt(0)
186+
; GCN-NEXT: s_mov_b32 s34, s8
183187
; GCN-NEXT: s_addk_i32 s32, 0xfc00
184188
; GCN-NEXT: s_mov_b32 s33, s7
185189
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -216,8 +220,10 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i
216220
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217221
; GCN-NEXT: s_mov_b32 s7, s33
218222
; GCN-NEXT: s_add_i32 s33, s32, 0xfc0
223+
; GCN-NEXT: s_mov_b32 s8, s34
219224
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
220225
; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000
226+
; GCN-NEXT: s_mov_b32 s34, s32
221227
; GCN-NEXT: s_addk_i32 s32, 0x2000
222228
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
223229
; GCN-NEXT: s_cbranch_execz .LBB3_2
@@ -240,8 +246,10 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i
240246
; GCN-NEXT: .LBB3_2: ; %bb.1
241247
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
242248
; GCN-NEXT: v_mov_b32_e32 v0, 0
249+
; GCN-NEXT: s_add_i32 s32, s34, 0x2000
243250
; GCN-NEXT: global_store_dword v[0:1], v0, off
244251
; GCN-NEXT: s_waitcnt vmcnt(0)
252+
; GCN-NEXT: s_mov_b32 s34, s8
245253
; GCN-NEXT: s_addk_i32 s32, 0xe000
246254
; GCN-NEXT: s_mov_b32 s33, s7
247255
; GCN-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/amdpal-callable.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -149,15 +149,15 @@ attributes #0 = { nounwind }
149149
; GCN-NEXT: dynamic_stack:
150150
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
151151
; GCN-NEXT: .lds_size: 0{{$}}
152-
; GCN-NEXT: .sgpr_count: 0x28{{$}}
152+
; GCN-NEXT: .sgpr_count: 0x2a{{$}}
153153
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
154154
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
155155
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
156156
; GCN-NEXT: dynamic_stack_loop:
157157
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
158158
; GCN-NEXT: .lds_size: 0{{$}}
159-
; SDAG-NEXT: .sgpr_count: 0x25{{$}}
160-
; GISEL-NEXT: .sgpr_count: 0x26{{$}}
159+
; SDAG-NEXT: .sgpr_count: 0x27{{$}}
160+
; GISEL-NEXT: .sgpr_count: 0x28{{$}}
161161
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
162162
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
163163
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
@@ -182,22 +182,22 @@ attributes #0 = { nounwind }
182182
; GCN-NEXT: no_stack_extern_call:
183183
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
184184
; GCN-NEXT: .lds_size: 0{{$}}
185-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
186-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
185+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
186+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
187187
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
188188
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
189189
; GCN-NEXT: no_stack_extern_call_many_args:
190190
; GCN-NEXT: .backend_stack_size: 0x90{{$}}
191191
; GCN-NEXT: .lds_size: 0{{$}}
192-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
193-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
192+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
193+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
194194
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
195195
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
196196
; GCN-NEXT: no_stack_indirect_call:
197197
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
198198
; GCN-NEXT: .lds_size: 0{{$}}
199-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
200-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
199+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
200+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
201201
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
202202
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
203203
; GCN-NEXT: simple_lds:
@@ -227,15 +227,15 @@ attributes #0 = { nounwind }
227227
; GCN-NEXT: simple_stack_extern_call:
228228
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
229229
; GCN-NEXT: .lds_size: 0{{$}}
230-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
231-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
230+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
231+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
232232
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
233233
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
234234
; GCN-NEXT: simple_stack_indirect_call:
235235
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
236236
; GCN-NEXT: .lds_size: 0{{$}}
237-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
238-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
237+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
238+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
239239
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
240240
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
241241
; GCN-NEXT: simple_stack_recurse:

0 commit comments

Comments
 (0)