Skip to content

Commit c34194a

Browse files
committed
[AMDGPU] Restore SP correctly in functions with dynamic allocas
Currently, the AMDGPU backend sets up FP and then increments SP by a fixed size, from FP, in the prolog and decrements it by the same amount in the epilog.

Prolog:
    fp = sp + (alignment - 1)
    fp &= -alignment
    sp += frameSize + alignment
Epilog:
    sp -= (frameSize + alignment)

In the presence of dynamic allocas, this leads to incorrect restoration of SP: a dynamic alloca advances SP by an amount known only at run time, which the fixed-size decrement in the epilog does not undo.

This patch enforces the presence of a base pointer for all functions with dynamic allocas, and SP is restored from the saved BP in the epilog.

Prolog:
    fp = sp + (alignment - 1)
    fp &= -alignment
    bp = sp
    sp += frameSize + alignment
Epilog:
    sp = bp + frameSize + alignment
    sp -= (frameSize + alignment)

(Note: for dynamic allocas with default alignment, SP could be restored from the saved FP as well. However, for the sake of uniformity, the presence of BP is enforced.)

Fixes: SWDEV-408164
1 parent 52c338d commit c34194a

File tree

8 files changed

+293
-78
lines changed

8 files changed

+293
-78
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
12591259
Register FramePtrRegScratchCopy;
12601260
Register SGPRForFPSaveRestoreCopy =
12611261
FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1262+
1263+
if (MFI.hasVarSizedObjects()) {
1264+
assert(TRI.hasBasePointer(MF) &&
1265+
"Variable sized objects require base pointer to be setup!");
1266+
Register BasePtrReg = TRI.getBaseRegister();
1267+
// Restore SP to fixed frame size
1268+
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1269+
.addReg(BasePtrReg)
1270+
.addImm(RoundedSize * getScratchScaleFactor(ST))
1271+
.setMIFlag(MachineInstr::FrameDestroy);
1272+
}
12621273
if (FPSaved) {
12631274
// CSR spill restores should use FP as base register. If
12641275
// SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,11 @@ Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
525525
bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
526526
// When we need stack realignment, we can't reference off of the
527527
// stack pointer, so we reserve a base pointer.
528+
// For functions with dynamically sized stack objects, we need to reference
529+
// off the base pointer in the epilog to restore the stack frame.
528530
const MachineFrameInfo &MFI = MF.getFrameInfo();
529-
return MFI.getNumFixedObjects() && shouldRealignStack(MF);
531+
return (MFI.getNumFixedObjects() && shouldRealignStack(MF)) ||
532+
MFI.hasVarSizedObjects();
530533
}
531534

532535
Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }

llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
6969
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7070
; GFX9-NEXT: s_mov_b32 s7, s33
7171
; GFX9-NEXT: s_mov_b32 s33, s32
72+
; GFX9-NEXT: s_mov_b32 s8, s34
73+
; GFX9-NEXT: s_mov_b32 s34, s32
7274
; GFX9-NEXT: s_addk_i32 s32, 0x400
7375
; GFX9-NEXT: s_getpc_b64 s[4:5]
7476
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -86,6 +88,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
8688
; GFX9-NEXT: s_and_b32 s4, s4, -16
8789
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
8890
; GFX9-NEXT: s_add_u32 s32, s6, s4
91+
; GFX9-NEXT: s_add_i32 s32, s34, 0x400
92+
; GFX9-NEXT: s_mov_b32 s34, s8
8993
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9094
; GFX9-NEXT: s_waitcnt vmcnt(0)
9195
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -95,6 +99,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
9599
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96100
; GFX10-NEXT: s_mov_b32 s7, s33
97101
; GFX10-NEXT: s_mov_b32 s33, s32
102+
; GFX10-NEXT: s_mov_b32 s8, s34
103+
; GFX10-NEXT: s_mov_b32 s34, s32
98104
; GFX10-NEXT: s_addk_i32 s32, 0x200
99105
; GFX10-NEXT: s_getpc_b64 s[4:5]
100106
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -112,6 +118,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
112118
; GFX10-NEXT: s_and_b32 s4, s4, -16
113119
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
114120
; GFX10-NEXT: s_add_u32 s32, s6, s4
121+
; GFX10-NEXT: s_add_i32 s32, s34, 0x200
122+
; GFX10-NEXT: s_mov_b32 s34, s8
115123
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
116124
; GFX10-NEXT: s_setpc_b64 s[30:31]
117125
;
@@ -120,13 +128,15 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
120128
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121129
; GFX11-NEXT: s_mov_b32 s3, s33
122130
; GFX11-NEXT: s_mov_b32 s33, s32
131+
; GFX11-NEXT: s_mov_b32 s4, s34
132+
; GFX11-NEXT: s_mov_b32 s34, s32
123133
; GFX11-NEXT: s_add_i32 s32, s32, 16
124134
; GFX11-NEXT: s_getpc_b64 s[0:1]
125135
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
126136
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
127-
; GFX11-NEXT: v_mov_b32_e32 v0, 0
128-
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129137
; GFX11-NEXT: s_mov_b32 s2, s32
138+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
139+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
130140
; GFX11-NEXT: s_mov_b32 s33, s3
131141
; GFX11-NEXT: scratch_store_b32 off, v0, s2
132142
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -136,8 +146,10 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
136146
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
137147
; GFX11-NEXT: s_and_b32 s0, s0, -16
138148
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
139-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
149+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
140150
; GFX11-NEXT: s_add_u32 s32, s2, s0
151+
; GFX11-NEXT: s_add_i32 s32, s34, 16
152+
; GFX11-NEXT: s_mov_b32 s34, s4
141153
; GFX11-NEXT: s_add_i32 s32, s32, -16
142154
; GFX11-NEXT: s_setpc_b64 s[30:31]
143155
%n = load i32, ptr addrspace(4) @gv, align 4
@@ -210,6 +222,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
210222
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211223
; GFX9-NEXT: s_mov_b32 s7, s33
212224
; GFX9-NEXT: s_mov_b32 s33, s32
225+
; GFX9-NEXT: s_mov_b32 s8, s34
226+
; GFX9-NEXT: s_mov_b32 s34, s32
213227
; GFX9-NEXT: s_addk_i32 s32, 0x400
214228
; GFX9-NEXT: s_getpc_b64 s[4:5]
215229
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -227,6 +241,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
227241
; GFX9-NEXT: s_and_b32 s4, s4, -16
228242
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
229243
; GFX9-NEXT: s_add_u32 s32, s6, s4
244+
; GFX9-NEXT: s_add_i32 s32, s34, 0x400
245+
; GFX9-NEXT: s_mov_b32 s34, s8
230246
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
231247
; GFX9-NEXT: s_waitcnt vmcnt(0)
232248
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -236,6 +252,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
236252
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237253
; GFX10-NEXT: s_mov_b32 s7, s33
238254
; GFX10-NEXT: s_mov_b32 s33, s32
255+
; GFX10-NEXT: s_mov_b32 s8, s34
256+
; GFX10-NEXT: s_mov_b32 s34, s32
239257
; GFX10-NEXT: s_addk_i32 s32, 0x200
240258
; GFX10-NEXT: s_getpc_b64 s[4:5]
241259
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -253,6 +271,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
253271
; GFX10-NEXT: s_and_b32 s4, s4, -16
254272
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
255273
; GFX10-NEXT: s_add_u32 s32, s6, s4
274+
; GFX10-NEXT: s_add_i32 s32, s34, 0x200
275+
; GFX10-NEXT: s_mov_b32 s34, s8
256276
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
257277
; GFX10-NEXT: s_setpc_b64 s[30:31]
258278
;
@@ -261,13 +281,15 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
261281
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262282
; GFX11-NEXT: s_mov_b32 s3, s33
263283
; GFX11-NEXT: s_mov_b32 s33, s32
284+
; GFX11-NEXT: s_mov_b32 s4, s34
285+
; GFX11-NEXT: s_mov_b32 s34, s32
264286
; GFX11-NEXT: s_add_i32 s32, s32, 16
265287
; GFX11-NEXT: s_getpc_b64 s[0:1]
266288
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
267289
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
268-
; GFX11-NEXT: v_mov_b32_e32 v0, 0
269-
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
270290
; GFX11-NEXT: s_mov_b32 s2, s32
291+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
292+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
271293
; GFX11-NEXT: s_mov_b32 s33, s3
272294
; GFX11-NEXT: scratch_store_b32 off, v0, s2
273295
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -277,8 +299,10 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
277299
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
278300
; GFX11-NEXT: s_and_b32 s0, s0, -16
279301
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
280-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
302+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
281303
; GFX11-NEXT: s_add_u32 s32, s2, s0
304+
; GFX11-NEXT: s_add_i32 s32, s34, 16
305+
; GFX11-NEXT: s_mov_b32 s34, s4
282306
; GFX11-NEXT: s_add_i32 s32, s32, -16
283307
; GFX11-NEXT: s_setpc_b64 s[30:31]
284308
%n = load i32, ptr addrspace(4) @gv, align 16
@@ -355,6 +379,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
355379
; GFX9-NEXT: s_mov_b32 s6, s33
356380
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
357381
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
382+
; GFX9-NEXT: s_mov_b32 s7, s34
383+
; GFX9-NEXT: s_mov_b32 s34, s32
358384
; GFX9-NEXT: s_addk_i32 s32, 0x1000
359385
; GFX9-NEXT: s_getpc_b64 s[4:5]
360386
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -373,6 +399,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
373399
; GFX9-NEXT: s_and_b32 s4, s4, -16
374400
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
375401
; GFX9-NEXT: s_add_u32 s32, s5, s4
402+
; GFX9-NEXT: s_add_i32 s32, s34, 0x1000
403+
; GFX9-NEXT: s_mov_b32 s34, s7
376404
; GFX9-NEXT: s_addk_i32 s32, 0xf000
377405
; GFX9-NEXT: s_waitcnt vmcnt(0)
378406
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -382,8 +410,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
382410
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383411
; GFX10-NEXT: s_mov_b32 s6, s33
384412
; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
385-
; GFX10-NEXT: s_addk_i32 s32, 0x800
413+
; GFX10-NEXT: s_mov_b32 s7, s34
386414
; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
415+
; GFX10-NEXT: s_mov_b32 s34, s32
416+
; GFX10-NEXT: s_addk_i32 s32, 0x800
387417
; GFX10-NEXT: s_getpc_b64 s[4:5]
388418
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
389419
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@@ -401,6 +431,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
401431
; GFX10-NEXT: s_and_b32 s4, s4, -16
402432
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
403433
; GFX10-NEXT: s_add_u32 s32, s5, s4
434+
; GFX10-NEXT: s_add_i32 s32, s34, 0x800
435+
; GFX10-NEXT: s_mov_b32 s34, s7
404436
; GFX10-NEXT: s_addk_i32 s32, 0xf800
405437
; GFX10-NEXT: s_setpc_b64 s[30:31]
406438
;
@@ -409,8 +441,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
409441
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410442
; GFX11-NEXT: s_mov_b32 s2, s33
411443
; GFX11-NEXT: s_add_i32 s33, s32, 31
412-
; GFX11-NEXT: s_add_i32 s32, s32, 64
444+
; GFX11-NEXT: s_mov_b32 s3, s34
413445
; GFX11-NEXT: s_and_not1_b32 s33, s33, 31
446+
; GFX11-NEXT: s_mov_b32 s34, s32
447+
; GFX11-NEXT: s_add_i32 s32, s32, 64
414448
; GFX11-NEXT: s_getpc_b64 s[0:1]
415449
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
416450
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
@@ -429,7 +463,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
429463
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
430464
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
431465
; GFX11-NEXT: s_add_u32 s32, s1, s0
432-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
466+
; GFX11-NEXT: s_add_i32 s32, s34, 64
467+
; GFX11-NEXT: s_mov_b32 s34, s3
433468
; GFX11-NEXT: s_addk_i32 s32, 0xffc0
434469
; GFX11-NEXT: s_setpc_b64 s[30:31]
435470
%n = load i32, ptr addrspace(4) @gv

llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,10 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3
151151
; GCN: ; %bb.0: ; %entry
152152
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153153
; GCN-NEXT: s_mov_b32 s7, s33
154+
; GCN-NEXT: s_mov_b32 s8, s34
154155
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
155156
; GCN-NEXT: s_mov_b32 s33, s32
157+
; GCN-NEXT: s_mov_b32 s34, s32
156158
; GCN-NEXT: s_addk_i32 s32, 0x400
157159
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
158160
; GCN-NEXT: s_cbranch_execz .LBB2_3
@@ -178,8 +180,10 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3
178180
; GCN-NEXT: .LBB2_3: ; %bb.2
179181
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
180182
; GCN-NEXT: v_mov_b32_e32 v0, 0
183+
; GCN-NEXT: s_add_i32 s32, s34, 0x400
181184
; GCN-NEXT: global_store_dword v[0:1], v0, off
182185
; GCN-NEXT: s_waitcnt vmcnt(0)
186+
; GCN-NEXT: s_mov_b32 s34, s8
183187
; GCN-NEXT: s_addk_i32 s32, 0xfc00
184188
; GCN-NEXT: s_mov_b32 s33, s7
185189
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -216,8 +220,10 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i
216220
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217221
; GCN-NEXT: s_mov_b32 s7, s33
218222
; GCN-NEXT: s_add_i32 s33, s32, 0xfc0
223+
; GCN-NEXT: s_mov_b32 s8, s34
219224
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
220225
; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000
226+
; GCN-NEXT: s_mov_b32 s34, s32
221227
; GCN-NEXT: s_addk_i32 s32, 0x2000
222228
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
223229
; GCN-NEXT: s_cbranch_execz .LBB3_2
@@ -240,8 +246,10 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i
240246
; GCN-NEXT: .LBB3_2: ; %bb.1
241247
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
242248
; GCN-NEXT: v_mov_b32_e32 v0, 0
249+
; GCN-NEXT: s_add_i32 s32, s34, 0x2000
243250
; GCN-NEXT: global_store_dword v[0:1], v0, off
244251
; GCN-NEXT: s_waitcnt vmcnt(0)
252+
; GCN-NEXT: s_mov_b32 s34, s8
245253
; GCN-NEXT: s_addk_i32 s32, 0xe000
246254
; GCN-NEXT: s_mov_b32 s33, s7
247255
; GCN-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/amdpal-callable.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -149,15 +149,15 @@ attributes #0 = { nounwind }
149149
; GCN-NEXT: dynamic_stack:
150150
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
151151
; GCN-NEXT: .lds_size: 0{{$}}
152-
; GCN-NEXT: .sgpr_count: 0x28{{$}}
152+
; GCN-NEXT: .sgpr_count: 0x2a{{$}}
153153
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
154154
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
155155
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
156156
; GCN-NEXT: dynamic_stack_loop:
157157
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
158158
; GCN-NEXT: .lds_size: 0{{$}}
159-
; SDAG-NEXT: .sgpr_count: 0x25{{$}}
160-
; GISEL-NEXT: .sgpr_count: 0x26{{$}}
159+
; SDAG-NEXT: .sgpr_count: 0x27{{$}}
160+
; GISEL-NEXT: .sgpr_count: 0x28{{$}}
161161
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
162162
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
163163
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
@@ -182,22 +182,22 @@ attributes #0 = { nounwind }
182182
; GCN-NEXT: no_stack_extern_call:
183183
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
184184
; GCN-NEXT: .lds_size: 0{{$}}
185-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
186-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
185+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
186+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
187187
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
188188
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
189189
; GCN-NEXT: no_stack_extern_call_many_args:
190190
; GCN-NEXT: .backend_stack_size: 0x90{{$}}
191191
; GCN-NEXT: .lds_size: 0{{$}}
192-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
193-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
192+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
193+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
194194
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
195195
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
196196
; GCN-NEXT: no_stack_indirect_call:
197197
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
198198
; GCN-NEXT: .lds_size: 0{{$}}
199-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
200-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
199+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
200+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
201201
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
202202
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
203203
; GCN-NEXT: simple_lds:
@@ -227,15 +227,15 @@ attributes #0 = { nounwind }
227227
; GCN-NEXT: simple_stack_extern_call:
228228
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
229229
; GCN-NEXT: .lds_size: 0{{$}}
230-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
231-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
230+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
231+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
232232
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
233233
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
234234
; GCN-NEXT: simple_stack_indirect_call:
235235
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
236236
; GCN-NEXT: .lds_size: 0{{$}}
237-
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
238-
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
237+
; GFX8-NEXT: .sgpr_count: 0x2a{{$}}
238+
; GFX9-NEXT: .sgpr_count: 0x2e{{$}}
239239
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
240240
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
241241
; GCN-NEXT: simple_stack_recurse:

0 commit comments

Comments
 (0)