@@ -69,6 +69,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
6969; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7070; GFX9-NEXT: s_mov_b32 s7, s33
7171; GFX9-NEXT: s_mov_b32 s33, s32
72+ ; GFX9-NEXT: s_mov_b32 s8, s34
73+ ; GFX9-NEXT: s_mov_b32 s34, s32
7274; GFX9-NEXT: s_addk_i32 s32, 0x400
7375; GFX9-NEXT: s_getpc_b64 s[4:5]
7476; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -86,6 +88,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
8688; GFX9-NEXT: s_and_b32 s4, s4, -16
8789; GFX9-NEXT: s_lshl_b32 s4, s4, 6
8890; GFX9-NEXT: s_add_u32 s32, s6, s4
91+ ; GFX9-NEXT: s_add_i32 s32, s34, 0x400
92+ ; GFX9-NEXT: s_mov_b32 s34, s8
8993; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9094; GFX9-NEXT: s_waitcnt vmcnt(0)
9195; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -95,6 +99,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
9599; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96100; GFX10-NEXT: s_mov_b32 s7, s33
97101; GFX10-NEXT: s_mov_b32 s33, s32
102+ ; GFX10-NEXT: s_mov_b32 s8, s34
103+ ; GFX10-NEXT: s_mov_b32 s34, s32
98104; GFX10-NEXT: s_addk_i32 s32, 0x200
99105; GFX10-NEXT: s_getpc_b64 s[4:5]
100106; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -112,6 +118,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
112118; GFX10-NEXT: s_and_b32 s4, s4, -16
113119; GFX10-NEXT: s_lshl_b32 s4, s4, 5
114120; GFX10-NEXT: s_add_u32 s32, s6, s4
121+ ; GFX10-NEXT: s_add_i32 s32, s34, 0x200
122+ ; GFX10-NEXT: s_mov_b32 s34, s8
115123; GFX10-NEXT: s_addk_i32 s32, 0xfe00
116124; GFX10-NEXT: s_setpc_b64 s[30:31]
117125;
@@ -120,13 +128,15 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
120128; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121129; GFX11-NEXT: s_mov_b32 s3, s33
122130; GFX11-NEXT: s_mov_b32 s33, s32
131+ ; GFX11-NEXT: s_mov_b32 s4, s34
132+ ; GFX11-NEXT: s_mov_b32 s34, s32
123133; GFX11-NEXT: s_add_i32 s32, s32, 16
124134; GFX11-NEXT: s_getpc_b64 s[0:1]
125135; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
126136; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
127- ; GFX11-NEXT: v_mov_b32_e32 v0, 0
128- ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129137; GFX11-NEXT: s_mov_b32 s2, s32
138+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
139+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
130140; GFX11-NEXT: s_mov_b32 s33, s3
131141; GFX11-NEXT: scratch_store_b32 off, v0, s2
132142; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -136,8 +146,10 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
136146; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
137147; GFX11-NEXT: s_and_b32 s0, s0, -16
138148; GFX11-NEXT: s_lshl_b32 s0, s0, 5
139- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
149+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
140150; GFX11-NEXT: s_add_u32 s32, s2, s0
151+ ; GFX11-NEXT: s_add_i32 s32, s34, 16
152+ ; GFX11-NEXT: s_mov_b32 s34, s4
141153; GFX11-NEXT: s_add_i32 s32, s32, -16
142154; GFX11-NEXT: s_setpc_b64 s[30:31]
143155 %n = load i32 , ptr addrspace (4 ) @gv , align 4
@@ -210,6 +222,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
210222; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211223; GFX9-NEXT: s_mov_b32 s7, s33
212224; GFX9-NEXT: s_mov_b32 s33, s32
225+ ; GFX9-NEXT: s_mov_b32 s8, s34
226+ ; GFX9-NEXT: s_mov_b32 s34, s32
213227; GFX9-NEXT: s_addk_i32 s32, 0x400
214228; GFX9-NEXT: s_getpc_b64 s[4:5]
215229; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -227,6 +241,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
227241; GFX9-NEXT: s_and_b32 s4, s4, -16
228242; GFX9-NEXT: s_lshl_b32 s4, s4, 6
229243; GFX9-NEXT: s_add_u32 s32, s6, s4
244+ ; GFX9-NEXT: s_add_i32 s32, s34, 0x400
245+ ; GFX9-NEXT: s_mov_b32 s34, s8
230246; GFX9-NEXT: s_addk_i32 s32, 0xfc00
231247; GFX9-NEXT: s_waitcnt vmcnt(0)
232248; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -236,6 +252,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
236252; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237253; GFX10-NEXT: s_mov_b32 s7, s33
238254; GFX10-NEXT: s_mov_b32 s33, s32
255+ ; GFX10-NEXT: s_mov_b32 s8, s34
256+ ; GFX10-NEXT: s_mov_b32 s34, s32
239257; GFX10-NEXT: s_addk_i32 s32, 0x200
240258; GFX10-NEXT: s_getpc_b64 s[4:5]
241259; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -253,6 +271,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
253271; GFX10-NEXT: s_and_b32 s4, s4, -16
254272; GFX10-NEXT: s_lshl_b32 s4, s4, 5
255273; GFX10-NEXT: s_add_u32 s32, s6, s4
274+ ; GFX10-NEXT: s_add_i32 s32, s34, 0x200
275+ ; GFX10-NEXT: s_mov_b32 s34, s8
256276; GFX10-NEXT: s_addk_i32 s32, 0xfe00
257277; GFX10-NEXT: s_setpc_b64 s[30:31]
258278;
@@ -261,13 +281,15 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
261281; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262282; GFX11-NEXT: s_mov_b32 s3, s33
263283; GFX11-NEXT: s_mov_b32 s33, s32
284+ ; GFX11-NEXT: s_mov_b32 s4, s34
285+ ; GFX11-NEXT: s_mov_b32 s34, s32
264286; GFX11-NEXT: s_add_i32 s32, s32, 16
265287; GFX11-NEXT: s_getpc_b64 s[0:1]
266288; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
267289; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
268- ; GFX11-NEXT: v_mov_b32_e32 v0, 0
269- ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
270290; GFX11-NEXT: s_mov_b32 s2, s32
291+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
292+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
271293; GFX11-NEXT: s_mov_b32 s33, s3
272294; GFX11-NEXT: scratch_store_b32 off, v0, s2
273295; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -277,8 +299,10 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
277299; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
278300; GFX11-NEXT: s_and_b32 s0, s0, -16
279301; GFX11-NEXT: s_lshl_b32 s0, s0, 5
280- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
302+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
281303; GFX11-NEXT: s_add_u32 s32, s2, s0
304+ ; GFX11-NEXT: s_add_i32 s32, s34, 16
305+ ; GFX11-NEXT: s_mov_b32 s34, s4
282306; GFX11-NEXT: s_add_i32 s32, s32, -16
283307; GFX11-NEXT: s_setpc_b64 s[30:31]
284308 %n = load i32 , ptr addrspace (4 ) @gv , align 16
@@ -355,6 +379,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
355379; GFX9-NEXT: s_mov_b32 s6, s33
356380; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
357381; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
382+ ; GFX9-NEXT: s_mov_b32 s7, s34
383+ ; GFX9-NEXT: s_mov_b32 s34, s32
358384; GFX9-NEXT: s_addk_i32 s32, 0x1000
359385; GFX9-NEXT: s_getpc_b64 s[4:5]
360386; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -373,6 +399,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
373399; GFX9-NEXT: s_and_b32 s4, s4, -16
374400; GFX9-NEXT: s_lshl_b32 s4, s4, 6
375401; GFX9-NEXT: s_add_u32 s32, s5, s4
402+ ; GFX9-NEXT: s_add_i32 s32, s34, 0x1000
403+ ; GFX9-NEXT: s_mov_b32 s34, s7
376404; GFX9-NEXT: s_addk_i32 s32, 0xf000
377405; GFX9-NEXT: s_waitcnt vmcnt(0)
378406; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -382,8 +410,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
382410; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383411; GFX10-NEXT: s_mov_b32 s6, s33
384412; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
385- ; GFX10-NEXT: s_addk_i32 s32, 0x800
413+ ; GFX10-NEXT: s_mov_b32 s7, s34
386414; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
415+ ; GFX10-NEXT: s_mov_b32 s34, s32
416+ ; GFX10-NEXT: s_addk_i32 s32, 0x800
387417; GFX10-NEXT: s_getpc_b64 s[4:5]
388418; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
389419; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@@ -401,6 +431,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
401431; GFX10-NEXT: s_and_b32 s4, s4, -16
402432; GFX10-NEXT: s_lshl_b32 s4, s4, 5
403433; GFX10-NEXT: s_add_u32 s32, s5, s4
434+ ; GFX10-NEXT: s_add_i32 s32, s34, 0x800
435+ ; GFX10-NEXT: s_mov_b32 s34, s7
404436; GFX10-NEXT: s_addk_i32 s32, 0xf800
405437; GFX10-NEXT: s_setpc_b64 s[30:31]
406438;
@@ -409,8 +441,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
409441; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410442; GFX11-NEXT: s_mov_b32 s2, s33
411443; GFX11-NEXT: s_add_i32 s33, s32, 31
412- ; GFX11-NEXT: s_add_i32 s32, s32, 64
444+ ; GFX11-NEXT: s_mov_b32 s3, s34
413445; GFX11-NEXT: s_and_not1_b32 s33, s33, 31
446+ ; GFX11-NEXT: s_mov_b32 s34, s32
447+ ; GFX11-NEXT: s_add_i32 s32, s32, 64
414448; GFX11-NEXT: s_getpc_b64 s[0:1]
415449; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
416450; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
@@ -429,7 +463,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
429463; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
430464; GFX11-NEXT: s_lshl_b32 s0, s0, 5
431465; GFX11-NEXT: s_add_u32 s32, s1, s0
432- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
466+ ; GFX11-NEXT: s_add_i32 s32, s34, 64
467+ ; GFX11-NEXT: s_mov_b32 s34, s3
433468; GFX11-NEXT: s_addk_i32 s32, 0xffc0
434469; GFX11-NEXT: s_setpc_b64 s[30:31]
435470 %n = load i32 , ptr addrspace (4 ) @gv
0 commit comments