@@ -80,13 +80,13 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
8080; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
8181; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8282; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
83- ; GFX9-NEXT: s_mov_b32 s33, s7
8483; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8584; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
8685; GFX9-NEXT: s_and_b32 s4, s4, -16
8786; GFX9-NEXT: s_lshl_b32 s4, s4, 6
8887; GFX9-NEXT: s_add_u32 s32, s6, s4
89- ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
88+ ; GFX9-NEXT: s_mov_b32 s32, s33
89+ ; GFX9-NEXT: s_mov_b32 s33, s7
9090; GFX9-NEXT: s_waitcnt vmcnt(0)
9191; GFX9-NEXT: s_setpc_b64 s[30:31]
9292;
@@ -103,7 +103,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
103103; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
104104; GFX10-NEXT: v_mov_b32_e32 v0, 0
105105; GFX10-NEXT: v_mov_b32_e32 v1, s6
106- ; GFX10-NEXT: s_mov_b32 s33, s7
107106; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
108107; GFX10-NEXT: s_waitcnt lgkmcnt(0)
109108; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
@@ -112,7 +111,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
112111; GFX10-NEXT: s_and_b32 s4, s4, -16
113112; GFX10-NEXT: s_lshl_b32 s4, s4, 5
114113; GFX10-NEXT: s_add_u32 s32, s6, s4
115- ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
114+ ; GFX10-NEXT: s_mov_b32 s32, s33
115+ ; GFX10-NEXT: s_mov_b32 s33, s7
116116; GFX10-NEXT: s_setpc_b64 s[30:31]
117117;
118118; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align4:
@@ -127,7 +127,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
127127; GFX11-NEXT: v_mov_b32_e32 v0, 0
128128; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129129; GFX11-NEXT: s_mov_b32 s2, s32
130- ; GFX11-NEXT: s_mov_b32 s33, s3
131130; GFX11-NEXT: scratch_store_b32 off, v0, s2
132131; GFX11-NEXT: s_waitcnt lgkmcnt(0)
133132; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -136,9 +135,10 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
136135; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
137136; GFX11-NEXT: s_and_b32 s0, s0, -16
138137; GFX11-NEXT: s_lshl_b32 s0, s0, 5
139- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
138+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
140139; GFX11-NEXT: s_add_u32 s32, s2, s0
141- ; GFX11-NEXT: s_add_i32 s32, s32, -16
140+ ; GFX11-NEXT: s_mov_b32 s32, s33
141+ ; GFX11-NEXT: s_mov_b32 s33, s3
142142; GFX11-NEXT: s_setpc_b64 s[30:31]
143143 %n = load i32 , ptr addrspace (4 ) @gv , align 4
144144 %alloca = alloca i32 , i32 %n , addrspace (5 )
@@ -221,13 +221,13 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
221221; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
222222; GFX9-NEXT: s_waitcnt lgkmcnt(0)
223223; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
224- ; GFX9-NEXT: s_mov_b32 s33, s7
225224; GFX9-NEXT: s_waitcnt lgkmcnt(0)
226225; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
227226; GFX9-NEXT: s_and_b32 s4, s4, -16
228227; GFX9-NEXT: s_lshl_b32 s4, s4, 6
229228; GFX9-NEXT: s_add_u32 s32, s6, s4
230- ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
229+ ; GFX9-NEXT: s_mov_b32 s32, s33
230+ ; GFX9-NEXT: s_mov_b32 s33, s7
231231; GFX9-NEXT: s_waitcnt vmcnt(0)
232232; GFX9-NEXT: s_setpc_b64 s[30:31]
233233;
@@ -244,7 +244,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
244244; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
245245; GFX10-NEXT: v_mov_b32_e32 v0, 0
246246; GFX10-NEXT: v_mov_b32_e32 v1, s6
247- ; GFX10-NEXT: s_mov_b32 s33, s7
248247; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
249248; GFX10-NEXT: s_waitcnt lgkmcnt(0)
250249; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
@@ -253,7 +252,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
253252; GFX10-NEXT: s_and_b32 s4, s4, -16
254253; GFX10-NEXT: s_lshl_b32 s4, s4, 5
255254; GFX10-NEXT: s_add_u32 s32, s6, s4
256- ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
255+ ; GFX10-NEXT: s_mov_b32 s32, s33
256+ ; GFX10-NEXT: s_mov_b32 s33, s7
257257; GFX10-NEXT: s_setpc_b64 s[30:31]
258258;
259259; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align16:
@@ -268,7 +268,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
268268; GFX11-NEXT: v_mov_b32_e32 v0, 0
269269; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
270270; GFX11-NEXT: s_mov_b32 s2, s32
271- ; GFX11-NEXT: s_mov_b32 s33, s3
272271; GFX11-NEXT: scratch_store_b32 off, v0, s2
273272; GFX11-NEXT: s_waitcnt lgkmcnt(0)
274273; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -277,9 +276,10 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
277276; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
278277; GFX11-NEXT: s_and_b32 s0, s0, -16
279278; GFX11-NEXT: s_lshl_b32 s0, s0, 5
280- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
279+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
281280; GFX11-NEXT: s_add_u32 s32, s2, s0
282- ; GFX11-NEXT: s_add_i32 s32, s32, -16
281+ ; GFX11-NEXT: s_mov_b32 s32, s33
282+ ; GFX11-NEXT: s_mov_b32 s33, s3
283283; GFX11-NEXT: s_setpc_b64 s[30:31]
284284 %n = load i32 , ptr addrspace (4 ) @gv , align 16
285285 %alloca = alloca i32 , i32 %n , addrspace (5 )
@@ -355,6 +355,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
355355; GFX9-NEXT: s_mov_b32 s6, s33
356356; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
357357; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
358+ ; GFX9-NEXT: s_mov_b32 s7, s34
359+ ; GFX9-NEXT: s_mov_b32 s34, s32
358360; GFX9-NEXT: s_addk_i32 s32, 0x1000
359361; GFX9-NEXT: s_getpc_b64 s[4:5]
360362; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -373,7 +375,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
373375; GFX9-NEXT: s_and_b32 s4, s4, -16
374376; GFX9-NEXT: s_lshl_b32 s4, s4, 6
375377; GFX9-NEXT: s_add_u32 s32, s5, s4
376- ; GFX9-NEXT: s_addk_i32 s32, 0xf000
378+ ; GFX9-NEXT: s_mov_b32 s32, s34
379+ ; GFX9-NEXT: s_mov_b32 s34, s7
377380; GFX9-NEXT: s_waitcnt vmcnt(0)
378381; GFX9-NEXT: s_setpc_b64 s[30:31]
379382;
@@ -382,8 +385,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
382385; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383386; GFX10-NEXT: s_mov_b32 s6, s33
384387; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
385- ; GFX10-NEXT: s_addk_i32 s32, 0x800
388+ ; GFX10-NEXT: s_mov_b32 s7, s34
386389; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
390+ ; GFX10-NEXT: s_mov_b32 s34, s32
391+ ; GFX10-NEXT: s_addk_i32 s32, 0x800
387392; GFX10-NEXT: s_getpc_b64 s[4:5]
388393; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
389394; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@@ -401,16 +406,19 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
401406; GFX10-NEXT: s_and_b32 s4, s4, -16
402407; GFX10-NEXT: s_lshl_b32 s4, s4, 5
403408; GFX10-NEXT: s_add_u32 s32, s5, s4
404- ; GFX10-NEXT: s_addk_i32 s32, 0xf800
409+ ; GFX10-NEXT: s_mov_b32 s32, s34
410+ ; GFX10-NEXT: s_mov_b32 s34, s7
405411; GFX10-NEXT: s_setpc_b64 s[30:31]
406412;
407413; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align32:
408414; GFX11: ; %bb.0:
409415; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410416; GFX11-NEXT: s_mov_b32 s2, s33
411417; GFX11-NEXT: s_add_i32 s33, s32, 31
412- ; GFX11-NEXT: s_add_i32 s32, s32, 64
418+ ; GFX11-NEXT: s_mov_b32 s3, s34
413419; GFX11-NEXT: s_and_not1_b32 s33, s33, 31
420+ ; GFX11-NEXT: s_mov_b32 s34, s32
421+ ; GFX11-NEXT: s_add_i32 s32, s32, 64
414422; GFX11-NEXT: s_getpc_b64 s[0:1]
415423; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
416424; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
@@ -429,8 +437,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
429437; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
430438; GFX11-NEXT: s_lshl_b32 s0, s0, 5
431439; GFX11-NEXT: s_add_u32 s32, s1, s0
432- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
433- ; GFX11-NEXT: s_addk_i32 s32, 0xffc0
440+ ; GFX11-NEXT: s_mov_b32 s32, s34
441+ ; GFX11-NEXT: s_mov_b32 s34, s3
434442; GFX11-NEXT: s_setpc_b64 s[30:31]
435443 %n = load i32 , ptr addrspace (4 ) @gv
436444 %alloca = alloca i32 , i32 %n , align 32 , addrspace (5 )
0 commit comments