@@ -85,7 +85,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
8585; GFX9-NEXT: s_and_b32 s4, s4, -16
8686; GFX9-NEXT: s_lshl_b32 s4, s4, 6
8787; GFX9-NEXT: s_add_u32 s32, s6, s4
88- ; GFX9-NEXT: s_mov_b32 s32, s33
8988; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9089; GFX9-NEXT: s_mov_b32 s33, s7
9190; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -112,7 +111,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
112111; GFX10-NEXT: s_and_b32 s4, s4, -16
113112; GFX10-NEXT: s_lshl_b32 s4, s4, 5
114113; GFX10-NEXT: s_add_u32 s32, s6, s4
115- ; GFX10-NEXT: s_mov_b32 s32, s33
116114; GFX10-NEXT: s_addk_i32 s32, 0xfe00
117115; GFX10-NEXT: s_mov_b32 s33, s7
118116; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -126,9 +124,9 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
126124; GFX11-NEXT: s_getpc_b64 s[0:1]
127125; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
128126; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
129- ; GFX11-NEXT: s_mov_b32 s2, s32
130- ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
131127; GFX11-NEXT: v_mov_b32_e32 v0, 0
128+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129+ ; GFX11-NEXT: s_mov_b32 s2, s32
132130; GFX11-NEXT: scratch_store_b32 off, v0, s2
133131; GFX11-NEXT: s_waitcnt lgkmcnt(0)
134132; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -137,9 +135,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
137135; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
138136; GFX11-NEXT: s_and_b32 s0, s0, -16
139137; GFX11-NEXT: s_lshl_b32 s0, s0, 5
140- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1 ) | instid1(SALU_CYCLE_1)
138+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT ) | instid1(SALU_CYCLE_1)
141139; GFX11-NEXT: s_add_u32 s32, s2, s0
142- ; GFX11-NEXT: s_mov_b32 s32, s33
143140; GFX11-NEXT: s_add_i32 s32, s32, -16
144141; GFX11-NEXT: s_mov_b32 s33, s3
145142; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -229,7 +226,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
229226; GFX9-NEXT: s_and_b32 s4, s4, -16
230227; GFX9-NEXT: s_lshl_b32 s4, s4, 6
231228; GFX9-NEXT: s_add_u32 s32, s6, s4
232- ; GFX9-NEXT: s_mov_b32 s32, s33
233229; GFX9-NEXT: s_addk_i32 s32, 0xfc00
234230; GFX9-NEXT: s_mov_b32 s33, s7
235231; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -256,7 +252,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
256252; GFX10-NEXT: s_and_b32 s4, s4, -16
257253; GFX10-NEXT: s_lshl_b32 s4, s4, 5
258254; GFX10-NEXT: s_add_u32 s32, s6, s4
259- ; GFX10-NEXT: s_mov_b32 s32, s33
260255; GFX10-NEXT: s_addk_i32 s32, 0xfe00
261256; GFX10-NEXT: s_mov_b32 s33, s7
262257; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -270,9 +265,9 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
270265; GFX11-NEXT: s_getpc_b64 s[0:1]
271266; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
272267; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
273- ; GFX11-NEXT: s_mov_b32 s2, s32
274- ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
275268; GFX11-NEXT: v_mov_b32_e32 v0, 0
269+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
270+ ; GFX11-NEXT: s_mov_b32 s2, s32
276271; GFX11-NEXT: scratch_store_b32 off, v0, s2
277272; GFX11-NEXT: s_waitcnt lgkmcnt(0)
278273; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -281,9 +276,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
281276; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
282277; GFX11-NEXT: s_and_b32 s0, s0, -16
283278; GFX11-NEXT: s_lshl_b32 s0, s0, 5
284- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1 ) | instid1(SALU_CYCLE_1)
279+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT ) | instid1(SALU_CYCLE_1)
285280; GFX11-NEXT: s_add_u32 s32, s2, s0
286- ; GFX11-NEXT: s_mov_b32 s32, s33
287281; GFX11-NEXT: s_add_i32 s32, s32, -16
288282; GFX11-NEXT: s_mov_b32 s33, s3
289283; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -361,8 +355,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
361355; GFX9-NEXT: s_mov_b32 s6, s33
362356; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
363357; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
364- ; GFX9-NEXT: s_mov_b32 s7, s34
365- ; GFX9-NEXT: s_mov_b32 s34, s32
366358; GFX9-NEXT: s_addk_i32 s32, 0x1000
367359; GFX9-NEXT: s_getpc_b64 s[4:5]
368360; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -380,8 +372,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
380372; GFX9-NEXT: s_and_b32 s4, s4, -16
381373; GFX9-NEXT: s_lshl_b32 s4, s4, 6
382374; GFX9-NEXT: s_add_u32 s32, s5, s4
383- ; GFX9-NEXT: s_mov_b32 s32, s34
384- ; GFX9-NEXT: s_mov_b32 s34, s7
385375; GFX9-NEXT: s_addk_i32 s32, 0xf000
386376; GFX9-NEXT: s_mov_b32 s33, s6
387377; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -392,9 +382,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
392382; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393383; GFX10-NEXT: s_mov_b32 s6, s33
394384; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
395- ; GFX10-NEXT: s_mov_b32 s7, s34
396385; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
397- ; GFX10-NEXT: s_mov_b32 s34, s32
398386; GFX10-NEXT: s_addk_i32 s32, 0x800
399387; GFX10-NEXT: s_getpc_b64 s[4:5]
400388; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -412,8 +400,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
412400; GFX10-NEXT: s_and_b32 s4, s4, -16
413401; GFX10-NEXT: s_lshl_b32 s4, s4, 5
414402; GFX10-NEXT: s_add_u32 s32, s5, s4
415- ; GFX10-NEXT: s_mov_b32 s32, s34
416- ; GFX10-NEXT: s_mov_b32 s34, s7
417403; GFX10-NEXT: s_addk_i32 s32, 0xf800
418404; GFX10-NEXT: s_mov_b32 s33, s6
419405; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -423,9 +409,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
423409; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424410; GFX11-NEXT: s_mov_b32 s2, s33
425411; GFX11-NEXT: s_add_i32 s33, s32, 31
426- ; GFX11-NEXT: s_mov_b32 s3, s34
412+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
427413; GFX11-NEXT: s_and_not1_b32 s33, s33, 31
428- ; GFX11-NEXT: s_mov_b32 s34, s32
429414; GFX11-NEXT: s_add_i32 s32, s32, 64
430415; GFX11-NEXT: s_getpc_b64 s[0:1]
431416; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
@@ -444,8 +429,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
444429; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
445430; GFX11-NEXT: s_lshl_b32 s0, s0, 5
446431; GFX11-NEXT: s_add_u32 s32, s1, s0
447- ; GFX11-NEXT: s_mov_b32 s32, s34
448- ; GFX11-NEXT: s_mov_b32 s34, s3
432+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
449433; GFX11-NEXT: s_addk_i32 s32, 0xffc0
450434; GFX11-NEXT: s_mov_b32 s33, s2
451435; GFX11-NEXT: s_setpc_b64 s[30:31]
0 commit comments