@@ -479,21 +479,28 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
479479; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480480; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
481481; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
482- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
483- ; GFX9-O0-NEXT: s_nop 0
484- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
485482; GFX9-O0-NEXT: ; implicit-def: $sgpr8
486483; GFX9-O0-NEXT: ; implicit-def: $sgpr8
487484; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
488485; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
489- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
487+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
488+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
489+ ; GFX9-O0-NEXT: s_nop 0
490+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
491+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
492+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
493+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
490494; GFX9-O0-NEXT: s_nop 0
491- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
495+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
496+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
497+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
493498; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
494- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
499+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
500+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
495501; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
496- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
502+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
503+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
497504; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
498505; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
499506; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -504,6 +511,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
504511; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
505512; GFX9-O0-NEXT: s_mov_b32 s14, s13
506513; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
514+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
507515; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
508516; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
509517; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1036,10 +1044,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10361044; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10371045; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
10381046; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1039- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1040- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1041- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1042- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1047+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1048+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1049+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1050+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
10431051; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10441052; GFX9-O0-NEXT: s_mov_b32 s5, s6
10451053; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2654,21 +2662,28 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26542662; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26552663; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
26562664; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2657- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2658- ; GFX9-O0-NEXT: s_nop 0
2659- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
26602665; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26612666; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26622667; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
26632668; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2664- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2669+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2670+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2671+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2672+ ; GFX9-O0-NEXT: s_nop 0
2673+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2674+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2675+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2676+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
26652677; GFX9-O0-NEXT: s_nop 0
2666- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2667- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2678+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2679+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2680+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
26682681; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2669- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2682+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2683+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
26702684; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2671- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2685+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2686+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
26722687; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
26732688; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
26742689; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2679,6 +2694,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26792694; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
26802695; GFX9-O0-NEXT: s_mov_b32 s14, s13
26812696; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2697+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
26822698; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
26832699; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
26842700; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3211,10 +3227,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
32113227; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
32123228; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
32133229; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3214- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3215- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3216- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3217- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3230+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3231+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3232+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3233+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
32183234; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
32193235; GFX9-O0-NEXT: s_mov_b32 s5, s6
32203236; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments