@@ -475,21 +475,28 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475
475
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476
476
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477
477
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479
- ; GFX9-O0-NEXT: s_nop 0
480
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
481
478
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
482
479
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
483
480
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
484
481
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
485
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
482
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
483
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
484
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
485
+ ; GFX9-O0-NEXT: s_nop 0
486
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
487
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
488
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
489
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486
490
; GFX9-O0-NEXT: s_nop 0
487
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
491
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492
+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
493
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
489
494
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
490
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
495
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
496
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
491
497
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
492
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
498
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
499
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
493
500
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
494
501
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
495
502
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -501,6 +508,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
501
508
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
502
509
; GFX9-O0-NEXT: s_mov_b32 s14, s13
503
510
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
511
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
504
512
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
505
513
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
506
514
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1039,10 +1047,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1039
1047
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1040
1048
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1041
1049
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1042
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1043
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1044
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1045
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1050
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1051
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1052
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1053
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1046
1054
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1047
1055
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1048
1056
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2660,31 +2668,40 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2660
2668
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2661
2669
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2662
2670
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2663
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2664
- ; GFX9-O0-NEXT: s_nop 0
2665
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2666
2671
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2667
2672
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2668
2673
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2669
2674
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2670
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2675
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2676
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2677
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2671
2678
; GFX9-O0-NEXT: s_nop 0
2672
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2673
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2679
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2680
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2681
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2682
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2683
+ ; GFX9-O0-NEXT: s_nop 0
2684
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2685
+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2686
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
2674
2687
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2675
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2688
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2689
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2676
2690
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2677
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2691
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2692
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
2678
2693
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
2679
2694
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
2680
2695
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
2681
2696
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
2682
2697
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
2683
- ; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
2684
- ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
2698
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], -1
2699
+ ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
2700
+ ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
2685
2701
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
2686
2702
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2687
2703
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2704
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2688
2705
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2689
2706
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
2690
2707
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -2698,16 +2715,19 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2698
2715
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2699
2716
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
2700
2717
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
2718
+ ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
2701
2719
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
2702
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
2720
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[12:13]
2721
+ ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
2703
2722
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
2704
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9 ]
2723
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[12:13 ]
2705
2724
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
2706
2725
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
2707
2726
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2708
2727
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
2728
+ ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
2709
2729
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
2710
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9 ]
2730
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[12:13 ]
2711
2731
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
2712
2732
; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
2713
2733
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
@@ -3220,10 +3240,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3220
3240
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3221
3241
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3222
3242
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3223
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3224
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3225
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3226
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3243
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3244
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3245
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3246
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3227
3247
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3228
3248
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3229
3249
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments