@@ -475,32 +475,35 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475
475
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476
476
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477
477
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479
- ; GFX9-O0-NEXT: s_nop 0
480
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
481
478
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
482
479
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
483
480
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
484
481
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
485
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
482
+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
483
+ ; GFX9-O0-NEXT: s_nop 0
484
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
485
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
486
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
487
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486
488
; GFX9-O0-NEXT: s_nop 0
487
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
489
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488
490
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
489
491
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
490
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
492
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
493
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
491
494
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
492
495
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
493
496
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
494
497
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
495
498
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
496
499
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
497
500
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
498
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], -1
499
- ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
500
- ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
501
+ ; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
502
+ ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
501
503
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
502
504
; GFX9-O0-NEXT: s_mov_b32 s14, s13
503
505
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
506
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
504
507
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
505
508
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
506
509
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -514,19 +517,16 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
514
517
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
515
518
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
516
519
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
517
- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
518
520
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
519
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[12:13]
520
- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
521
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
521
522
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
522
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[12:13]
523
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
523
524
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
524
525
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
525
526
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
526
527
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
527
- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
528
528
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
529
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[12:13]
529
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
530
530
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
531
531
; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
532
532
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
@@ -1039,10 +1039,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1039
1039
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1040
1040
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1041
1041
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1042
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1043
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1044
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1045
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1042
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1043
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1044
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1045
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1046
1046
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1047
1047
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1048
1048
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2660,19 +2660,22 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2660
2660
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2661
2661
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2662
2662
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2663
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2664
- ; GFX9-O0-NEXT: s_nop 0
2665
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2666
2663
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2667
2664
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2668
2665
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2669
2666
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2670
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2667
+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2671
2668
; GFX9-O0-NEXT: s_nop 0
2672
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2669
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2670
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2671
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2672
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2673
+ ; GFX9-O0-NEXT: s_nop 0
2674
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2673
2675
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2674
2676
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2675
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2677
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2678
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2676
2679
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2677
2680
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2678
2681
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
@@ -2685,6 +2688,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2685
2688
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
2686
2689
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2687
2690
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2691
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2688
2692
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2689
2693
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
2690
2694
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3220,10 +3224,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3220
3224
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3221
3225
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3222
3226
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3223
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3224
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3225
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3226
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3227
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3228
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3229
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3230
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3227
3231
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3228
3232
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3229
3233
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments