@@ -437,30 +437,33 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
437
437
; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
438
438
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
439
439
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
440
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
441
- ; GFX9-O0-NEXT: s_nop 0
442
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
443
440
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
444
441
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
445
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
442
+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
443
+ ; GFX9-O0-NEXT: s_nop 0
444
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
445
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
446
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
447
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
446
448
; GFX9-O0-NEXT: s_nop 0
447
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
449
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
448
450
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
449
451
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
450
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
452
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
453
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
451
454
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
452
455
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
453
456
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
454
457
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
455
458
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
456
459
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
457
460
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
458
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], -1
459
- ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
460
- ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
461
+ ; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
462
+ ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
461
463
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
462
464
; GFX9-O0-NEXT: s_mov_b32 s14, s13
463
465
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
466
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
464
467
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
465
468
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
466
469
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -474,17 +477,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
474
477
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
475
478
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
476
479
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
477
- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
478
480
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
479
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[12:13]
480
- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
481
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
481
482
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
482
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[12:13]
483
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
483
484
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
484
485
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
485
- ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
486
486
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
487
- ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[12:13]
487
+ ; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
488
488
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
489
489
; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
490
490
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
@@ -977,10 +977,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
977
977
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
978
978
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
979
979
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
980
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
981
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
982
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
983
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
980
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
981
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
982
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
983
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
984
984
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
985
985
; GFX9-O0-NEXT: s_mov_b32 s5, s6
986
986
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2564,17 +2564,20 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2564
2564
; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
2565
2565
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2566
2566
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2567
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2568
- ; GFX9-O0-NEXT: s_nop 0
2569
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2570
2567
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2571
2568
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2572
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2569
+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2573
2570
; GFX9-O0-NEXT: s_nop 0
2574
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2571
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2572
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2573
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2574
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2575
+ ; GFX9-O0-NEXT: s_nop 0
2576
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2575
2577
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2576
2578
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2577
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2579
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2580
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2578
2581
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2579
2582
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2580
2583
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
@@ -2587,6 +2590,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2587
2590
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
2588
2591
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2589
2592
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2593
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2590
2594
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2591
2595
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
2592
2596
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3100,10 +3104,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3100
3104
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3101
3105
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3102
3106
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3103
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3104
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3105
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3106
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3107
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3108
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3109
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3110
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3107
3111
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3108
3112
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3109
3113
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments