@@ -3424,14 +3424,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV32-NEXT: vredsum.vs v8, v8, v16
 ; RV32-NEXT: vredsum.vs v9, v10, v16
 ; RV32-NEXT: vredsum.vs v10, v12, v16
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vredsum.vs v8, v14, v16
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1up.vx v9, v8, a2
-; RV32-NEXT: vslide1up.vx v10, v9, a1
-; RV32-NEXT: vslide1up.vx v8, v10, a0
+; RV32-NEXT: vredsum.vs v11, v14, v16
+; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 1
+; RV32-NEXT: vslideup.vi v9, v10, 1
+; RV32-NEXT: vslideup.vi v8, v9, 1
 ; RV32-NEXT: ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredsum_slideup:
@@ -3441,14 +3438,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
 ; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
 ; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
-; RV64V-ONLY-NEXT: vmv.x.s a0, v8
-; RV64V-ONLY-NEXT: vmv.x.s a1, v9
-; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vredsum.vs v8, v14, v16
-; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT: vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT: vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT: vslideup.vi v8, v9, 1
 ; RV64V-ONLY-NEXT: ret
 ;
 ; RVA22U64-LABEL: buildvec_vredsum_slideup:
@@ -3498,14 +3492,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
 ; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
 ; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
-; RV64ZVE32-NEXT: vmv.x.s a0, v8
-; RV64ZVE32-NEXT: vmv.x.s a1, v9
-; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vredsum.vs v8, v14, v16
-; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT: vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT: vslideup.vi v8, v9, 1
 ; RV64ZVE32-NEXT: ret
   %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
   %248 = insertelement <4 x i32> poison, i32 %247, i64 0
@@ -3525,14 +3516,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV32-NEXT: vredmaxu.vs v8, v8, v8
 ; RV32-NEXT: vredmaxu.vs v9, v10, v10
 ; RV32-NEXT: vredmaxu.vs v10, v12, v12
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vredmaxu.vs v8, v14, v14
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1up.vx v9, v8, a2
-; RV32-NEXT: vslide1up.vx v10, v9, a1
-; RV32-NEXT: vslide1up.vx v8, v10, a0
+; RV32-NEXT: vredmaxu.vs v11, v14, v14
+; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 1
+; RV32-NEXT: vslideup.vi v9, v10, 1
+; RV32-NEXT: vslideup.vi v8, v9, 1
 ; RV32-NEXT: ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredmax_slideup:
@@ -3541,14 +3529,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
 ; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
 ; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
-; RV64V-ONLY-NEXT: vmv.x.s a0, v8
-; RV64V-ONLY-NEXT: vmv.x.s a1, v9
-; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vredmaxu.vs v8, v14, v14
-; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT: vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT: vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT: vslideup.vi v8, v9, 1
 ; RV64V-ONLY-NEXT: ret
 ;
 ; RVA22U64-LABEL: buildvec_vredmax_slideup:
@@ -3595,14 +3580,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
 ; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
 ; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
-; RV64ZVE32-NEXT: vmv.x.s a0, v8
-; RV64ZVE32-NEXT: vmv.x.s a1, v9
-; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vredmaxu.vs v8, v14, v14
-; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT: vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT: vslideup.vi v8, v9, 1
 ; RV64ZVE32-NEXT: ret
   %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
   %248 = insertelement <4 x i32> poison, i32 %247, i64 0
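
For reference, the change captured by the updated checks: instead of moving each reduction result to a scalar register with vmv.x.s and rebuilding the vector with vslide1up.vx, the new lowering keeps the reduction results in vector registers and slides them together one lane at a time with tail-undisturbed (tu, ma) vslideup.vi. Below is a minimal sketch of the IR pattern the tests exercise, assuming the body continues symmetrically from the %247/%248 lines shown in the diff context; the %arg2/%arg3 operands and the value names are illustrative, not taken from the test file.

define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) {
  ; Reduce each 8-element source vector to a scalar sum.
  %r0 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
  %r1 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg1)
  %r2 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg2)
  %r3 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg3)
  ; Insert the four sums into consecutive lanes of a <4 x i32> build vector.
  %v0 = insertelement <4 x i32> poison, i32 %r0, i64 0
  %v1 = insertelement <4 x i32> %v0, i32 %r1, i64 1
  %v2 = insertelement <4 x i32> %v1, i32 %r2, i64 2
  %v3 = insertelement <4 x i32> %v2, i32 %r3, i64 3
  ret <4 x i32> %v3
}

The buildvec_vredmax_slideup variant follows the same shape with @llvm.vector.reduce.umax.v8i32 in place of the add reduction.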