@@ -3424,16 +3424,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV32-NEXT: vredsum.vs v8, v8, v16
; RV32-NEXT: vredsum.vs v9, v10, v16
; RV32-NEXT: vredsum.vs v10, v12, v16
- ; RV32-NEXT: vredsum.vs v11, v14, v16
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vmv.x.s a1, v9
; RV32-NEXT: vmv.x.s a2, v10
- ; RV32-NEXT: vmv.x.s a3, v11
+ ; RV32-NEXT: vredsum.vs v8, v14, v16
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
- ; RV32-NEXT: vmv.v.x v8, a0
- ; RV32-NEXT: vslide1down.vx v8, v8, a1
- ; RV32-NEXT: vslide1down.vx v8, v8, a2
- ; RV32-NEXT: vslide1down.vx v8, v8, a3
+ ; RV32-NEXT: vslide1up.vx v9, v8, a2
+ ; RV32-NEXT: vslide1up.vx v10, v9, a1
+ ; RV32-NEXT: vslide1up.vx v8, v10, a0
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredsum:
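; NOTE: the same rewrite repeats in every check block of this diff, for both
; buildvec_vredsum and buildvec_vredmax. A likely reading, based only on the
; instructions visible here: the old lowering reduced all four sources,
; extracted all four scalars with vmv.x.s, and rebuilt the result bottom-up
; with vmv.v.x plus three vslide1down.vx; the new lowering leaves the last
; reduction result in v8 and slides the other three scalars in above it with
; vslide1up.vx, saving one vmv.x.s and one vmv.v.x per function. A schematic,
; with element 0 on the left, r3 the last reduction result, and register
; names taken from the RV32 block above:
;   vredsum.vs   v8, v14, v16   ; v8[0] = r3, still in a vector register
;   vslide1up.vx v9, v8, a2     ; v9  = {a2, r3, ...}
;   vslide1up.vx v10, v9, a1    ; v10 = {a1, a2, r3, ...}
;   vslide1up.vx v8, v10, a0    ; v8  = {a0, a1, a2, r3}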
@@ -3443,16 +3441,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
- ; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
; RV64V-ONLY-NEXT: vmv.x.s a0, v8
; RV64V-ONLY-NEXT: vmv.x.s a1, v9
; RV64V-ONLY-NEXT: vmv.x.s a2, v10
- ; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+ ; RV64V-ONLY-NEXT: vredsum.vs v8, v14, v16
; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
- ; RV64V-ONLY-NEXT: vmv.v.x v8, a0
- ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
- ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
- ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+ ; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
+ ; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
+ ; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredsum:
@@ -3502,16 +3498,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
- ; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
; RV64ZVE32-NEXT: vmv.x.s a0, v8
; RV64ZVE32-NEXT: vmv.x.s a1, v9
; RV64ZVE32-NEXT: vmv.x.s a2, v10
- ; RV64ZVE32-NEXT: vmv.x.s a3, v11
+ ; RV64ZVE32-NEXT: vredsum.vs v8, v14, v16
; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
- ; RV64ZVE32-NEXT: vmv.v.x v8, a0
- ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
- ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
- ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+ ; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
+ ; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
+ ; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
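; For context, a sketch of the IR under test, reconstructed from the visible
; %247/%248 context lines and the truncated signature in the hunk headers;
; the %arg2/%arg3 names, the fourth argument, and the value numbering are
; assumptions, so the sketch carries its own name:
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

define <4 x i32> @buildvec_vredsum_sketch(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) {
  ; four independent 8-lane add reductions...
  %s0 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
  %s1 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg1)
  %s2 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg2)
  %s3 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg3)
  ; ...feeding a 4-element build_vector, one reduction per lane
  %v0 = insertelement <4 x i32> poison, i32 %s0, i64 0
  %v1 = insertelement <4 x i32> %v0, i32 %s1, i64 1
  %v2 = insertelement <4 x i32> %v1, i32 %s2, i64 2
  %v3 = insertelement <4 x i32> %v2, i32 %s3, i64 3
  ret <4 x i32> %v3
}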
@@ -3531,16 +3525,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vredmaxu.vs v9, v10, v10
; RV32-NEXT: vredmaxu.vs v10, v12, v12
- ; RV32-NEXT: vredmaxu.vs v11, v14, v14
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vmv.x.s a1, v9
; RV32-NEXT: vmv.x.s a2, v10
- ; RV32-NEXT: vmv.x.s a3, v11
+ ; RV32-NEXT: vredmaxu.vs v8, v14, v14
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
- ; RV32-NEXT: vmv.v.x v8, a0
- ; RV32-NEXT: vslide1down.vx v8, v8, a1
- ; RV32-NEXT: vslide1down.vx v8, v8, a2
- ; RV32-NEXT: vslide1down.vx v8, v8, a3
+ ; RV32-NEXT: vslide1up.vx v9, v8, a2
+ ; RV32-NEXT: vslide1up.vx v10, v9, a1
+ ; RV32-NEXT: vslide1up.vx v8, v10, a0
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredmax:
@@ -3549,16 +3541,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
- ; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
; RV64V-ONLY-NEXT: vmv.x.s a0, v8
; RV64V-ONLY-NEXT: vmv.x.s a1, v9
; RV64V-ONLY-NEXT: vmv.x.s a2, v10
- ; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+ ; RV64V-ONLY-NEXT: vredmaxu.vs v8, v14, v14
; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
- ; RV64V-ONLY-NEXT: vmv.v.x v8, a0
- ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
- ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
- ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+ ; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
+ ; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
+ ; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredmax:
@@ -3605,16 +3595,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
- ; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
; RV64ZVE32-NEXT: vmv.x.s a0, v8
; RV64ZVE32-NEXT: vmv.x.s a1, v9
; RV64ZVE32-NEXT: vmv.x.s a2, v10
- ; RV64ZVE32-NEXT: vmv.x.s a3, v11
+ ; RV64ZVE32-NEXT: vredmaxu.vs v8, v14, v14
; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
- ; RV64ZVE32-NEXT: vmv.v.x v8, a0
- ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
- ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
- ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+ ; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
+ ; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
+ ; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
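; buildvec_vredmax appears to follow the same pattern with
; @llvm.vector.reduce.umax.v8i32 in place of the add reduction (matching the
; vredmaxu.vs checks); only the intrinsic differs from the sketch above.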