
Commit d06b1eb

mshockwave and topperc committed
[RISCV] Fold (vslide1up undef, v, (extract_elt x, 0)) into (vslideup x, v, 1)
Co-authored-by: Craig Topper <[email protected]>
1 parent c1ae381 commit d06b1eb
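
The effect of the fold, as a rough before/after sketch distilled from the updated tests below (register numbers are illustrative): when the scalar fed into a vslide1up was just extracted from element 0 of another vector, the extract (vfmv.f.s / vmv.x.s) disappears and the slide becomes a tail-undisturbed vslideup of 1 that uses the source vector as its passthru.

  # before: move element 0 out to a scalar, then slide it back in
  vmv.x.s      a0, v10                  # a0 = v10[0]
  vsetivli     zero, 4, e32, m1, ta, ma
  vslide1up.vx v9, v8, a0               # v9 = { a0, v8[0], v8[1], v8[2] }

  # after: slide v8 up by one over v10; tu keeps v10[0] as element 0
  vsetivli     zero, 4, e32, m1, tu, ma
  vslideup.vi  v10, v8, 1               # v10 = { v10[0], v8[0], v8[1], v8[2] }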

4 files changed: +75 −68 lines changed


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 31 additions & 0 deletions
@@ -21151,6 +21151,37 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return N->getOperand(0);
     break;
   }
+  case RISCVISD::VSLIDE1UP_VL:
+  case RISCVISD::VFSLIDE1UP_VL: {
+    using namespace SDPatternMatch;
+    SDValue SrcVec;
+    SDLoc DL(N);
+    MVT VT = N->getSimpleValueType(0);
+    // If the scalar we're sliding in was extracted from the first element of a
+    // vector, we can use that vector as the passthru in a normal slideup of 1.
+    // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
+    if (N->getOperand(0).isUndef() &&
+        sd_match(
+            N->getOperand(2),
+            m_OneUse(m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
+                             m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))) {
+      MVT SrcVecVT = SrcVec.getSimpleValueType();
+      // Adapt the value type of the source vector.
+      if (SrcVecVT.isFixedLengthVector()) {
+        SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
+        SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
+      }
+      if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
+        SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
+      else
+        SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
+
+      return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
+                         DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
+                         N->getOperand(4));
+    }
+    break;
+  }
   }
 
   return SDValue();
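
For orientation, here is a minimal IR-level sketch of the kind of input this combine can catch (a hypothetical function, not one of the tests in this commit): on RISC-V a shuffle like the one below can lower to a vfslide1up whose scalar operand is an extractelement of %x at index 0, at which point the combine reuses %x directly as the slideup passthru.

define <4 x float> @slide1up_of_extract(<4 x float> %x, <4 x float> %v) {
  %s = extractelement <4 x float> %x, i64 0
  %head = insertelement <4 x float> poison, float %s, i64 0
  ; result is { %s, %v[0], %v[1], %v[2] }, i.e. %v slid up by one
  %r = shufflevector <4 x float> %v, <4 x float> %head, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x float> %r
}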

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 10 additions & 16 deletions
@@ -1837,14 +1837,11 @@ define <4 x float> @buildvec_vfredusum_slideup(float %start, <8 x float> %arg1,
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v16
 ; CHECK-NEXT:    vfredusum.vs v9, v10, v16
 ; CHECK-NEXT:    vfredusum.vs v10, v12, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
-; CHECK-NEXT:    vfmv.f.s fa3, v10
-; CHECK-NEXT:    vfredusum.vs v8, v14, v16
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa3
-; CHECK-NEXT:    vfslide1up.vf v10, v9, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT:    vfredusum.vs v11, v14, v16
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v11, 1
+; CHECK-NEXT:    vslideup.vi v9, v10, 1
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
 %247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
 %248 = insertelement <4 x float> poison, float %247, i64 0

@@ -1975,14 +1972,11 @@ define <4 x float> @buildvec_vfredosum_slideup(float %start, <8 x float> %arg1,
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vfredosum.vs v9, v10, v16
 ; CHECK-NEXT:    vfredosum.vs v10, v12, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
-; CHECK-NEXT:    vfmv.f.s fa3, v10
-; CHECK-NEXT:    vfredosum.vs v8, v14, v16
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa3
-; CHECK-NEXT:    vfslide1up.vf v10, v9, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT:    vfredosum.vs v11, v14, v16
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v11, 1
+; CHECK-NEXT:    vslideup.vi v9, v10, 1
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
 %247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
 %248 = insertelement <4 x float> poison, float %247, i64 0
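
Note the vsetivli policy flips from ta (tail agnostic) to tu (tail undisturbed): each vslideup must now preserve element 0 of its destination, since that element carries one of the reduction results. Annotated (the redN names are mine, standing for the four reduction results that sit in element 0 of v8, v9, v10, and v11 respectively):

  vslideup.vi v10, v11, 1    # v10 = { red2, red3, ?, ? }
  vslideup.vi v9, v10, 1     # v9  = { red1, red2, red3, ? }
  vslideup.vi v8, v9, 1      # v8  = { red0, red1, red2, red3 }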

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

Lines changed: 30 additions & 48 deletions
@@ -3424,14 +3424,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV32-NEXT:    vredsum.vs v8, v8, v16
 ; RV32-NEXT:    vredsum.vs v9, v10, v16
 ; RV32-NEXT:    vredsum.vs v10, v12, v16
-; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vmv.x.s a1, v9
-; RV32-NEXT:    vmv.x.s a2, v10
-; RV32-NEXT:    vredsum.vs v8, v14, v16
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1up.vx v9, v8, a2
-; RV32-NEXT:    vslide1up.vx v10, v9, a1
-; RV32-NEXT:    vslide1up.vx v8, v10, a0
+; RV32-NEXT:    vredsum.vs v11, v14, v16
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT:    vslideup.vi v10, v11, 1
+; RV32-NEXT:    vslideup.vi v9, v10, 1
+; RV32-NEXT:    vslideup.vi v8, v9, 1
 ; RV32-NEXT:    ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredsum_slideup:

@@ -3441,14 +3438,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64V-ONLY-NEXT:    vredsum.vs v8, v8, v16
 ; RV64V-ONLY-NEXT:    vredsum.vs v9, v10, v16
 ; RV64V-ONLY-NEXT:    vredsum.vs v10, v12, v16
-; RV64V-ONLY-NEXT:    vmv.x.s a0, v8
-; RV64V-ONLY-NEXT:    vmv.x.s a1, v9
-; RV64V-ONLY-NEXT:    vmv.x.s a2, v10
-; RV64V-ONLY-NEXT:    vredsum.vs v8, v14, v16
-; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT:    vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT:    vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT:    vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT:    vredsum.vs v11, v14, v16
+; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT:    vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT:    vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT:    vslideup.vi v8, v9, 1
 ; RV64V-ONLY-NEXT:    ret
 ;
 ; RVA22U64-LABEL: buildvec_vredsum_slideup:

@@ -3498,14 +3492,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64ZVE32-NEXT:    vredsum.vs v8, v8, v16
 ; RV64ZVE32-NEXT:    vredsum.vs v9, v10, v16
 ; RV64ZVE32-NEXT:    vredsum.vs v10, v12, v16
-; RV64ZVE32-NEXT:    vmv.x.s a0, v8
-; RV64ZVE32-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32-NEXT:    vmv.x.s a2, v10
-; RV64ZVE32-NEXT:    vredsum.vs v8, v14, v16
-; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT:    vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT:    vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT:    vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT:    vredsum.vs v11, v14, v16
+; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT:    vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT:    vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT:    vslideup.vi v8, v9, 1
 ; RV64ZVE32-NEXT:    ret
 %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
 %248 = insertelement <4 x i32> poison, i32 %247, i64 0

@@ -3525,14 +3516,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV32-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV32-NEXT:    vredmaxu.vs v10, v12, v12
-; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vmv.x.s a1, v9
-; RV32-NEXT:    vmv.x.s a2, v10
-; RV32-NEXT:    vredmaxu.vs v8, v14, v14
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1up.vx v9, v8, a2
-; RV32-NEXT:    vslide1up.vx v10, v9, a1
-; RV32-NEXT:    vslide1up.vx v8, v10, a0
+; RV32-NEXT:    vredmaxu.vs v11, v14, v14
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT:    vslideup.vi v10, v11, 1
+; RV32-NEXT:    vslideup.vi v9, v10, 1
+; RV32-NEXT:    vslideup.vi v8, v9, 1
 ; RV32-NEXT:    ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredmax_slideup:

@@ -3541,14 +3529,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v10, v12, v12
-; RV64V-ONLY-NEXT:    vmv.x.s a0, v8
-; RV64V-ONLY-NEXT:    vmv.x.s a1, v9
-; RV64V-ONLY-NEXT:    vmv.x.s a2, v10
-; RV64V-ONLY-NEXT:    vredmaxu.vs v8, v14, v14
-; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT:    vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT:    vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT:    vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT:    vredmaxu.vs v11, v14, v14
+; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT:    vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT:    vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT:    vslideup.vi v8, v9, 1
 ; RV64V-ONLY-NEXT:    ret
 ;
 ; RVA22U64-LABEL: buildvec_vredmax_slideup:

@@ -3595,14 +3580,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64ZVE32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64ZVE32-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV64ZVE32-NEXT:    vredmaxu.vs v10, v12, v12
-; RV64ZVE32-NEXT:    vmv.x.s a0, v8
-; RV64ZVE32-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32-NEXT:    vmv.x.s a2, v10
-; RV64ZVE32-NEXT:    vredmaxu.vs v8, v14, v14
-; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT:    vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT:    vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT:    vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT:    vredmaxu.vs v11, v14, v14
+; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT:    vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT:    vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT:    vslideup.vi v8, v9, 1
 ; RV64ZVE32-NEXT:    ret
 %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
 %248 = insertelement <4 x i32> poison, i32 %247, i64 0

llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll

Lines changed: 4 additions & 4 deletions
@@ -9,11 +9,11 @@ define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x
 ; CHECK-NEXT:    vfredusum.vs v9, v12, v8
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vfmv.f.s fa5, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfredusum.vs v9, v16, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v8, v9, fa5
+; CHECK-NEXT:    vfredusum.vs v8, v16, v8
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 %s0 = extractelement <2 x float> %arg0, i64 0
 %r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1)
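
One trade-off is visible in this test: with the fold, the result is assembled in the passthru register (v9), so a whole-register copy is needed to place it in the ABI return register. A sketch of the tail, with register roles as in the CHECK lines above (r0 and r1 name the two reduction results, which sit in element 0 of v9 and v8):

  vslideup.vi v9, v8, 1    # v9 = { r0, r1 }; tu preserves r0 in element 0
  vmv1r.v     v8, v9       # move the result into the return register v8
  ret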
