diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3d07702c6f78b..f1f3af093e848 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21151,6 +21151,38 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return N->getOperand(0);
     break;
   }
+  case RISCVISD::VSLIDE1UP_VL:
+  case RISCVISD::VFSLIDE1UP_VL: {
+    using namespace SDPatternMatch;
+    SDValue SrcVec;
+    SDLoc DL(N);
+    MVT VT = N->getSimpleValueType(0);
+    // If the scalar we're sliding in was extracted from the first element of a
+    // vector, we can use that vector as the passthru in a normal slideup of 1.
+    // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
+    if (!N->getOperand(0).isUndef() ||
+        !sd_match(N->getOperand(2),
+                  m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
+                          m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
+      break;
+
+    MVT SrcVecVT = SrcVec.getSimpleValueType();
+    if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
+      break;
+    // Adapt the value type of source vector.
+    if (SrcVecVT.isFixedLengthVector()) {
+      SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
+      SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
+    }
+    if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
+      SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
+    else
+      SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
+
+    return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
+                       DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
+                       N->getOperand(4));
+  }
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index aa3b9abe3a7aa..248ec1369076b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1837,14 +1837,11 @@ define <4 x float> @buildvec_vfredusum_slideup(float %start, <8 x float> %arg1,
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v16
 ; CHECK-NEXT:    vfredusum.vs v9, v10, v16
 ; CHECK-NEXT:    vfredusum.vs v10, v12, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
-; CHECK-NEXT:    vfmv.f.s fa3, v10
-; CHECK-NEXT:    vfredusum.vs v8, v14, v16
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa3
-; CHECK-NEXT:    vfslide1up.vf v10, v9, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT:    vfredusum.vs v11, v14, v16
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v11, 1
+; CHECK-NEXT:    vslideup.vi v9, v10, 1
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
   %247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
   %248 = insertelement <4 x float> poison, float %247, i64 0
@@ -1861,18 +1858,17 @@ define <8 x float> @buildvec_vfredusum_slideup_leading_undef(float %start, <8 x
 ; CHECK-LABEL: buildvec_vfredusum_slideup_leading_undef:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vfredusum.vs v9, v8, v16
-; CHECK-NEXT:    vfredusum.vs v10, v10, v16
-; CHECK-NEXT:    vfredusum.vs v11, v12, v16
-; CHECK-NEXT:    vfredusum.vs v8, v14, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v9
-; CHECK-NEXT:    vfmv.f.s fa4, v10
-; CHECK-NEXT:    vfmv.f.s fa3, v11
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa4
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa5
-; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vfmv.s.f v17, fa0
+; CHECK-NEXT:    vfredusum.vs v16, v8, v17
+; CHECK-NEXT:    vfredusum.vs v8, v10, v17
+; CHECK-NEXT:    vfredusum.vs v10, v12, v17
+; CHECK-NEXT:    vfredusum.vs v12, v14, v17
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v12, 1
+; CHECK-NEXT:    vslideup.vi v8, v10, 1
+; CHECK-NEXT:    vslideup.vi v16, v8, 1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v16, 4
 ; CHECK-NEXT:    ret
   %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
   %253 = insertelement <8 x float> poison, float %252, i64 4
@@ -1890,16 +1886,14 @@ define <8 x float> @buildvec_vfredusum_slideup_trailing_undef(float %start, <8 x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vfredusum.vs v9, v8, v16
+; CHECK-NEXT:    vfredusum.vs v8, v8, v16
 ; CHECK-NEXT:    vfredusum.vs v10, v10, v16
-; CHECK-NEXT:    vfredusum.vs v11, v12, v16
-; CHECK-NEXT:    vfredusum.vs v8, v14, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v9
-; CHECK-NEXT:    vfmv.f.s fa4, v10
-; CHECK-NEXT:    vfmv.f.s fa3, v11
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT:    vfslide1up.vf v12, v10, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v12, fa5
+; CHECK-NEXT:    vfredusum.vs v12, v12, v16
+; CHECK-NEXT:    vfredusum.vs v14, v14, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v12, v14, 1
+; CHECK-NEXT:    vslideup.vi v10, v12, 1
+; CHECK-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-NEXT:    ret
   %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
   %253 = insertelement <8 x float> poison, float %252, i64 0
@@ -1944,17 +1938,17 @@ define <8 x float> @buildvec_vfredusum_slideup_mid_undef(float %start, <8 x floa
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vfredusum.vs v9, v8, v16
+; CHECK-NEXT:    vfredusum.vs v8, v8, v16
 ; CHECK-NEXT:    vfredusum.vs v10, v10, v16
-; CHECK-NEXT:    vfredusum.vs v11, v12, v16
-; CHECK-NEXT:    vfredusum.vs v8, v14, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v9
-; CHECK-NEXT:    vfmv.f.s fa4, v10
-; CHECK-NEXT:    vfmv.f.s fa3, v11
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT:    vslideup.vi v8, v10, 4
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT:    vfredusum.vs v12, v12, v16
+; CHECK-NEXT:    vfredusum.vs v14, v14, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v12, v14, 1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v14, v12, 4
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v14, 1
+; CHECK-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-NEXT:    ret
   %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
   %253 = insertelement <8 x float> poison, float %252, i64 0
@@ -1975,14 +1969,11 @@ define <4 x float> @buildvec_vfredosum_slideup(float %start, <8 x float> %arg1,
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vfredosum.vs v9, v10, v16
 ; CHECK-NEXT:    vfredosum.vs v10, v12, v16
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
-; CHECK-NEXT:    vfmv.f.s fa3, v10
-; CHECK-NEXT:    vfredosum.vs v8, v14, v16
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa3
-; CHECK-NEXT:    vfslide1up.vf v10, v9, fa4
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT:    vfredosum.vs v11, v14, v16
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v11, 1
+; CHECK-NEXT:    vslideup.vi v9, v10, 1
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
   %247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
   %248 = insertelement <4 x float> poison, float %247, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index eedf19c38766b..aaa0269ef1c63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3424,14 +3424,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV32-NEXT:    vredsum.vs v8, v8, v16
 ; RV32-NEXT:    vredsum.vs v9, v10, v16
 ; RV32-NEXT:    vredsum.vs v10, v12, v16
-; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vmv.x.s a1, v9
-; RV32-NEXT:    vmv.x.s a2, v10
-; RV32-NEXT:    vredsum.vs v8, v14, v16
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1up.vx v9, v8, a2
-; RV32-NEXT:    vslide1up.vx v10, v9, a1
-; RV32-NEXT:    vslide1up.vx v8, v10, a0
+; RV32-NEXT:    vredsum.vs v11, v14, v16
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT:    vslideup.vi v10, v11, 1
+; RV32-NEXT:    vslideup.vi v9, v10, 1
+; RV32-NEXT:    vslideup.vi v8, v9, 1
 ; RV32-NEXT:    ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredsum_slideup:
@@ -3441,14 +3438,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64V-ONLY-NEXT:    vredsum.vs v8, v8, v16
 ; RV64V-ONLY-NEXT:    vredsum.vs v9, v10, v16
 ; RV64V-ONLY-NEXT:    vredsum.vs v10, v12, v16
-; RV64V-ONLY-NEXT:    vmv.x.s a0, v8
-; RV64V-ONLY-NEXT:    vmv.x.s a1, v9
-; RV64V-ONLY-NEXT:    vmv.x.s a2, v10
-; RV64V-ONLY-NEXT:    vredsum.vs v8, v14, v16
-; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT:    vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT:    vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT:    vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT:    vredsum.vs v11, v14, v16
+; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT:    vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT:    vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT:    vslideup.vi v8, v9, 1
 ; RV64V-ONLY-NEXT:    ret
 ;
 ; RVA22U64-LABEL: buildvec_vredsum_slideup:
@@ -3498,14 +3492,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64ZVE32-NEXT:    vredsum.vs v8, v8, v16
 ; RV64ZVE32-NEXT:    vredsum.vs v9, v10, v16
 ; RV64ZVE32-NEXT:    vredsum.vs v10, v12, v16
-; RV64ZVE32-NEXT:    vmv.x.s a0, v8
-; RV64ZVE32-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32-NEXT:    vmv.x.s a2, v10
-; RV64ZVE32-NEXT:    vredsum.vs v8, v14, v16
-; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT:    vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT:    vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT:    vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT:    vredsum.vs v11, v14, v16
+; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT:    vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT:    vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT:    vslideup.vi v8, v9, 1
 ; RV64ZVE32-NEXT:    ret
   %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
   %248 = insertelement <4 x i32> poison, i32 %247, i64 0
@@ -3525,14 +3516,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV32-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV32-NEXT:    vredmaxu.vs v10, v12, v12
-; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vmv.x.s a1, v9
-; RV32-NEXT:    vmv.x.s a2, v10
-; RV32-NEXT:    vredmaxu.vs v8, v14, v14
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1up.vx v9, v8, a2
-; RV32-NEXT:    vslide1up.vx v10, v9, a1
-; RV32-NEXT:    vslide1up.vx v8, v10, a0
+; RV32-NEXT:    vredmaxu.vs v11, v14, v14
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT:    vslideup.vi v10, v11, 1
+; RV32-NEXT:    vslideup.vi v9, v10, 1
+; RV32-NEXT:    vslideup.vi v8, v9, 1
 ; RV32-NEXT:    ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredmax_slideup:
@@ -3541,14 +3529,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV64V-ONLY-NEXT:    vredmaxu.vs v10, v12, v12
-; RV64V-ONLY-NEXT:    vmv.x.s a0, v8
-; RV64V-ONLY-NEXT:    vmv.x.s a1, v9
-; RV64V-ONLY-NEXT:    vmv.x.s a2, v10
-; RV64V-ONLY-NEXT:    vredmaxu.vs v8, v14, v14
-; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT:    vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT:    vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT:    vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT:    vredmaxu.vs v11, v14, v14
+; RV64V-ONLY-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT:    vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT:    vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT:    vslideup.vi v8, v9, 1
 ; RV64V-ONLY-NEXT:    ret
 ;
 ; RVA22U64-LABEL: buildvec_vredmax_slideup:
@@ -3595,14 +3580,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
 ; RV64ZVE32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64ZVE32-NEXT:    vredmaxu.vs v9, v10, v10
 ; RV64ZVE32-NEXT:    vredmaxu.vs v10, v12, v12
-; RV64ZVE32-NEXT:    vmv.x.s a0, v8
-; RV64ZVE32-NEXT:    vmv.x.s a1, v9
-; RV64ZVE32-NEXT:    vmv.x.s a2, v10
-; RV64ZVE32-NEXT:    vredmaxu.vs v8, v14, v14
-; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT:    vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT:    vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT:    vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT:    vredmaxu.vs v11, v14, v14
+; RV64ZVE32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT:    vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT:    vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT:    vslideup.vi v8, v9, 1
 ; RV64ZVE32-NEXT:    ret
   %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
   %248 = insertelement <4 x i32> poison, i32 %247, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
index 821d4240827fb..12ac29aa3a39d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
@@ -9,11 +9,11 @@ define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x
 ; CHECK-NEXT:    vfredusum.vs v9, v12, v8
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vfmv.f.s fa5, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vfredusum.vs v9, v16, v8
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v8, v9, fa5
+; CHECK-NEXT:    vfredusum.vs v8, v16, v8
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %s0 = extractelement <2 x float> %arg0, i64 0
   %r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1)