32 changes: 32 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21151,6 +21151,38 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
break;
}
+  case RISCVISD::VSLIDE1UP_VL:
+  case RISCVISD::VFSLIDE1UP_VL: {
+    using namespace SDPatternMatch;
+    SDValue SrcVec;
+    SDLoc DL(N);
+    MVT VT = N->getSimpleValueType(0);
+    // If the scalar we're sliding in was extracted from the first element of a
+    // vector, we can use that vector as the passthru in a normal slideup of 1.
+    // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
+    if (!N->getOperand(0).isUndef() ||
+        !sd_match(N->getOperand(2),
+                  m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
+                          m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
+      break;
+
+    MVT SrcVecVT = SrcVec.getSimpleValueType();
+    if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
+      break;
+    // Adapt the value type of the source vector.
+    if (SrcVecVT.isFixedLengthVector()) {
+      SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
+      SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
+    }
+    if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
+      SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
+    else
+      SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
+
+    return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
+                       DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
+                       N->getOperand(4));
+  }
}

return SDValue();
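The combine above fires when the scalar being slid in is itself element 0 of another vector, typically a reduction result. Below is a reduced LLVM IR sketch of that shape; the function name is hypothetical, but the structure mirrors the buildvec_vfredusum_slideup test updated further down. Each @llvm.vector.reduce.fadd lowers to a vfredusum.vs whose scalar result already sits in element 0 of its destination, so the combine can drop the vfmv.f.s and use the reduction vector directly as the passthru of a tail-undisturbed vslideup by 1.

declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)

; Hypothetical reduced example (not part of this PR), modeled on the tests below.
define <4 x float> @slideup_of_fp_reductions(float %start, <8 x float> %a0, <8 x float> %a1,
                                             <8 x float> %a2, <8 x float> %a3) {
  %r0 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %a0)
  %r1 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %a1)
  %r2 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %a2)
  %r3 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %a3)
  ; After lowering, each scalar is an extract of lane 0 of a vfredusum.vs result,
  ; which is what m_ExtractElt(m_Value(SrcVec), m_Zero()) is there to match.
  %v0 = insertelement <4 x float> poison, float %r0, i64 0
  %v1 = insertelement <4 x float> %v0, float %r1, i64 1
  %v2 = insertelement <4 x float> %v1, float %r2, i64 2
  %v3 = insertelement <4 x float> %v2, float %r3, i64 3
  ret <4 x float> %v3
}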
85 changes: 38 additions & 47 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1837,14 +1837,11 @@ define <4 x float> @buildvec_vfredusum_slideup(float %start, <8 x float> %arg1,
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v9, v10, v16
; CHECK-NEXT: vfredusum.vs v10, v12, v16
-; CHECK-NEXT: vfmv.f.s fa5, v8
-; CHECK-NEXT: vfmv.f.s fa4, v9
-; CHECK-NEXT: vfmv.f.s fa3, v10
-; CHECK-NEXT: vfredusum.vs v8, v14, v16
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
-; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
-; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT: vfredusum.vs v11, v14, v16
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v11, 1
+; CHECK-NEXT: vslideup.vi v9, v10, 1
+; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: ret
%247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
@@ -1861,18 +1858,17 @@ define <8 x float> @buildvec_vfredusum_slideup_leading_undef(float %start, <8 x
; CHECK-LABEL: buildvec_vfredusum_slideup_leading_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vfredusum.vs v9, v8, v16
-; CHECK-NEXT: vfredusum.vs v10, v10, v16
-; CHECK-NEXT: vfredusum.vs v11, v12, v16
-; CHECK-NEXT: vfredusum.vs v8, v14, v16
-; CHECK-NEXT: vfmv.f.s fa5, v9
-; CHECK-NEXT: vfmv.f.s fa4, v10
-; CHECK-NEXT: vfmv.f.s fa3, v11
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT: vfslide1up.vf v8, v10, fa4
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa5
-; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vfmv.s.f v17, fa0
+; CHECK-NEXT: vfredusum.vs v16, v8, v17
+; CHECK-NEXT: vfredusum.vs v8, v10, v17
+; CHECK-NEXT: vfredusum.vs v10, v12, v17
+; CHECK-NEXT: vfredusum.vs v12, v14, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v12, 1
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vslideup.vi v16, v8, 1
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v16, 4
; CHECK-NEXT: ret
%252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%253 = insertelement <8 x float> poison, float %252, i64 4
@@ -1890,16 +1886,14 @@ define <8 x float> @buildvec_vfredusum_slideup_trailing_undef(float %start, <8 x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vfredusum.vs v9, v8, v16
+; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v10, v10, v16
-; CHECK-NEXT: vfredusum.vs v11, v12, v16
-; CHECK-NEXT: vfredusum.vs v8, v14, v16
-; CHECK-NEXT: vfmv.f.s fa5, v9
-; CHECK-NEXT: vfmv.f.s fa4, v10
-; CHECK-NEXT: vfmv.f.s fa3, v11
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT: vfslide1up.vf v12, v10, fa4
-; CHECK-NEXT: vfslide1up.vf v8, v12, fa5
+; CHECK-NEXT: vfredusum.vs v12, v12, v16
+; CHECK-NEXT: vfredusum.vs v14, v14, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v12, v14, 1
+; CHECK-NEXT: vslideup.vi v10, v12, 1
+; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: ret
%252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%253 = insertelement <8 x float> poison, float %252, i64 0
@@ -1944,17 +1938,17 @@ define <8 x float> @buildvec_vfredusum_slideup_mid_undef(float %start, <8 x floa
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vfredusum.vs v9, v8, v16
+; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v10, v10, v16
-; CHECK-NEXT: vfredusum.vs v11, v12, v16
-; CHECK-NEXT: vfredusum.vs v8, v14, v16
-; CHECK-NEXT: vfmv.f.s fa5, v9
-; CHECK-NEXT: vfmv.f.s fa4, v10
-; CHECK-NEXT: vfmv.f.s fa3, v11
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa3
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vfslide1up.vf v10, v8, fa4
-; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT: vfredusum.vs v12, v12, v16
+; CHECK-NEXT: vfredusum.vs v14, v14, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v12, v14, 1
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v14, v12, 4
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v14, 1
+; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: ret
%252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%253 = insertelement <8 x float> poison, float %252, i64 0
@@ -1975,14 +1969,11 @@ define <4 x float> @buildvec_vfredosum_slideup(float %start, <8 x float> %arg1,
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfredosum.vs v9, v10, v16
; CHECK-NEXT: vfredosum.vs v10, v12, v16
-; CHECK-NEXT: vfmv.f.s fa5, v8
-; CHECK-NEXT: vfmv.f.s fa4, v9
-; CHECK-NEXT: vfmv.f.s fa3, v10
-; CHECK-NEXT: vfredosum.vs v8, v14, v16
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
-; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
-; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT: vfredosum.vs v11, v14, v16
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v11, 1
+; CHECK-NEXT: vslideup.vi v9, v10, 1
+; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: ret
%247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
78 changes: 30 additions & 48 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3424,14 +3424,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV32-NEXT: vredsum.vs v8, v8, v16
; RV32-NEXT: vredsum.vs v9, v10, v16
; RV32-NEXT: vredsum.vs v10, v12, v16
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vredsum.vs v8, v14, v16
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1up.vx v9, v8, a2
-; RV32-NEXT: vslide1up.vx v10, v9, a1
-; RV32-NEXT: vslide1up.vx v8, v10, a0
+; RV32-NEXT: vredsum.vs v11, v14, v16
+; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 1
+; RV32-NEXT: vslideup.vi v9, v10, 1
+; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredsum_slideup:
@@ -3441,14 +3438,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
-; RV64V-ONLY-NEXT: vmv.x.s a0, v8
-; RV64V-ONLY-NEXT: vmv.x.s a1, v9
-; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vredsum.vs v8, v14, v16
-; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT: vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT: vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT: vslideup.vi v8, v9, 1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredsum_slideup:
@@ -3498,14 +3492,11 @@ define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
-; RV64ZVE32-NEXT: vmv.x.s a0, v8
-; RV64ZVE32-NEXT: vmv.x.s a1, v9
-; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vredsum.vs v8, v14, v16
-; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT: vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
@@ -3525,14 +3516,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vredmaxu.vs v9, v10, v10
; RV32-NEXT: vredmaxu.vs v10, v12, v12
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vredmaxu.vs v8, v14, v14
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1up.vx v9, v8, a2
-; RV32-NEXT: vslide1up.vx v10, v9, a1
-; RV32-NEXT: vslide1up.vx v8, v10, a0
+; RV32-NEXT: vredmaxu.vs v11, v14, v14
+; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 1
+; RV32-NEXT: vslideup.vi v9, v10, 1
+; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredmax_slideup:
@@ -3541,14 +3529,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
-; RV64V-ONLY-NEXT: vmv.x.s a0, v8
-; RV64V-ONLY-NEXT: vmv.x.s a1, v9
-; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vredmaxu.vs v8, v14, v14
-; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
-; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
-; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
+; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64V-ONLY-NEXT: vslideup.vi v10, v11, 1
+; RV64V-ONLY-NEXT: vslideup.vi v9, v10, 1
+; RV64V-ONLY-NEXT: vslideup.vi v8, v9, 1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredmax_slideup:
@@ -3595,14 +3580,11 @@ define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8
; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
-; RV64ZVE32-NEXT: vmv.x.s a0, v8
-; RV64ZVE32-NEXT: vmv.x.s a1, v9
-; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vredmaxu.vs v8, v14, v14
-; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
-; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
-; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
+; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32-NEXT: vslideup.vi v10, v11, 1
+; RV64ZVE32-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
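For integer element types the reduction scalar is typically read out through a RISCVISD::VMV_X_S node rather than an extract_element, which is presumably why the combine also accepts m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)). A hypothetical reduced IR sketch of that path (the function name is made up; the shape mirrors buildvec_vredsum_slideup above):

declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

; Hypothetical reduced example (not part of this PR).
define <4 x i32> @slideup_of_int_reductions(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
  %r0 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a0)
  %r1 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a1)
  %r2 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a2)
  %r3 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3)
  ; Previously each %rN was read out with vmv.x.s and slid in with vslide1up.vx;
  ; with this combine the reduction vectors feed tail-undisturbed vslideup.vi by 1.
  %v0 = insertelement <4 x i32> poison, i32 %r0, i64 0
  %v1 = insertelement <4 x i32> %v0, i32 %r1, i64 1
  %v2 = insertelement <4 x i32> %v1, i32 %r2, i64 2
  %v3 = insertelement <4 x i32> %v2, i32 %r3, i64 3
  ret <4 x i32> %v3
}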
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll
@@ -9,11 +9,11 @@ define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x
; CHECK-NEXT: vfredusum.vs v9, v12, v8
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfredusum.vs v9, v16, v8
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfslide1up.vf v8, v9, fa5
+; CHECK-NEXT: vfredusum.vs v8, v16, v8
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v9, v8, 1
+; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%s0 = extractelement <2 x float> %arg0, i64 0
%r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1)