80 changes: 68 additions & 12 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4512,42 +4512,98 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
"Illegal type which will result in reserved encoding");

const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
auto getVSlide = [&](bool SlideUp, EVT ContainerVT, SDValue Passthru,
SDValue Vec, SDValue Offset, SDValue Mask,
SDValue VL) -> SDValue {
if (SlideUp)
return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
Mask, VL, Policy);
return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
Mask, VL, Policy);
};

  // General case: splat the first operand and slide the other operands down
  // one by one to form a vector. Alternatively, if the last operand is an
  // extraction from element 0 of a vector (e.g. a reduction result), we can
  // use that original vector as the start value and slide up instead of
  // down, which avoids the splat.
SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
SDValue EVec;
bool SlideUp = false;
  // Find the first non-undef operand, searching from the tail.
auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(),
[](SDValue V) { return !V.isUndef(); });
if (ItLastNonUndef != Operands.rend()) {
using namespace SDPatternMatch;
// Check if the last non-undef operand was an extraction.
SlideUp = sd_match(*ItLastNonUndef, m_ExtractElt(m_Value(EVec), m_Zero()));
}

if (SlideUp) {
MVT EVecContainerVT = EVec.getSimpleValueType();
    // Make sure the original vector has a scalable vector type.
if (EVecContainerVT.isFixedLengthVector()) {
EVecContainerVT =
getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
}

// Adapt EVec's type into ContainerVT.
if (EVecContainerVT.getVectorMinNumElements() <
ContainerVT.getVectorMinNumElements())
EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
else
EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);

// Reverse the elements as we're going to slide up from the last element.
std::reverse(Operands.begin(), Operands.end());
Contributor:
Nit, does llvm::reverse work here?

Suggested change:
-    std::reverse(Operands.begin(), Operands.end());
+    reverse(Operands);

Member Author:
That was actually what I thought, but unfortunately, llvm::reverse returns an iterator range instead of reversing content in-place.

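For context on the distinction raised above, a minimal standalone sketch of the two behaviors (illustration only; the function name is made up and nothing here is part of the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>

void reverseDemo() {
  llvm::SmallVector<int> Vals = {1, 2, 3};

  // llvm::reverse only adapts the iteration order; Vals itself is unchanged.
  for (int V : llvm::reverse(Vals))
    (void)V; // visits 3, 2, 1

  // std::reverse mutates the container in place; Vals becomes {3, 2, 1}.
  std::reverse(Vals.begin(), Vals.end());
}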
}

SDValue Vec;
UndefCount = 0;
-  for (SDValue V : Op->ops()) {
+  for (SDValue V : Operands) {
if (V.isUndef()) {
UndefCount++;
continue;
}

-    // Start our sequence with a TA splat in the hopes that hardware is able to
-    // recognize there's no dependency on the prior value of our temporary
-    // register.
+    // Start our sequence with either a TA splat or a reduction result in the
+    // hopes that hardware is able to recognize there's no dependency on the
+    // prior value of our temporary register.
    if (!Vec) {
-      Vec = DAG.getSplatVector(VT, DL, V);
-      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+      if (SlideUp) {
+        Vec = EVec;
+      } else {
+        Vec = DAG.getSplatVector(VT, DL, V);
+        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+      }

UndefCount = 0;
continue;
}

if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
-      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
-                          Vec, Offset, Mask, VL, Policy);
+      Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+                      Offset, Mask, VL);
UndefCount = 0;
}
-    auto OpCode =
-        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+    unsigned OpCode;
+    if (VT.isFloatingPoint())
+      OpCode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
+    else
+      OpCode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;

if (!VT.isFloatingPoint())
V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
V, Mask, VL);
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
-    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
-                        Vec, Offset, Mask, VL, Policy);
+    Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+                    Offset, Mask, VL);

Contributor:
I don't think we have test coverage for emitting a vslideup. I think we can add a test for an undef in the middle of the build_vector, and another test with an undef at the start of the build_vector?

Member Author:
I've added tests for both cases.
}
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
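For intuition about the slide instructions in the tests below: vslide1up shifts every source element one lane up and inserts the scalar at lane 0, while vslide1down shifts every element one lane down and inserts the scalar into the last lane. A rough scalar model of the two primitives (hand-written illustration with made-up names, assuming a non-empty source; not part of the patch):

#include <vector>

// Model of vslide1up.vx / vfslide1up.vf:
// Res[0] = Scalar, Res[i] = Src[i - 1] for i > 0.
std::vector<double> slide1up(const std::vector<double> &Src, double Scalar) {
  std::vector<double> Res(Src.size());
  Res[0] = Scalar;
  for (size_t I = 1; I < Src.size(); ++I)
    Res[I] = Src[I - 1];
  return Res;
}

// Model of vslide1down.vx / vfslide1down.vf:
// Res[i] = Src[i + 1] for i < N - 1; Scalar fills the last lane.
std::vector<double> slide1down(const std::vector<double> &Src, double Scalar) {
  std::vector<double> Res(Src.size());
  for (size_t I = 0; I + 1 < Src.size(); ++I)
    Res[I] = Src[I + 1];
  Res.back() = Scalar;
  return Res;
}

This is why the patch reverses Operands on the slide-up path: seeding the chain with the extracted vector and repeatedly sliding up assembles the result from the last element backwards.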
107 changes: 107 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1828,3 +1828,110 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
%v7 = insertelement <8 x double> %v6, double %e7, i64 7
ret <8 x double> %v7
}

define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6) vscale_range(4, 128) {
; CHECK-LABEL: buildvec_slideup:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
; CHECK-NEXT: vfslide1up.vf v10, v8, fa6
; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
; CHECK-NEXT: vfslide1up.vf v10, v8, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa3
; CHECK-NEXT: vfslide1up.vf v10, v8, fa2
; CHECK-NEXT: vfslide1up.vf v12, v10, fa1
; CHECK-NEXT: vfslide1up.vf v8, v12, fa0
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
%v2 = insertelement <8 x double> %v1, double %e2, i64 2
%v3 = insertelement <8 x double> %v2, double %e3, i64 3
%v4 = insertelement <8 x double> %v3, double %e4, i64 4
%v5 = insertelement <8 x double> %v4, double %e5, i64 5
%v6 = insertelement <8 x double> %v5, double %e6, i64 6
%e7 = extractelement <4 x double> %v, i64 0
%v7 = insertelement <8 x double> %v6, double %e7, i64 7
ret <8 x double> %v7
}

; Negative test for slideup lowering, where the extract_element is not the build_vector's last operand.
define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) {
; CHECK-LABEL: buildvec_slideup_not_last_element:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
; CHECK-NEXT: vfmv.f.s ft0, v8
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, ft0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
%v2 = insertelement <8 x double> %v1, double %e2, i64 2
%v3 = insertelement <8 x double> %v2, double %e3, i64 3
%v4 = insertelement <8 x double> %v3, double %e4, i64 4
%v5 = insertelement <8 x double> %v4, double %e5, i64 5
%e6 = extractelement <4 x double> %v, i64 0
%v6 = insertelement <8 x double> %v5, double %e6, i64 6
%v7 = insertelement <8 x double> %v6, double %e7, i64 7
ret <8 x double> %v7
}

define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
; CHECK-LABEL: buildvec_vfredusum:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v9, v10, v16
; CHECK-NEXT: vfredusum.vs v10, v12, v16
; CHECK-NEXT: vfmv.f.s fa5, v8
; CHECK-NEXT: vfmv.f.s fa4, v9
; CHECK-NEXT: vfmv.f.s fa3, v10
; CHECK-NEXT: vfredusum.vs v8, v14, v16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
; CHECK-NEXT: ret
%247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
%250 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
%251 = insertelement <4 x float> %248, float %250, i64 1
%252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
%253 = insertelement <4 x float> %251, float %252, i64 2
%254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
%255 = insertelement <4 x float> %253, float %254, i64 3
ret <4 x float> %255
}

define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
; CHECK-LABEL: buildvec_vfredosum:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfredosum.vs v9, v10, v16
; CHECK-NEXT: vfredosum.vs v10, v12, v16
; CHECK-NEXT: vfmv.f.s fa5, v8
; CHECK-NEXT: vfmv.f.s fa4, v9
; CHECK-NEXT: vfmv.f.s fa3, v10
; CHECK-NEXT: vfredosum.vs v8, v14, v16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
; CHECK-NEXT: ret
%247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
%250 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
%251 = insertelement <4 x float> %248, float %250, i64 1
%252 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
%253 = insertelement <4 x float> %251, float %252, i64 2
%254 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
%255 = insertelement <4 x float> %253, float %254, i64 3
ret <4 x float> %255
}