@@ -4816,12 +4816,46 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
48164816 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
48174817}
48184818
4819+ // Given a vector a, b, c, d return a vector Factor times longer
4820+ // with Factor-1 undef's between elements. Ex:
4821+ // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4822+ // undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4823+ static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4824+ const SDLoc &DL, SelectionDAG &DAG) {
4825+
4826+ MVT VT = V.getSimpleValueType();
4827+ unsigned EltBits = VT.getScalarSizeInBits();
4828+ ElementCount EC = VT.getVectorElementCount();
4829+ V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4830+
4831+ MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4832+
4833+ SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4834+ // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4835+ // allow the SHL to fold away if Index is 0.
4836+ if (Index != 0)
4837+ Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4838+ DAG.getConstant(EltBits * Index, DL, WideVT));
4839+ // Make sure to use original element type
4840+ MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4841+ EC.multiplyCoefficientBy(Factor));
4842+ return DAG.getBitcast(ResultVT, Result);
4843+ }
4844+
48194845// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
48204846// to create an interleaved vector of <[vscale x] n*2 x ty>.
48214847// This requires that the size of ty is less than the subtarget's maximum ELEN.
48224848static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
48234849 const SDLoc &DL, SelectionDAG &DAG,
48244850 const RISCVSubtarget &Subtarget) {
4851+
4852+ // FIXME: Not only does this optimize the code, it fixes some correctness
4853+ // issues because MIR does not have freeze.
4854+ if (EvenV.isUndef())
4855+ return getWideningSpread(OddV, 2, 1, DL, DAG);
4856+ else if (OddV.isUndef())
4857+ return getWideningSpread(EvenV, 2, 0, DL, DAG);
4858+
48254859 MVT VecVT = EvenV.getSimpleValueType();
48264860 MVT VecContainerVT = VecVT; // <vscale x n x ty>
48274861 // Convert fixed vectors to scalable if needed
@@ -4853,29 +4887,14 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
48534887 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
48544888
48554889 SDValue Interleaved;
4856- if (OddV.isUndef()) {
4857- // If OddV is undef, this is a zero extend.
4858- // FIXME: Not only does this optimize the code, it fixes some correctness
4859- // issues because MIR does not have freeze.
4860- Interleaved =
4861- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4862- } else if (Subtarget.hasStdExtZvbb()) {
4890+ if (Subtarget.hasStdExtZvbb()) {
48634891 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
48644892 SDValue OffsetVec =
48654893 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
48664894 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
48674895 OffsetVec, Passthru, Mask, VL);
4868- if (!EvenV.isUndef())
4869- Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4870- Interleaved, EvenV, Passthru, Mask, VL);
4871- } else if (EvenV.isUndef()) {
4872- Interleaved =
4873- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4874-
4875- SDValue OffsetVec =
4876- DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4877- Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4878- Interleaved, OffsetVec, Passthru, Mask, VL);
4896+ Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4897+ Interleaved, EvenV, Passthru, Mask, VL);
48794898 } else {
48804899 // FIXME: We should freeze the odd vector here. We already handled the case
48814900 // of provably undef/poison above.
0 commit comments