@@ -4824,12 +4824,46 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
48244824 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
48254825}
48264826
4827+ // Given a vector a, b, c, d return a vector Factor times longer
4828+ // with Factor-1 undef's between elements. Ex:
4829+ // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4830+ // undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4831+ static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4832+ const SDLoc &DL, SelectionDAG &DAG) {
4833+
4834+ MVT VT = V.getSimpleValueType();
4835+ unsigned EltBits = VT.getScalarSizeInBits();
4836+ ElementCount EC = VT.getVectorElementCount();
4837+ V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4838+
4839+ MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4840+
4841+ SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4842+ // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4843+ // allow the SHL to fold away if Index is 0.
4844+ if (Index != 0)
4845+ Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4846+ DAG.getConstant(EltBits * Index, DL, WideVT));
4847+ // Make sure to use original element type
4848+ MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4849+ EC.multiplyCoefficientBy(Factor));
4850+ return DAG.getBitcast(ResultVT, Result);
4851+ }
4852+
48274853// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
48284854// to create an interleaved vector of <[vscale x] n*2 x ty>.
48294855// This requires that the size of ty is less than the subtarget's maximum ELEN.
48304856static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
48314857 const SDLoc &DL, SelectionDAG &DAG,
48324858 const RISCVSubtarget &Subtarget) {
4859+
4860+ // FIXME: Not only does this optimize the code, it fixes some correctness
4861+ // issues because MIR does not have freeze.
4862+ if (EvenV.isUndef())
4863+ return getWideningSpread(OddV, 2, 1, DL, DAG);
4864+ if (OddV.isUndef())
4865+ return getWideningSpread(EvenV, 2, 0, DL, DAG);
4866+
48334867 MVT VecVT = EvenV.getSimpleValueType();
48344868 MVT VecContainerVT = VecVT; // <vscale x n x ty>
48354869 // Convert fixed vectors to scalable if needed
@@ -4861,29 +4895,14 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
48614895 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
48624896
48634897 SDValue Interleaved;
4864- if (OddV.isUndef()) {
4865- // If OddV is undef, this is a zero extend.
4866- // FIXME: Not only does this optimize the code, it fixes some correctness
4867- // issues because MIR does not have freeze.
4868- Interleaved =
4869- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4870- } else if (Subtarget.hasStdExtZvbb()) {
4898+ if (Subtarget.hasStdExtZvbb()) {
48714899 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
48724900 SDValue OffsetVec =
48734901 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
48744902 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
48754903 OffsetVec, Passthru, Mask, VL);
4876- if (!EvenV.isUndef())
4877- Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4878- Interleaved, EvenV, Passthru, Mask, VL);
4879- } else if (EvenV.isUndef()) {
4880- Interleaved =
4881- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4882-
4883- SDValue OffsetVec =
4884- DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4885- Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4886- Interleaved, OffsetVec, Passthru, Mask, VL);
4904+ Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4905+ Interleaved, EvenV, Passthru, Mask, VL);
48874906 } else {
48884907 // FIXME: We should freeze the odd vector here. We already handled the case
48894908 // of provably undef/poison above.
0 commit comments