@@ -4446,34 +4446,9 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
44464446 DAG.getUNDEF(VT), Scalar, VL);
44474447}
44484448
4449- // Is this a shuffle extracts either the even or odd elements of a vector?
4450- // That is, specifically, either (a) or (b) in the options below.
4451- // Single operand shuffle is easy:
4452- // a) t35: v8i8 = vector_shuffle<0,2,4,6,u,u,u,u> t34, undef
4453- // b) t35: v8i8 = vector_shuffle<1,3,5,7,u,u,u,u> t34, undef
4454- // Double operand shuffle:
4455- // t34: v8i8 = extract_subvector t11, Constant:i64<0>
4456- // t33: v8i8 = extract_subvector t11, Constant:i64<8>
4457- // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4458- // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4459- static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4460- SDValue V2, ArrayRef<int> Mask,
4461- const RISCVSubtarget &Subtarget) {
4462- // Need to be able to widen the vector.
4463- if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4464- return SDValue();
4465-
4466- // First index must be the first even or odd element from V1.
4467- if (Mask[0] != 0 && Mask[0] != 1)
4468- return SDValue();
4469-
4470- // The others must increase by 2 each time.
4471- for (unsigned i = 1; i != Mask.size(); ++i)
4472- if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2)
4473- return SDValue();
4474-
4475- if (1 == count_if(Mask, [](int Idx) { return Idx != -1; }))
4476- return SDValue();
4449+ // Can this shuffle be performed on exactly one (possibly larger) input?
4450+ static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4451+ SDValue V2) {
44774452
44784453 if (V2.isUndef() &&
44794454 RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
@@ -4490,17 +4465,19 @@ static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
44904465 return SDValue();
44914466
44924467 // Src needs to have twice the number of elements.
4493- if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4468+ unsigned NumElts = VT.getVectorNumElements();
4469+ if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
44944470 return SDValue();
44954471
44964472 // The extracts must extract the two halves of the source.
44974473 if (V1.getConstantOperandVal(1) != 0 ||
4498- V2.getConstantOperandVal(1) != Mask.size() )
4474+ V2.getConstantOperandVal(1) != NumElts )
44994475 return SDValue();
45004476
45014477 return Src;
45024478}
45034479
4480+
45044481/// Is this shuffle interleaving contiguous elements from one vector into the
45054482/// even elements and contiguous elements from another vector into the odd
45064483/// elements. \p EvenSrc will contain the element that should be in the first
@@ -4612,36 +4589,29 @@ static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
46124589 return Rotation;
46134590}
46144591
4615- // Lower a deinterleave shuffle to vnsrl.
4616- // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4617- // -> [p, q, r, s] (EvenElts == false)
4618- // VT is the type of the vector to return, <[vscale x ]n x ty>
4619- // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4620- static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4621- bool EvenElts, SelectionDAG &DAG) {
4622- // The result is a vector of type <m x n x ty>. The source is a vector of
4623- // type <m x n*2 x ty> (For the single source case, the high half is undef)
4624- if (Src.getValueType() == VT) {
4625- EVT WideVT = VT.getDoubleNumVectorElementsVT();
4626- Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, DAG.getUNDEF(WideVT),
4627- Src, DAG.getVectorIdxConstant(0, DL));
4628- }
4629-
4630- // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4631- // This also converts FP to int.
4592+ // Lower a deinterleave shuffle to SRL and TRUNC. Factor must be 2, 4, or 8,
4593+ // and the integer type Factor times wider than VT's element type must be
4594+ // a legal element type.
4595+ // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4596+ // -> [p, q, r, s] (Factor=2, Index=1)
4597+ static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src,
4598+ unsigned Factor, unsigned Index,
4599+ SelectionDAG &DAG) {
46324600 unsigned EltBits = VT.getScalarSizeInBits();
4633- MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * 2),
4634- VT.getVectorElementCount());
4601+ ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4602+ MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4603+ SrcEC.divideCoefficientBy(Factor));
4604+ MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4605+ SrcEC.divideCoefficientBy(Factor));
46354606 Src = DAG.getBitcast(WideSrcVT, Src);
46364607
4637- MVT IntVT = VT.changeVectorElementTypeToInteger();
4638-
4639- // If we want even elements, then the shift amount is 0. Otherwise, shift by
4640- // the original element size.
4641- unsigned Shift = EvenElts ? 0 : EltBits;
4608+ unsigned Shift = Index * EltBits;
46424609 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
46434610 DAG.getConstant(Shift, DL, WideSrcVT));
4644- Res = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Res);
4611+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4612+ MVT IntVT = VT.changeVectorElementTypeToInteger();
4613+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT),
4614+ Res, DAG.getVectorIdxConstant(0, DL));
46454615 return DAG.getBitcast(VT, Res);
46464616}
46474617
@@ -5332,11 +5302,31 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
53325302 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
53335303 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
53345304
5335- // If this is a deinterleave and we can widen the vector, then we can use
5336- // vnsrl to deinterleave.
5337- if (SDValue Src =
5338- isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget))
5339- return getDeinterleaveViaVNSRL(DL, VT, Src, Mask[0] == 0, DAG);
5305+ // If this is a deinterleave with a factor of 2, 4, or 8 and we can widen the
5306+ // vector, then we can use shift and truncate to perform the shuffle.
5307+ // TODO: For Factor=6, we can perform the first step of the deinterleave via
5308+ // shift-and-trunc reducing total cost for everything except an mf8 result.
5309+ // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5310+ // to do the entire operation.
5311+ if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5312+ const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5313+ assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5314+ for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5315+ unsigned Index = 0;
5316+ if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5317+ 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5318+ if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2)) {
5319+ if (Src.getValueType() == VT) {
5320+ EVT WideVT = VT.getDoubleNumVectorElementsVT();
5321+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, DAG.getUNDEF(WideVT),
5322+ Src, DAG.getVectorIdxConstant(0, DL));
5323+ }
5324+ return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5325+ }
5326+ }
5327+ }
5328+ }
5329+
53405330
53415331 if (SDValue V =
53425332 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
@@ -10739,8 +10729,8 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
1073910729 // We can deinterleave through vnsrl.wi if the element type is smaller than
1074010730 // ELEN
1074110731 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10742- SDValue Even = getDeinterleaveViaVNSRL (DL, VecVT, Concat, true , DAG);
10743- SDValue Odd = getDeinterleaveViaVNSRL (DL, VecVT, Concat, false , DAG);
10732+ SDValue Even = getDeinterleaveShiftAndTrunc (DL, VecVT, Concat, 2, 0 , DAG);
10733+ SDValue Odd = getDeinterleaveShiftAndTrunc (DL, VecVT, Concat, 2, 1 , DAG);
1074410734 return DAG.getMergeValues({Even, Odd}, DL);
1074510735 }
1074610736
0 commit comments