@@ -4578,36 +4578,48 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
45784578 VL);
45794579}
45804580
4581- // Can this shuffle be performed on exactly one (possibly larger) input?
4582- static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4583-
4584- if (V2.isUndef())
4585- return V1;
4586-
4581+ /// If concat_vector(V1,V2) could be folded away to some existing
4582+ /// vector source, return it. Note that the source may be larger
4583+ /// than the requested concat_vector (i.e. an extract_subvector
4584+ /// might be required.) Returns an empty SDValue if no such source exists.
4585+ static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4586+ EVT VT = V1.getValueType();
4587+ assert(VT == V2.getValueType() && "argument types must match");
45874588 // Both inputs must be extracts.
45884589 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
45894590 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
45904591 return SDValue();
45914592
45924593 // Both extracts must read from the same source, which must agree with VT on being scalable or not.
45934594 SDValue Src = V1.getOperand(0);
4594- if (Src != V2.getOperand(0))
4595- return SDValue();
4596-
4597- // Src needs to have twice the number of elements.
4598- unsigned NumElts = VT.getVectorNumElements();
4599- if (!Src.getValueType().isFixedLengthVector() ||
4600- Src.getValueType().getVectorNumElements() != (NumElts * 2))
4595+ if (Src != V2.getOperand(0) ||
4596+ VT.isScalableVector() != Src.getValueType().isScalableVector())
46014597 return SDValue();
46024598
46034599 // The extracts must be the two adjacent VT-sized pieces at the start of the source (offsets 0 and VT's minimum element count).
46044600 if (V1.getConstantOperandVal(1) != 0 ||
4605- V2.getConstantOperandVal(1) != NumElts )
4601+ V2.getConstantOperandVal(1) != VT.getVectorMinNumElements() )
46064602 return SDValue();
46074603
46084604 return Src;
46094605}
46104606
4607+ // Can this shuffle be performed on exactly one (possibly larger) input? If so, return that input; otherwise return an empty SDValue.
4608+ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4609+
4610+ if (V2.isUndef())
4611+ return V1; // V2 is undef, so V1 is the only input.
4612+
4613+ unsigned NumElts = VT.getVectorNumElements();
4614+ // Src needs to have exactly twice the number of elements of VT.
4615+ // TODO: Update shuffle lowering to add the extract subvector.
4616+ if (SDValue Src = foldConcatVector(V1, V2);
4617+ Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4618+ return Src;
4619+
4620+ return SDValue();
4621+ }
4622+
46114623/// Is this shuffle interleaving contiguous elements from one vector into the
46124624/// even elements and contiguous elements from another vector into the odd
46134625/// elements. \p EvenSrc will contain the element that should be in the first
@@ -11519,12 +11531,27 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
1151911531 return DAG.getMergeValues(Res, DL);
1152011532 }
1152111533
11522- // TODO: Remove the e64 restriction once the fractional LMUL lowering
11523- // is improved to always beat the vnsrl lowering below.
11524- if (Subtarget.hasVendorXRivosVizip() && Factor == 2 &&
11525- VecVT.getVectorElementType().getSizeInBits() == 64) {
11534+ if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
11535+ MVT VT = Op->getSimpleValueType(0);
1152611536 SDValue V1 = Op->getOperand(0);
1152711537 SDValue V2 = Op->getOperand(1);
11538+
11539+ // For fractional LMUL, check if we can use a higher LMUL
11540+ // instruction to avoid a vslidedown.
11541+ if (SDValue Src = foldConcatVector(V1, V2);
11542+ Src && getLMUL1VT(VT).bitsGT(VT)) {
11543+ EVT NewVT = VT.getDoubleNumVectorElementsVT();
11544+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
11545+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewVT, Src, ZeroIdx);
11546+ SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
11547+ DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
11548+ SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
11549+ DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
11550+ Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Even, ZeroIdx);
11551+ Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Odd, ZeroIdx);
11552+ return DAG.getMergeValues({Even, Odd}, DL);
11553+ }
11554+
1152811555 SDValue Even =
1152911556 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
1153011557 SDValue Odd =
0 commit comments