@@ -42610,7 +42610,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4261042610 // Combine VPERMV3 to widened VPERMV if the two source operands can be
4261142611 // freely concatenated.
4261242612 MVT WideVT = VT.getDoubleNumVectorElementsVT();
42613- MVT MaskVT = N.getOperand(1).getSimpleValueType();
4261442613 bool CanConcat = VT.is128BitVector() ||
4261542614 (VT.is256BitVector() && Subtarget.useAVX512Regs());
4261642615 if (CanConcat) {
@@ -42634,12 +42633,10 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4263442633 DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG, DCI,
4263542634 Subtarget)) {
4263642635 ShuffleVectorSDNode::commuteMask(Mask);
42637- SDValue NewMask =
42638- getConstVector(Mask, MaskVT, DAG, DL, /*IsMask=*/true);
42639- NewMask = widenSubVector(NewMask, false, Subtarget, DAG, DL,
42640- WideVT.getSizeInBits());
42636+ Mask.append(NumElts, SM_SentinelUndef);
4264142637 SDValue Perm =
42642- DAG.getNode(X86ISD::VPERMV, DL, WideVT, NewMask, ConcatSrc);
42638+ lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42639+ DAG.getUNDEF(WideVT), Subtarget, DAG);
4264342640 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
4264442641 DAG.getVectorIdxConstant(0, DL));
4264542642 }
@@ -42649,10 +42646,9 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4264942646 // Canonicalize to VPERMV if both sources are the same.
4265042647 if (V1 == V2) {
4265142648 for (int &M : Mask)
42652- M = (M < 0 ? M : M & (Mask.size() - 1));
42653- SDValue NewMask = getConstVector(Mask, MaskVT, DAG, DL,
42654- /*IsMask=*/true);
42655- return DAG.getNode(X86ISD::VPERMV, DL, VT, NewMask, N.getOperand(0));
42649+ M = (M < 0 ? M : (M & (NumElts - 1)));
42650+ return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(0),
42651+ DAG.getUNDEF(VT), Subtarget, DAG);
4265642652 }
4265742653 // If sources are half width, then concat and use VPERMV with adjusted
4265842654 // mask.
@@ -42667,19 +42663,16 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4266742663 combineConcatVectorOps(DL, VT, Ops, DAG, DCI, Subtarget)) {
4266842664 for (int &M : Mask)
4266942665 M = (M < (int)NumElts ? M : (M - (NumElts / 2)));
42670- SDValue NewMask = getConstVector(Mask, MaskVT, DAG, DL,
42671- /*IsMask=*/true);
42672- return DAG.getNode(X86ISD::VPERMV, DL, VT, NewMask, ConcatSrc);
42666+ return lowerShuffleWithPERMV(DL, VT, Mask, ConcatSrc,
42667+ DAG.getUNDEF(VT), Subtarget, DAG);
4267342668 }
4267442669 }
4267542670 // Commute foldable source to the RHS.
4267642671 if (isShuffleFoldableLoad(N.getOperand(0)) &&
4267742672 !isShuffleFoldableLoad(N.getOperand(2))) {
4267842673 ShuffleVectorSDNode::commuteMask(Mask);
42679- SDValue NewMask =
42680- getConstVector(Mask, MaskVT, DAG, DL, /*IsMask=*/true);
42681- return DAG.getNode(X86ISD::VPERMV3, DL, VT, N.getOperand(2), NewMask,
42682- N.getOperand(0));
42674+ return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(2),
42675+ N.getOperand(0), Subtarget, DAG);
4268342676 }
4268442677 }
4268542678 return SDValue();
@@ -58048,10 +58041,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5804858041 if (ConcatMask.size() == (NumOps * NumSrcElts)) {
5804958042 SDValue Src = concatSubVectors(Ops[0].getOperand(1),
5805058043 Ops[1].getOperand(1), DAG, DL);
58051- MVT IntMaskSVT = MVT::getIntegerVT(EltSizeInBits);
58052- MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
58053- SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
58054- return DAG.getNode(X86ISD::VPERMV, DL, VT, Mask, Src);
58044+ return lowerShuffleWithPERMV(DL, VT, ConcatMask, Src,
58045+ DAG.getUNDEF(VT), Subtarget, DAG);
5805558046 }
5805658047 }
5805758048 break;
@@ -58080,10 +58071,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5808058071 if (ConcatMask.size() == (NumOps * NumSrcElts)) {
5808158072 SDValue Src0 = ConcatSubOperand(VT, Ops, 0);
5808258073 SDValue Src1 = ConcatSubOperand(VT, Ops, 2);
58083- MVT IntMaskSVT = MVT::getIntegerVT(EltSizeInBits);
58084- MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
58085- SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
58086- return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
58074+ return lowerShuffleWithPERMV(DL, VT, ConcatMask, Src0, Src1,
58075+ Subtarget, DAG);
5808758076 }
5808858077 }
5808958078 break;
0 commit comments