@@ -42673,40 +42673,13 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4267342673 return SDValue();
4267442674 }
4267542675 case X86ISD::VPERMV3: {
42676- // Combine VPERMV3 to widened VPERMV if the two source operands can be
42677- // freely concatenated.
4267842676 MVT WideVT = VT.getDoubleNumVectorElementsVT();
4267942677 bool CanConcat = VT.is128BitVector() ||
4268042678 (VT.is256BitVector() && Subtarget.useAVX512Regs());
42681- if (CanConcat) {
42682- SDValue Ops[] = {N.getOperand(0), N.getOperand(2)};
42683- if (SDValue ConcatSrc =
42684- combineConcatVectorOps(DL, WideVT, Ops, DAG, Subtarget)) {
42685- SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
42686- DL, WideVT.getSizeInBits());
42687- SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, ConcatSrc);
42688- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42689- DAG.getVectorIdxConstant(0, DL));
42690- }
42691- }
4269242679 SmallVector<SDValue, 2> SrcOps;
4269342680 SmallVector<int, 32> Mask;
4269442681 if (getTargetShuffleMask(N, /*AllowSentinelZero=*/false, SrcOps, Mask)) {
4269542682 assert(Mask.size() == NumElts && "Unexpected shuffle mask size");
42696- // See if we can concatenate the commuted operands.
42697- if (CanConcat) {
42698- if (SDValue ConcatSrc = combineConcatVectorOps(
42699- DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG,
42700- Subtarget)) {
42701- ShuffleVectorSDNode::commuteMask(Mask);
42702- Mask.append(NumElts, SM_SentinelUndef);
42703- SDValue Perm =
42704- lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42705- DAG.getUNDEF(WideVT), Subtarget, DAG);
42706- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42707- DAG.getVectorIdxConstant(0, DL));
42708- }
42709- }
4271042683 SDValue V1 = peekThroughBitcasts(N.getOperand(0));
4271142684 SDValue V2 = peekThroughBitcasts(N.getOperand(2));
4271242685 // Canonicalize to VPERMV if both sources are the same.
@@ -42740,6 +42713,33 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4274042713 return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(2),
4274142714 N.getOperand(0), Subtarget, DAG);
4274242715 }
42716+ // Combine VPERMV3 to widened VPERMV if the two source operands can be
42717+ // freely concatenated, with a commuted shuffle mask.
42718+ if (CanConcat) {
42719+ if (SDValue ConcatSrc = combineConcatVectorOps(
42720+ DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG,
42721+ Subtarget)) {
42722+ ShuffleVectorSDNode::commuteMask(Mask);
42723+ Mask.append(NumElts, SM_SentinelUndef);
42724+ SDValue Perm =
42725+ lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42726+ DAG.getUNDEF(WideVT), Subtarget, DAG);
42727+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42728+ DAG.getVectorIdxConstant(0, DL));
42729+ }
42730+ }
42731+ }
42732+ // Combine VPERMV3 to widened VPERMV if the two source operands can be
42733+ // freely concatenated.
42734+ if (CanConcat) {
42735+ if (SDValue ConcatSrc = combineConcatVectorOps(
42736+ DL, WideVT, {N.getOperand(0), N.getOperand(2)}, DAG, Subtarget)) {
42737+ SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
42738+ DL, WideVT.getSizeInBits());
42739+ SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, ConcatSrc);
42740+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42741+ DAG.getVectorIdxConstant(0, DL));
42742+ }
4274342743 }
4274442744 return SDValue();
4274542745 }
0 commit comments