@@ -42610,7 +42610,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42610
42610
// Combine VPERMV3 to widened VPERMV if the two source operands can be
42611
42611
// freely concatenated.
42612
42612
MVT WideVT = VT.getDoubleNumVectorElementsVT();
42613
- MVT MaskVT = N.getOperand(1).getSimpleValueType();
42614
42613
bool CanConcat = VT.is128BitVector() ||
42615
42614
(VT.is256BitVector() && Subtarget.useAVX512Regs());
42616
42615
if (CanConcat) {
@@ -42634,12 +42633,10 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42634
42633
DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG, DCI,
42635
42634
Subtarget)) {
42636
42635
ShuffleVectorSDNode::commuteMask(Mask);
42637
- SDValue NewMask =
42638
- getConstVector(Mask, MaskVT, DAG, DL, /*IsMask=*/true);
42639
- NewMask = widenSubVector(NewMask, false, Subtarget, DAG, DL,
42640
- WideVT.getSizeInBits());
42636
+ Mask.append(NumElts, SM_SentinelUndef);
42641
42637
SDValue Perm =
42642
- DAG.getNode(X86ISD::VPERMV, DL, WideVT, NewMask, ConcatSrc);
42638
+ lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42639
+ DAG.getUNDEF(WideVT), Subtarget, DAG);
42643
42640
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42644
42641
DAG.getVectorIdxConstant(0, DL));
42645
42642
}
@@ -42649,10 +42646,9 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42649
42646
// Canonicalize to VPERMV if both sources are the same.
42650
42647
if (V1 == V2) {
42651
42648
for (int &M : Mask)
42652
- M = (M < 0 ? M : M & (Mask.size() - 1));
42653
- SDValue NewMask = getConstVector(Mask, MaskVT, DAG, DL,
42654
- /*IsMask=*/true);
42655
- return DAG.getNode(X86ISD::VPERMV, DL, VT, NewMask, N.getOperand(0));
42649
+ M = (M < 0 ? M : (M & (NumElts - 1)));
42650
+ return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(0),
42651
+ DAG.getUNDEF(VT), Subtarget, DAG);
42656
42652
}
42657
42653
// If sources are half width, then concat and use VPERMV with adjusted
42658
42654
// mask.
@@ -42667,19 +42663,16 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42667
42663
combineConcatVectorOps(DL, VT, Ops, DAG, DCI, Subtarget)) {
42668
42664
for (int &M : Mask)
42669
42665
M = (M < (int)NumElts ? M : (M - (NumElts / 2)));
42670
- SDValue NewMask = getConstVector(Mask, MaskVT, DAG, DL,
42671
- /*IsMask=*/true);
42672
- return DAG.getNode(X86ISD::VPERMV, DL, VT, NewMask, ConcatSrc);
42666
+ return lowerShuffleWithPERMV(DL, VT, Mask, ConcatSrc,
42667
+ DAG.getUNDEF(VT), Subtarget, DAG);
42673
42668
}
42674
42669
}
42675
42670
// Commute foldable source to the RHS.
42676
42671
if (isShuffleFoldableLoad(N.getOperand(0)) &&
42677
42672
!isShuffleFoldableLoad(N.getOperand(2))) {
42678
42673
ShuffleVectorSDNode::commuteMask(Mask);
42679
- SDValue NewMask =
42680
- getConstVector(Mask, MaskVT, DAG, DL, /*IsMask=*/true);
42681
- return DAG.getNode(X86ISD::VPERMV3, DL, VT, N.getOperand(2), NewMask,
42682
- N.getOperand(0));
42674
+ return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(2),
42675
+ N.getOperand(0), Subtarget, DAG);
42683
42676
}
42684
42677
}
42685
42678
return SDValue();
@@ -58048,10 +58041,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
58048
58041
if (ConcatMask.size() == (NumOps * NumSrcElts)) {
58049
58042
SDValue Src = concatSubVectors(Ops[0].getOperand(1),
58050
58043
Ops[1].getOperand(1), DAG, DL);
58051
- MVT IntMaskSVT = MVT::getIntegerVT(EltSizeInBits);
58052
- MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
58053
- SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
58054
- return DAG.getNode(X86ISD::VPERMV, DL, VT, Mask, Src);
58044
+ return lowerShuffleWithPERMV(DL, VT, ConcatMask, Src,
58045
+ DAG.getUNDEF(VT), Subtarget, DAG);
58055
58046
}
58056
58047
}
58057
58048
break;
@@ -58080,10 +58071,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
58080
58071
if (ConcatMask.size() == (NumOps * NumSrcElts)) {
58081
58072
SDValue Src0 = ConcatSubOperand(VT, Ops, 0);
58082
58073
SDValue Src1 = ConcatSubOperand(VT, Ops, 2);
58083
- MVT IntMaskSVT = MVT::getIntegerVT(EltSizeInBits);
58084
- MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
58085
- SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
58086
- return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
58074
+ return lowerShuffleWithPERMV(DL, VT, ConcatMask, Src0, Src1,
58075
+ Subtarget, DAG);
58087
58076
}
58088
58077
}
58089
58078
break;
0 commit comments