@@ -58739,18 +58739,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5873958739 DAG.getNode(X86ISD::VPERMILPI, DL, FloatVT, Res, Op0.getOperand(1));
5874058740 return DAG.getBitcast(VT, Res);
5874158741 }
58742- if (!IsSplat && VT == MVT::v8f64) {
58743- unsigned NumSubElts = Op0.getValueType().getVectorNumElements();
58744- uint64_t Mask = (1ULL << NumSubElts) - 1;
58745- uint64_t Idx = 0;
58746- for (unsigned I = 0; I != NumOps; ++I) {
58747- uint64_t SubIdx = Ops[I].getConstantOperandVal(1);
58748- Idx |= (SubIdx & Mask) << (I * NumSubElts);
58749- }
58750- return DAG.getNode(X86ISD::VPERMILPI, DL, VT,
58751- ConcatSubOperand(VT, Ops, 0),
58752- DAG.getTargetConstant(Idx, DL, MVT::i8));
58753- }
5875458742 break;
5875558743 case X86ISD::VPERMILPV:
5875658744 if (!IsSplat && (VT.is256BitVector() ||
@@ -59322,7 +59310,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5932259310 }
5932359311
5932459312 // We can always convert per-lane vXf64 shuffles into VSHUFPD.
59325- if (!IsSplat && NumOps == 2 && VT == MVT::v4f64 &&
59313+ if (!IsSplat &&
59314+ (VT == MVT::v4f64 || (VT == MVT::v8f64 && Subtarget.useAVX512Regs())) &&
5932659315 all_of(Ops, [](SDValue Op) {
5932759316 return Op.hasOneUse() && (Op.getOpcode() == X86ISD::MOVDDUP ||
5932859317 Op.getOpcode() == X86ISD::SHUFP ||
@@ -59331,25 +59320,33 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5933159320 Op.getOpcode() == X86ISD::UNPCKL ||
5933259321 Op.getOpcode() == X86ISD::UNPCKH);
5933359322 })) {
59334- SmallVector<SDValue, 2> SrcOps0, SrcOps1;
59335- SmallVector<int, 8> SrcMask0, SrcMask1;
59336- if (getTargetShuffleMask(Ops[0], /*AllowSentinelZero=*/false, SrcOps0,
59337- SrcMask0) &&
59338- getTargetShuffleMask(Ops[1], /*AllowSentinelZero=*/false, SrcOps1,
59339- SrcMask1)) {
59340- assert(SrcMask0.size() == 2 && SrcMask1.size() == 2 && "Bad shuffles");
59341- SDValue LHS[] = {SrcOps0[SrcMask0[0] / 2], SrcOps1[SrcMask1[0] / 2]};
59342- SDValue RHS[] = {SrcOps0[SrcMask0[1] / 2], SrcOps1[SrcMask1[1] / 2]};
59323+ MVT OpVT = Ops[0].getSimpleValueType();
59324+ unsigned NumOpElts = OpVT.getVectorNumElements();
59325+ SmallVector<SmallVector<SDValue, 2>, 4> SrcOps(NumOps);
59326+ SmallVector<SmallVector<int, 8>, 4> SrcMasks(NumOps);
59327+ if (all_of(seq<int>(NumOps), [&](int I) {
59328+ return getTargetShuffleMask(Ops[I], /*AllowSentinelZero=*/false,
59329+ SrcOps[I], SrcMasks[I]) &&
59330+ SrcMasks[I].size() == NumOpElts &&
59331+ all_of(SrcOps[I], [&OpVT](SDValue V) {
59332+ return V.getValueType() == OpVT;
59333+ });
59334+ })) {
59335+ bool Unary = true;
59336+ unsigned SHUFPDMask = 0;
59337+ SmallVector<SDValue, 4> LHS(NumOps), RHS(NumOps);
59338+ for (unsigned I = 0; I != NumOps; ++I) {
59339+ LHS[I] = SrcOps[I][SrcMasks[I][0] / NumOpElts];
59340+ RHS[I] = SrcOps[I][SrcMasks[I][1] / NumOpElts];
59341+ Unary &= LHS[I] == RHS[I];
59342+ for (unsigned J = 0; J != NumOpElts; ++J)
59343+ SHUFPDMask |= (SrcMasks[I][J] & 1) << ((I * NumOpElts) + J);
59344+ }
5934359345 SDValue Concat0 =
5934459346 combineConcatVectorOps(DL, VT, LHS, DAG, Subtarget, Depth + 1);
5934559347 SDValue Concat1 =
5934659348 combineConcatVectorOps(DL, VT, RHS, DAG, Subtarget, Depth + 1);
59347- if (Concat0 || Concat1) {
59348- unsigned SHUFPDMask = 0;
59349- SHUFPDMask |= (SrcMask0[0] & 1) << 0;
59350- SHUFPDMask |= (SrcMask0[1] & 1) << 1;
59351- SHUFPDMask |= (SrcMask1[0] & 1) << 2;
59352- SHUFPDMask |= (SrcMask1[1] & 1) << 3;
59349+ if (Unary || Concat0 || Concat1) {
5935359350 Concat0 =
5935459351 Concat0 ? Concat0 : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS);
5935559352 Concat1 =
0 commit comments