@@ -41903,7 +41903,8 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
4190341903static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
4190441904 ArrayRef<SDValue> Ops, SelectionDAG &DAG,
4190541905 TargetLowering::DAGCombinerInfo &DCI,
41906- const X86Subtarget &Subtarget);
41906+ const X86Subtarget &Subtarget,
41907+ unsigned Depth = 0);
4190741908
4190841909/// Try to combine x86 target specific shuffles.
4190941910static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
@@ -57791,7 +57792,8 @@ CastIntSETCCtoFP(MVT VT, ISD::CondCode CC, unsigned NumSignificantBitsLHS,
5779157792static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5779257793 ArrayRef<SDValue> Ops, SelectionDAG &DAG,
5779357794 TargetLowering::DAGCombinerInfo &DCI,
57794- const X86Subtarget &Subtarget) {
57795+ const X86Subtarget &Subtarget,
57796+ unsigned Depth) {
5779557797 assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
5779657798 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5779757799
@@ -57803,6 +57805,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5780357805 }))
5780457806 return getZeroVector(VT, Subtarget, DAG, DL);
5780557807
57808+ if (Depth >= SelectionDAG::MaxRecursionDepth)
57809+ return SDValue(); // Limit search depth.
57810+
5780657811 SDValue Op0 = Ops[0];
5780757812 bool IsSplat = llvm::all_equal(Ops);
5780857813 unsigned NumOps = Ops.size();
@@ -57933,6 +57938,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5793357938 }
5793457939 return AllConstants || AllSubs;
5793557940 };
57941+ auto CombineSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
57942+ bool AllConstants = true;
57943+ SmallVector<SDValue> Subs;
57944+ for (SDValue SubOp : SubOps) {
57945+ SDValue BC = peekThroughBitcasts(SubOp.getOperand(I));
57946+ AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
57947+ ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
57948+ Subs.push_back(SubOp.getOperand(I));
57949+ }
57950+ if (AllConstants)
57951+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
57952+ return combineConcatVectorOps(DL, VT, Subs, DAG, DCI, Subtarget,
57953+ Depth + 1);
57954+ };
5793657955
5793757956 switch (Op0.getOpcode()) {
5793857957 case ISD::VECTOR_SHUFFLE: {
@@ -58343,9 +58362,12 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5834358362 if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
5834458363 (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
5834558364 (EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
58346- return DAG.getNode(Op0.getOpcode(), DL, VT,
58347- ConcatSubOperand(VT, Ops, 0),
58348- ConcatSubOperand(VT, Ops, 1));
58365+ SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
58366+ SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
58367+ if (Concat0 || Concat1)
58368+ return DAG.getNode(Op0.getOpcode(), DL, VT,
58369+ Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
58370+ Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
5834958371 }
5835058372 break;
5835158373 // Due to VADD, VSUB, VMUL can be executed on more ports than VINSERT and
@@ -58354,12 +58376,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5835458376 case ISD::FADD:
5835558377 case ISD::FSUB:
5835658378 case ISD::FMUL:
58357- if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) &&
58358- (VT.is256BitVector() ||
58359- (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
58360- return DAG.getNode(Op0.getOpcode(), DL, VT,
58361- ConcatSubOperand(VT, Ops, 0),
58362- ConcatSubOperand(VT, Ops, 1));
58379+ if (!IsSplat && (VT.is256BitVector() ||
58380+ (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
58381+ SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
58382+ SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
58383+ if (Concat0 || Concat1)
58384+ return DAG.getNode(Op0.getOpcode(), DL, VT,
58385+ Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
58386+ Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
5836358387 }
5836458388 break;
5836558389 case ISD::FDIV:
0 commit comments