@@ -41903,7 +41903,8 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
4190341903static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
4190441904 ArrayRef<SDValue> Ops, SelectionDAG &DAG,
4190541905 TargetLowering::DAGCombinerInfo &DCI,
41906- const X86Subtarget &Subtarget);
41906+ const X86Subtarget &Subtarget,
41907+ unsigned Depth = 0);
4190741908
4190841909/// Try to combine x86 target specific shuffles.
4190941910static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
@@ -57791,7 +57792,8 @@ CastIntSETCCtoFP(MVT VT, ISD::CondCode CC, unsigned NumSignificantBitsLHS,
5779157792static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5779257793 ArrayRef<SDValue> Ops, SelectionDAG &DAG,
5779357794 TargetLowering::DAGCombinerInfo &DCI,
57794- const X86Subtarget &Subtarget) {
57795+ const X86Subtarget &Subtarget,
57796+ unsigned Depth) {
5779557797 assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
5779657798 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5779757799
@@ -57803,6 +57805,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5780357805 }))
5780457806 return getZeroVector(VT, Subtarget, DAG, DL);
5780557807
57808+ if (Depth >= SelectionDAG::MaxRecursionDepth)
57809+ return SDValue(); // Limit search depth.
57810+
5780657811 SDValue Op0 = Ops[0];
5780757812 bool IsSplat = llvm::all_equal(Ops);
5780857813 unsigned NumOps = Ops.size();
@@ -57933,6 +57938,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5793357938 }
5793457939 return AllConstants || AllSubs;
5793557940 };
// Helper: gather operand number I from every node in SubOps and try to
// produce a single value of type VT representing their concatenation.
//  - If every gathered operand is, after looking through bitcasts, a build
//    vector of integer or floating-point constants, a CONCAT_VECTORS node is
//    created directly.
//  - Otherwise the gathered operands are handed back to
//    combineConcatVectorOps with Depth + 1. That call can return an empty
//    SDValue (it does so when the recursion depth limit is reached), so
//    callers must check the result before using it.
57941+ auto CombineSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
57942+ bool AllConstants = true;
57943+ SmallVector<SDValue> Subs;
57944+ for (SDValue SubOp : SubOps) {
// BC is used only for the constant test below; the un-peeked operand is what
// gets collected for concatenation.
57945+ SDValue BC = peekThroughBitcasts(SubOp.getOperand(I));
57946+ AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
57947+ ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
57948+ Subs.push_back(SubOp.getOperand(I));
57949+ }
// All-constant operands: build the concatenation node without recursing.
57950+ if (AllConstants)
57951+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
// Non-constant operands: recurse one level deeper to see whether the
// sub-operands themselves can be combined.
57952+ return combineConcatVectorOps(DL, VT, Subs, DAG, DCI, Subtarget,
57953+ Depth + 1);
57954+ };
5793657955
5793757956 switch (Op0.getOpcode()) {
5793857957 case ISD::VECTOR_SHUFFLE: {
@@ -58354,14 +58373,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5835458373 case ISD::FADD:
5835558374 case ISD::FSUB:
5835658375 case ISD::FMUL:
58357- if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) &&
58358- (VT.is256BitVector() ||
58359- (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
58360- return DAG.getNode(Op0.getOpcode(), DL, VT,
58361- ConcatSubOperand(VT, Ops, 0),
58362- ConcatSubOperand(VT, Ops, 1));
58376+ if (!IsSplat && (VT.is256BitVector() ||
58377+ (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
58378+ SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
58379+ SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
58380+ if (Concat0 || Concat1)
58381+ return DAG.getNode(Op0.getOpcode(), DL, VT,
58382+ Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
58383+ Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
5836358384 }
5836458385 break;
58386+ // Always prefer to concatenate high latency FDIV instructions.
5836558387 case ISD::FDIV:
5836658388 if (!IsSplat && (VT.is256BitVector() ||
5836758389 (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
0 commit comments