@@ -40881,16 +40881,14 @@ namespace llvm {
4088140881/// combine-ordering. To fix this, we should do the redundant instruction
4088240882/// combining in this recursive walk.
4088340883static SDValue combineX86ShufflesRecursively(
40884- ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root ,
40884+ ArrayRef<SDValue> SrcOps, int SrcOpIndex, unsigned RootOpc, MVT RootVT ,
4088540885 ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
4088640886 unsigned MaxDepth, bool AllowVariableCrossLaneMask,
40887- bool AllowVariablePerLaneMask, SelectionDAG &DAG, const SDLoc &DL ,
40888- const X86Subtarget &Subtarget) {
40887+ bool AllowVariablePerLaneMask, bool IsMaskedShuffle, SelectionDAG &DAG ,
40888+ const SDLoc &DL, const X86Subtarget &Subtarget) {
4088940889 assert(!RootMask.empty() &&
4089040890 (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
4089140891 "Illegal shuffle root mask");
40892- unsigned RootOpc = Root.getOpcode();
40893- MVT RootVT = Root.getSimpleValueType();
4089440892 assert(RootVT.isVector() && "Shuffles operate on vector types!");
4089540893 unsigned RootSizeInBits = RootVT.getSizeInBits();
4089640894
@@ -41185,8 +41183,9 @@ static SDValue combineX86ShufflesRecursively(
4118541183 AllowPerLaneVar = AllowVariablePerLaneMask;
4118641184 }
4118741185 if (SDValue Res = combineX86ShufflesRecursively(
41188- Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
41189- AllowCrossLaneVar, AllowPerLaneVar, DAG, DL, Subtarget))
41186+ Ops, i, RootOpc, RootVT, ResolvedMask, CombinedNodes, Depth + 1,
41187+ MaxDepth, AllowCrossLaneVar, AllowPerLaneVar, IsMaskedShuffle,
41188+ DAG, DL, Subtarget))
4119041189 return Res;
4119141190 }
4119241191 }
@@ -41272,10 +41271,6 @@ static SDValue combineX86ShufflesRecursively(
4127241271 resolveTargetShuffleInputsAndMask(Ops, Mask);
4127341272 }
4127441273
41275- // If we are a AVX512/EVEX target the mask element size should match the root
41276- // element size to allow writemasks to be reused.
41277- bool IsMaskedShuffle = isMaskableNode(Root, Subtarget);
41278-
4127941274 // We can only combine unary and binary shuffle mask cases.
4128041275 if (Ops.size() <= 2) {
4128141276 // Minor canonicalization of the accumulated shuffle mask to make it easier
@@ -41328,8 +41323,9 @@ static SDValue combineX86ShufflesRecursively(
4132841323static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
4132941324 const X86Subtarget &Subtarget) {
4133041325 return combineX86ShufflesRecursively(
41331- {Op}, 0, Op, {0}, {}, /*Depth*/ 0, X86::MaxShuffleCombineDepth,
41332- /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, DAG,
41326+ {Op}, 0, Op.getOpcode(), Op.getSimpleValueType(), {0}, {}, /*Depth*/ 0,
41327+ X86::MaxShuffleCombineDepth, /*AllowCrossLaneVarMask*/ true,
41328+ /*AllowPerLaneVarMask*/ true, isMaskableNode(Op, Subtarget), DAG,
4133341329 SDLoc(Op), Subtarget);
4133441330}
4133541331
@@ -41980,10 +41976,10 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4198041976 for (unsigned i = 0; i != Scale; ++i)
4198141977 DemandedMask[i] = i;
4198241978 if (SDValue Res = combineX86ShufflesRecursively(
41983- {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0 ,
41984- X86::MaxShuffleCombineDepth,
41985- /*AllowCrossLaneVarMask*/ true,
41986- /*AllowPerLaneVarMask */ true , DAG, DL, Subtarget))
41979+ {BC}, 0, BC.getOpcode(), BC.getSimpleValueType(), DemandedMask ,
41980+ {}, /*Depth*/ 0, X86::MaxShuffleCombineDepth,
41981+ /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true,
41982+ /*IsMaskedShuffle */ false , DAG, DL, Subtarget))
4198741983 return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
4198841984 DAG.getBitcast(SrcVT, Res));
4198941985 }
@@ -43984,8 +43980,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4398443980 DemandedMask[i] = i;
4398543981
4398643982 SDValue NewShuffle = combineX86ShufflesRecursively(
43987- {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,
43988- /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG,
43983+ {Op}, 0, Op.getOpcode(), Op.getSimpleValueType(), DemandedMask, {}, 0,
43984+ X86::MaxShuffleCombineDepth - Depth, /*AllowCrossLaneVarMask*/ true,
43985+ /*AllowPerLaneVarMask*/ true, isMaskableNode(Op, Subtarget), TLO.DAG,
4398943986 SDLoc(Op), Subtarget);
4399043987 if (NewShuffle)
4399143988 return TLO.CombineTo(Op, NewShuffle);
@@ -51620,10 +51617,10 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5162051617 }
5162151618
5162251619 if (SDValue Shuffle = combineX86ShufflesRecursively(
51623- {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1 ,
51624- X86::MaxShuffleCombineDepth,
51625- /*AllowVarCrossLaneMask*/ true,
51626- /*AllowVarPerLaneMask */ true , DAG, SDLoc(SrcVec), Subtarget))
51620+ {SrcVec}, 0, SrcVec.getOpcode(), SrcVec.getSimpleValueType() ,
51621+ ShuffleMask, {}, /*Depth*/ 1, X86::MaxShuffleCombineDepth,
51622+ /*AllowVarCrossLaneMask*/ true, /*AllowVarPerLaneMask*/ true,
51623+ /*IsMaskedShuffle */ false , DAG, SDLoc(SrcVec), Subtarget))
5162751624 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle,
5162851625 N0.getOperand(1));
5162951626 }
0 commit comments