@@ -39580,7 +39580,7 @@ static bool matchBinaryPermuteShuffle(
3958039580
3958139581static SDValue combineX86ShuffleChainWithExtract(
3958239582 ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
39583- bool HasVariableMask , bool AllowVariableCrossLaneMask,
39583+ ArrayRef<const SDNode *> SrcNodes , bool AllowVariableCrossLaneMask,
3958439584 bool AllowVariablePerLaneMask, SelectionDAG &DAG,
3958539585 const X86Subtarget &Subtarget);
3958639586
@@ -39595,7 +39595,7 @@ static SDValue combineX86ShuffleChainWithExtract(
3959539595/// instruction but should only be used to replace chains over a certain depth.
3959639596static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3959739597 ArrayRef<int> BaseMask, int Depth,
39598- bool HasVariableMask ,
39598+ ArrayRef<const SDNode *> SrcNodes ,
3959939599 bool AllowVariableCrossLaneMask,
3960039600 bool AllowVariablePerLaneMask,
3960139601 SelectionDAG &DAG,
@@ -40064,6 +40064,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4006440064 if (Depth < 1)
4006540065 return SDValue();
4006640066
40067+ bool HasVariableMask = llvm::any_of(SrcNodes, [](const SDNode *N) {
40068+ return isTargetShuffleVariableMask(N->getOpcode());
40069+ });
40070+
4006740071 // Depth threshold above which we can efficiently use variable mask shuffles.
4006840072 int VariableCrossLaneShuffleDepth =
4006940073 Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2;
@@ -40134,9 +40138,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4013440138 // If that failed and either input is extracted then try to combine as a
4013540139 // shuffle with the larger type.
4013640140 if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
40137- Inputs, Root, BaseMask, Depth, HasVariableMask,
40138- AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG,
40139- Subtarget))
40141+ Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask,
40142+ AllowVariablePerLaneMask, DAG, Subtarget))
4014040143 return WideShuffle;
4014140144
4014240145 // If we have a dual input lane-crossing shuffle then lower to VPERMV3,
@@ -40307,8 +40310,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4030740310 // If that failed and either input is extracted then try to combine as a
4030840311 // shuffle with the larger type.
4030940312 if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
40310- Inputs, Root, BaseMask, Depth, HasVariableMask ,
40311- AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget))
40313+ Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask ,
40314+ AllowVariablePerLaneMask, DAG, Subtarget))
4031240315 return WideShuffle;
4031340316
4031440317 // If we have a dual input shuffle then lower to VPERMV3,
@@ -40346,7 +40349,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4034640349// extract_subvector(shuffle(x,y,m2),0)
4034740350static SDValue combineX86ShuffleChainWithExtract(
4034840351 ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
40349- bool HasVariableMask , bool AllowVariableCrossLaneMask,
40352+ ArrayRef<const SDNode *> SrcNodes , bool AllowVariableCrossLaneMask,
4035040353 bool AllowVariablePerLaneMask, SelectionDAG &DAG,
4035140354 const X86Subtarget &Subtarget) {
4035240355 unsigned NumMaskElts = BaseMask.size();
@@ -40475,7 +40478,7 @@ static SDValue combineX86ShuffleChainWithExtract(
4047540478
4047640479 if (SDValue WideShuffle =
4047740480 combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
40478- HasVariableMask , AllowVariableCrossLaneMask,
40481+ SrcNodes , AllowVariableCrossLaneMask,
4047940482 AllowVariablePerLaneMask, DAG, Subtarget)) {
4048040483 WideShuffle =
4048140484 extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
@@ -40698,7 +40701,7 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
4069840701// TODO: Extend this to merge multiple constant Ops and update the mask.
4069940702static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
4070040703 ArrayRef<int> Mask,
40701- bool HasVariableMask ,
40704+ ArrayRef<const SDNode *> SrcNodes ,
4070240705 SelectionDAG &DAG, const SDLoc &DL,
4070340706 const X86Subtarget &Subtarget) {
4070440707 unsigned SizeInBits = VT.getSizeInBits();
@@ -40720,6 +40723,9 @@ static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
4072040723 // only used once or the combined shuffle has included a variable mask
4072140724 // shuffle, this is to avoid constant pool bloat.
4072240725 bool IsOptimizingSize = DAG.shouldOptForSize();
40726+ bool HasVariableMask = llvm::any_of(SrcNodes, [](const SDNode *N) {
40727+ return isTargetShuffleVariableMask(N->getOpcode());
40728+ });
4072340729 if (IsOptimizingSize && !HasVariableMask &&
4072440730 llvm::none_of(Ops, [](SDValue SrcOp) { return SrcOp->hasOneUse(); }))
4072540731 return SDValue();
@@ -40821,7 +40827,7 @@ namespace llvm {
4082140827static SDValue combineX86ShufflesRecursively(
4082240828 ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
4082340829 ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
40824- unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask,
40830+ unsigned MaxDepth, bool AllowVariableCrossLaneMask,
4082540831 bool AllowVariablePerLaneMask, SelectionDAG &DAG,
4082640832 const X86Subtarget &Subtarget) {
4082740833 assert(!RootMask.empty() &&
@@ -40877,7 +40883,6 @@ static SDValue combineX86ShufflesRecursively(
4087740883 SmallVector<int, 64> OpMask;
4087840884 SmallVector<SDValue, 2> OpInputs;
4087940885 APInt OpUndef, OpZero;
40880- bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
4088140886 if (getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
4088240887 OpZero, DAG, Depth, false)) {
4088340888 // Shuffle inputs must not be larger than the shuffle result.
@@ -41092,7 +41097,6 @@ static SDValue combineX86ShufflesRecursively(
4109241097 return getOnesVector(RootVT, DAG, DL);
4109341098
4109441099 assert(!Ops.empty() && "Shuffle with no inputs detected");
41095- HasVariableMask |= IsOpVariableMask;
4109641100
4109741101 // Update the list of shuffle nodes that have been combined so far.
4109841102 SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes);
@@ -41121,15 +41125,14 @@ static SDValue combineX86ShufflesRecursively(
4112141125 }
4112241126 if (SDValue Res = combineX86ShufflesRecursively(
4112341127 Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
41124- HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG,
41125- Subtarget))
41128+ AllowCrossLaneVar, AllowPerLaneVar, DAG, Subtarget))
4112641129 return Res;
4112741130 }
4112841131 }
4112941132
4113041133 // Attempt to constant fold all of the constant source ops.
4113141134 if (SDValue Cst = combineX86ShufflesConstants(
41132- RootVT, Ops, Mask, HasVariableMask , DAG, DL, Subtarget))
41135+ RootVT, Ops, Mask, CombinedNodes , DAG, DL, Subtarget))
4113341136 return Cst;
4113441137
4113541138 // If constant fold failed and we only have constants - then we have
@@ -41231,7 +41234,7 @@ static SDValue combineX86ShufflesRecursively(
4123141234
4123241235 // Try to combine into a single shuffle instruction.
4123341236 if (SDValue Shuffle = combineX86ShuffleChain(
41234- Ops, Root, Mask, Depth, HasVariableMask , AllowVariableCrossLaneMask,
41237+ Ops, Root, Mask, Depth, CombinedNodes , AllowVariableCrossLaneMask,
4123541238 AllowVariablePerLaneMask, DAG, Subtarget))
4123641239 return Shuffle;
4123741240
@@ -41250,7 +41253,7 @@ static SDValue combineX86ShufflesRecursively(
4125041253 // If that failed and any input is extracted then try to combine as a
4125141254 // shuffle with the larger type.
4125241255 return combineX86ShuffleChainWithExtract(
41253- Ops, Root, Mask, Depth, HasVariableMask , AllowVariableCrossLaneMask,
41256+ Ops, Root, Mask, Depth, CombinedNodes , AllowVariableCrossLaneMask,
4125441257 AllowVariablePerLaneMask, DAG, Subtarget);
4125541258}
4125641259
@@ -41259,7 +41262,6 @@ static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
4125941262 const X86Subtarget &Subtarget) {
4126041263 return combineX86ShufflesRecursively(
4126141264 {Op}, 0, Op, {0}, {}, /*Depth*/ 0, X86::MaxShuffleCombineDepth,
41262- /*HasVarMask*/ false,
4126341265 /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, DAG,
4126441266 Subtarget);
4126541267}
@@ -41897,7 +41899,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4189741899 if (SDValue Res = combineX86ShufflesRecursively(
4189841900 {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
4189941901 X86::MaxShuffleCombineDepth,
41900- /*HasVarMask*/ false, /* AllowCrossLaneVarMask*/ true,
41902+ /*AllowCrossLaneVarMask*/ true,
4190141903 /*AllowPerLaneVarMask*/ true, DAG, Subtarget))
4190241904 return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
4190341905 DAG.getBitcast(SrcVT, Res));
@@ -42236,7 +42238,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4223642238 llvm::narrowShuffleMaskElts(EltBits / 8, Mask, ByteMask);
4223742239 if (SDValue NewMask = combineX86ShufflesConstants(
4223842240 ShufVT, {MaskLHS, MaskRHS}, ByteMask,
42239- /*HasVariableMask=*/true , DAG, DL, Subtarget)) {
42241+ {LHS.getNode(), RHS.getNode()} , DAG, DL, Subtarget)) {
4224042242 SDValue NewLHS = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT,
4224142243 LHS.getOperand(0), NewMask);
4224242244 SDValue NewRHS = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT,
@@ -43871,7 +43873,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4387143873
4387243874 SDValue NewShuffle = combineX86ShufflesRecursively(
4387343875 {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,
43874- /*HasVarMask*/ false,
4387543876 /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG,
4387643877 Subtarget);
4387743878 if (NewShuffle)
@@ -51430,7 +51431,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5143051431 if (SDValue Shuffle = combineX86ShufflesRecursively(
5143151432 {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
5143251433 X86::MaxShuffleCombineDepth,
51433- /*HasVarMask*/ false, /* AllowVarCrossLaneMask*/ true,
51434+ /*AllowVarCrossLaneMask*/ true,
5143451435 /*AllowVarPerLaneMask*/ true, DAG, Subtarget))
5143551436 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle,
5143651437 N0.getOperand(1));
0 commit comments