diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 78b9b736c478c..74ce6f4efb1cf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39642,9 +39642,10 @@ static bool matchBinaryPermuteShuffle( } static SDValue combineX86ShuffleChainWithExtract( - ArrayRef Inputs, SDValue Root, ArrayRef BaseMask, int Depth, - ArrayRef SrcNodes, bool AllowVariableCrossLaneMask, - bool AllowVariablePerLaneMask, bool IsMaskedShuffle, SelectionDAG &DAG, + ArrayRef Inputs, unsigned RootOpcode, MVT RootVT, + ArrayRef BaseMask, int Depth, ArrayRef SrcNodes, + bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, + bool IsMaskedShuffle, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget); /// Combine an arbitrary chain of shuffles into a single instruction if @@ -39657,16 +39658,14 @@ static SDValue combineX86ShuffleChainWithExtract( /// for this operation, or into a PSHUFB instruction which is a fully general /// instruction but should only be used to replace chains over a certain depth. static SDValue combineX86ShuffleChain( - ArrayRef Inputs, SDValue Root, ArrayRef BaseMask, int Depth, - ArrayRef SrcNodes, bool AllowVariableCrossLaneMask, - bool AllowVariablePerLaneMask, bool IsMaskedShuffle, SelectionDAG &DAG, - const SDLoc &DL, const X86Subtarget &Subtarget) { + ArrayRef Inputs, unsigned RootOpc, MVT RootVT, + ArrayRef BaseMask, int Depth, ArrayRef SrcNodes, + bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, + bool IsMaskedShuffle, SelectionDAG &DAG, const SDLoc &DL, + const X86Subtarget &Subtarget) { assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!"); assert((Inputs.size() == 1 || Inputs.size() == 2) && "Unexpected number of shuffle inputs!"); - - unsigned RootOpc = Root.getOpcode(); - MVT RootVT = Root.getSimpleValueType(); unsigned RootSizeInBits = RootVT.getSizeInBits(); unsigned NumRootElts = RootVT.getVectorNumElements(); @@ -40194,8 +40193,9 @@ static SDValue combineX86ShuffleChain( // If that failed and either input is extracted then try to combine as a // shuffle with the larger type. if (SDValue WideShuffle = combineX86ShuffleChainWithExtract( - Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask, - AllowVariablePerLaneMask, IsMaskedShuffle, DAG, Subtarget)) + Inputs, RootOpc, RootVT, BaseMask, Depth, SrcNodes, + AllowVariableCrossLaneMask, AllowVariablePerLaneMask, + IsMaskedShuffle, DAG, DL, Subtarget)) return WideShuffle; // If we have a dual input lane-crossing shuffle then lower to VPERMV3, @@ -40366,8 +40366,9 @@ static SDValue combineX86ShuffleChain( // If that failed and either input is extracted then try to combine as a // shuffle with the larger type. if (SDValue WideShuffle = combineX86ShuffleChainWithExtract( - Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask, - AllowVariablePerLaneMask, IsMaskedShuffle, DAG, Subtarget)) + Inputs, RootOpc, RootVT, BaseMask, Depth, SrcNodes, + AllowVariableCrossLaneMask, AllowVariablePerLaneMask, IsMaskedShuffle, + DAG, DL, Subtarget)) return WideShuffle; // If we have a dual input shuffle then lower to VPERMV3, @@ -40404,16 +40405,16 @@ static SDValue combineX86ShuffleChain( // --> // extract_subvector(shuffle(x,y,m2),0) static SDValue combineX86ShuffleChainWithExtract( - ArrayRef Inputs, SDValue Root, ArrayRef BaseMask, int Depth, - ArrayRef SrcNodes, bool AllowVariableCrossLaneMask, - bool AllowVariablePerLaneMask, bool IsMaskedShuffle, SelectionDAG &DAG, + ArrayRef Inputs, unsigned RootOpcode, MVT RootVT, + ArrayRef BaseMask, int Depth, ArrayRef SrcNodes, + bool AllowVariableCrossLaneMask, bool AllowVariablePerLaneMask, + bool IsMaskedShuffle, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget) { unsigned NumMaskElts = BaseMask.size(); unsigned NumInputs = Inputs.size(); if (NumInputs == 0) return SDValue(); - EVT RootVT = Root.getValueType(); unsigned RootSizeInBits = RootVT.getSizeInBits(); unsigned RootEltSizeInBits = RootSizeInBits / NumMaskElts; assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask"); @@ -40533,11 +40534,10 @@ static SDValue combineX86ShuffleChainWithExtract( "WideRootSize mismatch"); if (SDValue WideShuffle = combineX86ShuffleChain( - WideInputs, WideRoot, WideMask, Depth, SrcNodes, - AllowVariableCrossLaneMask, AllowVariablePerLaneMask, IsMaskedShuffle, - DAG, SDLoc(WideRoot), Subtarget)) { - WideShuffle = - extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits); + WideInputs, RootOpcode, WideRoot.getSimpleValueType(), WideMask, + Depth, SrcNodes, AllowVariableCrossLaneMask, AllowVariablePerLaneMask, + IsMaskedShuffle, DAG, SDLoc(WideRoot), Subtarget)) { + WideShuffle = extractSubVector(WideShuffle, 0, DAG, DL, RootSizeInBits); return DAG.getBitcast(RootVT, WideShuffle); } @@ -40881,15 +40881,14 @@ namespace llvm { /// combine-ordering. To fix this, we should do the redundant instruction /// combining in this recursive walk. static SDValue combineX86ShufflesRecursively( - ArrayRef SrcOps, int SrcOpIndex, SDValue Root, + ArrayRef SrcOps, int SrcOpIndex, unsigned RootOpc, MVT RootVT, ArrayRef RootMask, ArrayRef SrcNodes, unsigned Depth, unsigned MaxDepth, bool AllowVariableCrossLaneMask, - bool AllowVariablePerLaneMask, SelectionDAG &DAG, const SDLoc &DL, - const X86Subtarget &Subtarget) { + bool AllowVariablePerLaneMask, bool IsMaskedShuffle, SelectionDAG &DAG, + const SDLoc &DL, const X86Subtarget &Subtarget) { assert(!RootMask.empty() && (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) && "Illegal shuffle root mask"); - MVT RootVT = Root.getSimpleValueType(); assert(RootVT.isVector() && "Shuffles operate on vector types!"); unsigned RootSizeInBits = RootVT.getSizeInBits(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -41185,8 +41184,9 @@ static SDValue combineX86ShufflesRecursively( AllowPerLaneVar = AllowVariablePerLaneMask; } if (SDValue Res = combineX86ShufflesRecursively( - Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth, - AllowCrossLaneVar, AllowPerLaneVar, DAG, DL, Subtarget)) + Ops, i, RootOpc, RootVT, ResolvedMask, CombinedNodes, Depth + 1, + MaxDepth, AllowCrossLaneVar, AllowPerLaneVar, IsMaskedShuffle, + DAG, DL, Subtarget)) return Res; } } @@ -41271,10 +41271,6 @@ static SDValue combineX86ShufflesRecursively( resolveTargetShuffleInputsAndMask(Ops, Mask); } - // If we are a AVX512/EVEX target the mask element size should match the root - // element size to allow writemasks to be reused. - bool IsMaskedShuffle = isMaskableNode(Root, Subtarget); - // We can only combine unary and binary shuffle mask cases. if (Ops.size() <= 2) { // Minor canonicalization of the accumulated shuffle mask to make it easier @@ -41298,8 +41294,9 @@ static SDValue combineX86ShufflesRecursively( // Try to combine into a single shuffle instruction. if (SDValue Shuffle = combineX86ShuffleChain( - Ops, Root, Mask, Depth, CombinedNodes, AllowVariableCrossLaneMask, - AllowVariablePerLaneMask, IsMaskedShuffle, DAG, DL, Subtarget)) + Ops, RootOpc, RootVT, Mask, Depth, CombinedNodes, + AllowVariableCrossLaneMask, AllowVariablePerLaneMask, + IsMaskedShuffle, DAG, DL, Subtarget)) return Shuffle; // If all the operands come from the same larger vector, fallthrough and try @@ -41317,16 +41314,18 @@ static SDValue combineX86ShufflesRecursively( // If that failed and any input is extracted then try to combine as a // shuffle with the larger type. return combineX86ShuffleChainWithExtract( - Ops, Root, Mask, Depth, CombinedNodes, AllowVariableCrossLaneMask, - AllowVariablePerLaneMask, IsMaskedShuffle, DAG, Subtarget); + Ops, RootOpc, RootVT, Mask, Depth, CombinedNodes, + AllowVariableCrossLaneMask, AllowVariablePerLaneMask, IsMaskedShuffle, + DAG, DL, Subtarget); } /// Helper entry wrapper to combineX86ShufflesRecursively. static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { return combineX86ShufflesRecursively( - {Op}, 0, Op, {0}, {}, /*Depth*/ 0, X86::MaxShuffleCombineDepth, - /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, DAG, + {Op}, 0, Op.getOpcode(), Op.getSimpleValueType(), {0}, {}, /*Depth=*/0, + X86::MaxShuffleCombineDepth, /*AllowCrossLaneVarMask=*/true, + /*AllowPerLaneVarMask=*/true, isMaskableNode(Op, Subtarget), DAG, SDLoc(Op), Subtarget); } @@ -41977,10 +41976,10 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, for (unsigned i = 0; i != Scale; ++i) DemandedMask[i] = i; if (SDValue Res = combineX86ShufflesRecursively( - {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0, - X86::MaxShuffleCombineDepth, - /*AllowCrossLaneVarMask*/ true, - /*AllowPerLaneVarMask*/ true, DAG, DL, Subtarget)) + {BC}, 0, BC.getOpcode(), BC.getSimpleValueType(), DemandedMask, + {}, /*Depth=*/0, X86::MaxShuffleCombineDepth, + /*AllowCrossLaneVarMask=*/true, /*AllowPerLaneVarMask=*/true, + /*IsMaskedShuffle=*/false, DAG, DL, Subtarget)) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); } @@ -43981,8 +43980,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( DemandedMask[i] = i; SDValue NewShuffle = combineX86ShufflesRecursively( - {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth, - /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG, + {Op}, 0, Op.getOpcode(), Op.getSimpleValueType(), DemandedMask, {}, 0, + X86::MaxShuffleCombineDepth - Depth, /*AllowCrossLaneVarMask=*/true, + /*AllowPerLaneVarMask=*/true, isMaskableNode(Op, Subtarget), TLO.DAG, SDLoc(Op), Subtarget); if (NewShuffle) return TLO.CombineTo(Op, NewShuffle); @@ -51617,10 +51617,10 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, } if (SDValue Shuffle = combineX86ShufflesRecursively( - {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1, - X86::MaxShuffleCombineDepth, - /*AllowVarCrossLaneMask*/ true, - /*AllowVarPerLaneMask*/ true, DAG, SDLoc(SrcVec), Subtarget)) + {SrcVec}, 0, SrcVec.getOpcode(), SrcVec.getSimpleValueType(), + ShuffleMask, {}, /*Depth=*/1, X86::MaxShuffleCombineDepth, + /*AllowVarCrossLaneMask=*/true, /*AllowVarPerLaneMask=*/true, + /*IsMaskedShuffle=*/false, DAG, SDLoc(SrcVec), Subtarget)) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle, N0.getOperand(1)); }