@@ -1721,11 +1721,11 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
17211721 Value *V0, *V1;
17221722 UndefValue *U0, *U1;
17231723 ArrayRef<int > OuterMask, InnerMask0, InnerMask1;
1724- if (!match (&I, m_Shuffle ( m_OneUse ( m_Shuffle ( m_Value (V0), m_UndefValue (U0),
1725- m_Mask (InnerMask0))),
1726- m_OneUse ( m_Shuffle (m_Value (V1 ), m_UndefValue (U1 ),
1727- m_Mask (InnerMask1) )),
1728- m_Mask (OuterMask))))
1724+ if (!match (&I,
1725+ m_Shuffle (
1726+ m_Shuffle (m_Value (V0 ), m_UndefValue (U0), m_Mask (InnerMask0) ),
1727+ m_Shuffle ( m_Value (V1), m_UndefValue (U1), m_Mask (InnerMask1)),
1728+ m_Mask (OuterMask))))
17291729 return false ;
17301730
17311731 auto *ShufI0 = dyn_cast<Instruction>(I.getOperand (0 ));
@@ -1769,17 +1769,24 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
17691769 // Try to merge the shuffles if the new shuffle is not costly.
17701770 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
17711771
1772- InstructionCost OldCost =
1772+ InstructionCost InnerCost0 =
17731773 TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
1774- InnerMask0, CostKind, 0 , nullptr , {V0, U0}, ShufI0) +
1774+ InnerMask0, CostKind, 0 , nullptr , {V0, U0}, ShufI0);
1775+ InstructionCost InnerCost1 =
17751776 TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
1776- InnerMask1, CostKind, 0 , nullptr , {V1, U1}, ShufI1) +
1777+ InnerMask1, CostKind, 0 , nullptr , {V1, U1}, ShufI1);
1778+ InstructionCost OuterCost =
17771779 TTI.getShuffleCost (TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy,
17781780 OuterMask, CostKind, 0 , nullptr , {ShufI0, ShufI1}, &I);
1781+ InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;
17791782
17801783 InstructionCost NewCost =
17811784 TTI.getShuffleCost (TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
17821785 NewMask, CostKind, 0 , nullptr , {V0, V1});
1786+ if (!ShufI0->hasOneUse ())
1787+ NewCost += InnerCost0;
1788+ if (!ShufI1->hasOneUse ())
1789+ NewCost += InnerCost1;
17831790
17841791 LLVM_DEBUG (dbgs () << " Found a shuffle feeding two shuffles: " << I
17851792 << " \n OldCost: " << OldCost << " vs NewCost: " << NewCost
0 commit comments