@@ -1732,6 +1732,36 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
17321732 TTI.getShuffleCost (TargetTransformInfo::SK_PermuteTwoSrc, BinResTy,
17331733 OldMask, CostKind, 0 , nullptr , {LHS, RHS}, &I);
17341734
1735+ // Handle shuffle(binop(shuffle(x),y),binop(z,shuffle(w))) style patterns
1736+ // where one-use shuffles have been split across the binop/cmp. Merging them
1737+ // often allows a major reduction in total cost that wouldn't happen as
1738+ // individual folds. On a match, MergeInner rewrites Mask through the inner shuffle's mask, adds the inner shuffle's cost to OldCost, replaces Op with the inner shuffle's source and returns true; otherwise it returns false and modifies nothing.
1739+ auto MergeInner = [&](Value *&Op, int Offset, MutableArrayRef<int > Mask, // Op is taken by reference and overwritten on success
1740+ TTI::TargetCostKind CostKind) -> bool { // NOTE: this parameter shadows the enclosing CostKind that the call sites pass in
1741+ Value *InnerOp;
1742+ ArrayRef<int > InnerMask;
1743+ if (match (Op, m_OneUse (m_Shuffle (m_Value (InnerOp), m_Undef (), // Op must be a one-use shuffle whose second operand matches m_Undef()
1744+ m_Mask (InnerMask)))) &&
1745+ all_of (InnerMask,
1746+ [NumSrcElts](int M) { return M < (int )NumSrcElts; }) && // every inner mask index must be below NumSrcElts (negative indices pass)
1747+ InnerOp->getType () == Op->getType ()) { // the inner source must have the same type as the shuffle it feeds
1748+ for (int &M : Mask)
1749+ if (Offset <= M && M < (int )(Offset + NumSrcElts)) { // only remap indices in [Offset, Offset + NumSrcElts)
1750+ M = InnerMask[M - Offset]; // look through the inner shuffle
1751+ M = 0 <= M ? M + Offset : M; // non-negative results are re-biased by Offset; negative ones are kept unchanged
1752+ }
1753+ OldCost += TTI.getInstructionCost (cast<Instruction>(Op), CostKind); // the inner shuffle being bypassed is added to the old cost
1754+ Op = InnerOp; // from here on the caller sees the inner shuffle's source instead of the shuffle
1755+ return true ;
1756+ }
1757+ return false ;
1758+ };
1759+ bool ReducedInstCount = false ;
1760+ ReducedInstCount |= MergeInner (X, 0 , NewMask0, CostKind);
1761+ ReducedInstCount |= MergeInner (Y, 0 , NewMask1, CostKind);
1762+ ReducedInstCount |= MergeInner (Z, NumSrcElts, NewMask0, CostKind);
1763+ ReducedInstCount |= MergeInner (W, NumSrcElts, NewMask1, CostKind);
1764+
17351765 InstructionCost NewCost =
17361766 TTI.getShuffleCost (SK0, BinOpTy, NewMask0, CostKind, 0 , nullptr , {X, Z}) +
17371767 TTI.getShuffleCost (SK1, BinOpTy, NewMask1, CostKind, 0 , nullptr , {Y, W});
@@ -1752,8 +1782,8 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
17521782
17531783 // If either shuffle will constant fold away, then fold for the same cost as
17541784 // we will reduce the instruction count.
1755- bool ReducedInstCount = (isa<Constant>(X) && isa<Constant>(Z)) ||
1756- (isa<Constant>(Y) && isa<Constant>(W));
1785+ ReducedInstCount | = (isa<Constant>(X) && isa<Constant>(Z)) ||
1786+ (isa<Constant>(Y) && isa<Constant>(W));
17571787 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
17581788 return false ;
17591789
0 commit comments