@@ -113,6 +113,7 @@ class VectorCombine {
113113 bool scalarizeLoadExtract (Instruction &I);
114114 bool foldShuffleOfBinops (Instruction &I);
115115 bool foldShuffleOfCastops (Instruction &I);
116+ bool foldShuffleToIdentity (Instruction &I);
116117 bool foldShuffleFromReductions (Instruction &I);
117118 bool foldTruncFromReductions (Instruction &I);
118119 bool foldSelectShuffle (Instruction &I, bool FromReduction = false );
@@ -1552,6 +1553,148 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
15521553 return true ;
15531554}
15541555
1556+ // Starting from a shuffle, look up through operands tracking the shuffled index
1557+ // of each lane. If we can simplify away the shuffles to identities then
1558+ // do so.
1559+ bool VectorCombine::foldShuffleToIdentity (Instruction &I) {
1560+ FixedVectorType *Ty = dyn_cast<FixedVectorType>(I.getType ());
1561+ if (!Ty || !isa<Instruction>(I.getOperand (0 )) ||
1562+ !isa<Instruction>(I.getOperand (1 )))
1563+ return false ;
1564+
1565+ using InstLane = std::pair<Value *, int >;
1566+
1567+ auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
1568+ while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
1569+ unsigned NumElts =
1570+ cast<FixedVectorType>(SV->getOperand (0 )->getType ())->getNumElements ();
1571+ int M = SV->getMaskValue (Lane);
1572+ if (M < 0 )
1573+ return {nullptr , -1 };
1574+ else if (M < (int )NumElts) {
1575+ V = SV->getOperand (0 );
1576+ Lane = M;
1577+ } else {
1578+ V = SV->getOperand (1 );
1579+ Lane = M - NumElts;
1580+ }
1581+ }
1582+ return InstLane{V, Lane};
1583+ };
1584+
1585+ auto GenerateInstLaneVectorFromOperand =
1586+ [&LookThroughShuffles](const SmallVector<InstLane> &Item, int Op) {
1587+ SmallVector<InstLane> NItem;
1588+ for (InstLane V : Item) {
1589+ NItem.emplace_back (
1590+ !V.first
1591+ ? InstLane{nullptr , -1 }
1592+ : LookThroughShuffles (
1593+ cast<Instruction>(V.first )->getOperand (Op), V.second ));
1594+ }
1595+ return NItem;
1596+ };
1597+
1598+ SmallVector<InstLane> Start;
1599+ for (unsigned M = 0 ; M < Ty->getNumElements (); ++M)
1600+ Start.push_back (LookThroughShuffles (&I, M));
1601+
1602+ SmallVector<SmallVector<InstLane>> Worklist;
1603+ Worklist.push_back (Start);
1604+ SmallPtrSet<Value *, 4 > IdentityLeafs, SplatLeafs;
1605+ unsigned NumVisited = 0 ;
1606+
1607+ while (!Worklist.empty ()) {
1608+ SmallVector<InstLane> Item = Worklist.pop_back_val ();
1609+ if (++NumVisited > MaxInstrsToScan)
1610+ return false ;
1611+
1612+ // If we found an undef first lane then bail out to keep things simple.
1613+ if (!Item[0 ].first )
1614+ return false ;
1615+
1616+ // Look for an identity value.
1617+ if (Item[0 ].second == 0 && Item[0 ].first ->getType () == Ty &&
1618+ all_of (drop_begin (enumerate(Item)), [&](const auto &E) {
1619+ return !E.value ().first || (E.value ().first == Item[0 ].first &&
1620+ E.value ().second == (int )E.index ());
1621+ })) {
1622+ IdentityLeafs.insert (Item[0 ].first );
1623+ continue ;
1624+ }
1625+ // Look for a splat value.
1626+ if (all_of (drop_begin (Item), [&](InstLane &IL) {
1627+ return !IL.first ||
1628+ (IL.first == Item[0 ].first && IL.second == Item[0 ].second );
1629+ })) {
1630+ SplatLeafs.insert (Item[0 ].first );
1631+ continue ;
1632+ }
1633+
1634+ // We need each element to be the same type of value, and check that each
1635+ // element has a single use.
1636+ if (!all_of (drop_begin (Item), [&](InstLane IL) {
1637+ if (!IL.first )
1638+ return true ;
1639+ if (isa<Instruction>(IL.first ) &&
1640+ !cast<Instruction>(IL.first )->hasOneUse ())
1641+ return false ;
1642+ return IL.first ->getValueID () == Item[0 ].first ->getValueID () &&
1643+ (!isa<IntrinsicInst>(IL.first ) ||
1644+ cast<IntrinsicInst>(IL.first )->getIntrinsicID () ==
1645+ cast<IntrinsicInst>(Item[0 ].first )->getIntrinsicID ());
1646+ }))
1647+ return false ;
1648+
1649+ // Check the operator is one that we support.
1650+ if (isa<BinaryOperator>(Item[0 ].first )) {
1651+ Worklist.push_back (GenerateInstLaneVectorFromOperand (Item, 0 ));
1652+ Worklist.push_back (GenerateInstLaneVectorFromOperand (Item, 1 ));
1653+ } else if (isa<UnaryOperator>(Item[0 ].first )) {
1654+ Worklist.push_back (GenerateInstLaneVectorFromOperand (Item, 0 ));
1655+ } else {
1656+ return false ;
1657+ }
1658+ }
1659+
1660+ // If we got this far, we know the shuffles are superfluous and can be
1661+ // removed. Scan through again and generate the new tree of instructions.
1662+ std::function<Value *(const SmallVector<InstLane> &)> generate =
1663+ [&](const SmallVector<InstLane> &Item) -> Value * {
1664+ if (IdentityLeafs.contains (Item[0 ].first ) &&
1665+ all_of (drop_begin (enumerate(Item)), [&](const auto &E) {
1666+ return !E.value ().first || (E.value ().first == Item[0 ].first &&
1667+ E.value ().second == (int )E.index ());
1668+ })) {
1669+ return Item[0 ].first ;
1670+ } else if (SplatLeafs.contains (Item[0 ].first )) {
1671+ if (auto ILI = dyn_cast<Instruction>(Item[0 ].first ))
1672+ Builder.SetInsertPoint (*ILI->getInsertionPointAfterDef ());
1673+ else if (isa<Argument>(Item[0 ].first ))
1674+ Builder.SetInsertPointPastAllocas (I.getParent ()->getParent ());
1675+ SmallVector<int , 16 > Mask (Ty->getNumElements (), Item[0 ].second );
1676+ return Builder.CreateShuffleVector (Item[0 ].first , Mask);
1677+ }
1678+
1679+ auto *I = cast<Instruction>(Item[0 ].first );
1680+ SmallVector<Value *> Ops;
1681+ unsigned E = I->getNumOperands ();
1682+ for (unsigned Idx = 0 ; Idx < E; Idx++)
1683+ Ops.push_back (generate (GenerateInstLaneVectorFromOperand (Item, Idx)));
1684+ Builder.SetInsertPoint (I);
1685+ if (auto BI = dyn_cast<BinaryOperator>(I))
1686+ return Builder.CreateBinOp ((Instruction::BinaryOps)BI->getOpcode (),
1687+ Ops[0 ], Ops[1 ]);
1688+ if (auto UI = dyn_cast<UnaryOperator>(I))
1689+ return Builder.CreateUnOp ((Instruction::UnaryOps)UI->getOpcode (), Ops[0 ]);
1690+ llvm_unreachable (" Unhandled instruction in generate" );
1691+ };
1692+
1693+ Value *V = generate (Start);
1694+ replaceValue (I, *V);
1695+ return true ;
1696+ }
1697+
15551698// / Given a commutative reduction, the order of the input lanes does not alter
15561699// / the results. We can use this to remove certain shuffles feeding the
15571700// / reduction, removing the need to shuffle at all.
@@ -2108,6 +2251,7 @@ bool VectorCombine::run() {
21082251 MadeChange |= foldShuffleOfBinops (I);
21092252 MadeChange |= foldShuffleOfCastops (I);
21102253 MadeChange |= foldSelectShuffle (I);
2254+ MadeChange |= foldShuffleToIdentity (I);
21112255 break ;
21122256 case Instruction::BitCast:
21132257 MadeChange |= foldBitcastShuffle (I);
0 commit comments