@@ -231,19 +231,12 @@ class LoopIdiomRecognize {
231231 bool recognizePopcount ();
232232 void transformLoopToPopcount (BasicBlock *PreCondBB, Instruction *CntInst,
233233 PHINode *CntPhi, Value *Var);
234- bool isProfitableToInsertFFS (Intrinsic::ID IntrinID, Value *InitX,
235- bool ZeroCheck, size_t CanonicalSize);
236- bool insertFFSIfProfitable (Intrinsic::ID IntrinID, Value *InitX,
237- Instruction *DefX, PHINode *CntPhi,
238- Instruction *CntInst);
239234 bool recognizeAndInsertFFS (); // / Find First Set: ctlz or cttz
240- bool recognizeShiftUntilLessThan ();
241235 void transformLoopToCountable (Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
242236 Instruction *CntInst, PHINode *CntPhi,
243237 Value *Var, Instruction *DefX,
244238 const DebugLoc &DL, bool ZeroCheck,
245- bool IsCntPhiUsedOutsideLoop,
246- bool InsertSub = false );
239+ bool IsCntPhiUsedOutsideLoop);
247240
248241 bool recognizeShiftUntilBitTest ();
249242 bool recognizeShiftUntilZero ();
@@ -1489,8 +1482,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
14891482 << CurLoop->getHeader ()->getName () << " \n " );
14901483
14911484 return recognizePopcount () || recognizeAndInsertFFS () ||
1492- recognizeShiftUntilBitTest () || recognizeShiftUntilZero () ||
1493- recognizeShiftUntilLessThan ();
1485+ recognizeShiftUntilBitTest () || recognizeShiftUntilZero ();
14941486}
14951487
14961488// / Check if the given conditional branch is based on the comparison between
@@ -1525,34 +1517,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15251517 return nullptr ;
15261518}
15271519
1528- // / Check if the given conditional branch is based on an unsigned less-than
1529- // / comparison between a variable and a constant, and if the comparison is false
1530- // / the control yields to the loop entry. If the branch matches the behaviour,
1531- // / the variable involved in the comparison is returned.
1532- static Value *matchShiftULTCondition (BranchInst *BI, BasicBlock *LoopEntry,
1533- uint64_t &Threshold) {
1534- if (!BI || !BI->isConditional ())
1535- return nullptr ;
1536-
1537- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition ());
1538- if (!Cond)
1539- return nullptr ;
1540-
1541- ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1542- if (!CmpConst)
1543- return nullptr ;
1544-
1545- BasicBlock *FalseSucc = BI->getSuccessor (1 );
1546- ICmpInst::Predicate Pred = Cond->getPredicate ();
1547-
1548- if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
1549- Threshold = CmpConst->getZExtValue ();
1550- return Cond->getOperand (0 );
1551- }
1552-
1553- return nullptr ;
1554- }
1555-
15561520// Check if the recurrence variable `VarX` is in the right form to create
15571521// the idiom. Returns the value coerced to a PHINode if so.
15581522static PHINode *getRecurrenceVar (Value *VarX, Instruction *DefX,
@@ -1564,107 +1528,6 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
15641528 return nullptr ;
15651529}
15661530
1567- // / Return true if the idiom is detected in the loop.
1568- // /
1569- // / Additionally:
1570- // / 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1571- // / or nullptr if there is no such.
1572- // / 2) \p CntPhi is set to the corresponding phi node
1573- // / or nullptr if there is no such.
1574- // / 3) \p InitX is set to the value whose CTLZ could be used.
1575- // / 4) \p DefX is set to the instruction calculating Loop exit condition.
1576- // / 5) \p Threshold is set to the constant involved in the unsigned less-than
1577- // / comparison.
1578- // /
1579- // / The core idiom we are trying to detect is:
1580- // / \code
1581- // / if (x0 < 2)
1582- // / goto loop-exit // the precondition of the loop
1583- // / cnt0 = init-val
1584- // / do {
1585- // / x = phi (x0, x.next); //PhiX
1586- // / cnt = phi (cnt0, cnt.next)
1587- // /
1588- // / cnt.next = cnt + 1;
1589- // / ...
1590- // / x.next = x >> 1; // DefX
1591- // / } while (x >= 4)
1592- // / loop-exit:
1593- // / \endcode
1594- static bool detectShiftUntilLessThanIdiom (Loop *CurLoop, const DataLayout &DL,
1595- Intrinsic::ID &IntrinID,
1596- Value *&InitX, Instruction *&CntInst,
1597- PHINode *&CntPhi, Instruction *&DefX,
1598- uint64_t &Threshold) {
1599- BasicBlock *LoopEntry;
1600-
1601- DefX = nullptr ;
1602- CntInst = nullptr ;
1603- CntPhi = nullptr ;
1604- LoopEntry = *(CurLoop->block_begin ());
1605-
1606- // step 1: Check if the loop-back branch is in desirable form.
1607- if (Value *T = matchShiftULTCondition (
1608- dyn_cast<BranchInst>(LoopEntry->getTerminator ()), LoopEntry,
1609- Threshold))
1610- DefX = dyn_cast<Instruction>(T);
1611- else
1612- return false ;
1613-
1614- // step 2: Check the recurrence of variable X
1615- if (!DefX || !isa<PHINode>(DefX))
1616- return false ;
1617-
1618- PHINode *VarPhi = cast<PHINode>(DefX);
1619- int Idx = VarPhi->getBasicBlockIndex (LoopEntry);
1620- if (Idx == -1 )
1621- return false ;
1622-
1623- DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue (Idx));
1624- if (!DefX || DefX->getNumOperands () == 0 || DefX->getOperand (0 ) != VarPhi)
1625- return false ;
1626-
1627- // step 3: detect instructions corresponding to "x.next = x >> 1"
1628- if (DefX->getOpcode () != Instruction::LShr)
1629- return false ;
1630-
1631- IntrinID = Intrinsic::ctlz;
1632- ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand (1 ));
1633- if (!Shft || !Shft->isOne ())
1634- return false ;
1635-
1636- InitX = VarPhi->getIncomingValueForBlock (CurLoop->getLoopPreheader ());
1637-
1638- // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1639- // or cnt.next = cnt + -1.
1640- // TODO: We can skip the step. If loop trip count is known (CTLZ),
1641- // then all uses of "cnt.next" could be optimized to the trip count
1642- // plus "cnt0". Currently it is not optimized.
1643- // This step could be used to detect POPCNT instruction:
1644- // cnt.next = cnt + (x.next & 1)
1645- for (Instruction &Inst : llvm::make_range (
1646- LoopEntry->getFirstNonPHI ()->getIterator (), LoopEntry->end ())) {
1647- if (Inst.getOpcode () != Instruction::Add)
1648- continue ;
1649-
1650- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand (1 ));
1651- if (!Inc || (!Inc->isOne () && !Inc->isMinusOne ()))
1652- continue ;
1653-
1654- PHINode *Phi = getRecurrenceVar (Inst.getOperand (0 ), &Inst, LoopEntry);
1655- if (!Phi)
1656- continue ;
1657-
1658- CntInst = &Inst;
1659- CntPhi = Phi;
1660- break ;
1661- }
1662- if (!CntInst)
1663- return false ;
1664-
1665- return true ;
1666- }
1667-
16681531// / Return true iff the idiom is detected in the loop.
16691532// /
16701533// / Additionally:
@@ -1893,35 +1756,27 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
18931756 return true ;
18941757}
18951758
1896- // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1897- // profitable if we delete the loop.
1898- bool LoopIdiomRecognize::isProfitableToInsertFFS (Intrinsic::ID IntrinID,
1899- Value *InitX, bool ZeroCheck,
1900- size_t CanonicalSize) {
1901- const Value *Args[] = {InitX,
1902- ConstantInt::getBool (InitX-> getContext (), ZeroCheck)} ;
1759+ // / Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1760+ // / to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1761+ // / trip count returns true; otherwise, returns false.
1762+ bool LoopIdiomRecognize::recognizeAndInsertFFS () {
1763+ // Give up if the loop has multiple blocks or multiple backedges.
1764+ if (CurLoop-> getNumBackEdges () != 1 || CurLoop-> getNumBlocks () != 1 )
1765+ return false ;
19031766
1904- // @llvm.dbg doesn't count as they have no semantic effect.
1905- auto InstWithoutDebugIt = CurLoop->getHeader ()->instructionsWithoutDebug ();
1906- uint32_t HeaderSize =
1907- std::distance (InstWithoutDebugIt.begin (), InstWithoutDebugIt.end ());
1767+ Intrinsic::ID IntrinID;
1768+ Value *InitX;
1769+ Instruction *DefX = nullptr ;
1770+ PHINode *CntPhi = nullptr ;
1771+ Instruction *CntInst = nullptr ;
1772+ // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1773+ // this is always 6.
1774+ size_t IdiomCanonicalSize = 6 ;
19081775
1909- IntrinsicCostAttributes Attrs (IntrinID, InitX->getType (), Args);
1910- InstructionCost Cost = TTI->getIntrinsicInstrCost (
1911- Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1912- if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
1776+ if (!detectShiftUntilZeroIdiom (CurLoop, *DL, IntrinID, InitX,
1777+ CntInst, CntPhi, DefX))
19131778 return false ;
19141779
1915- return true ;
1916- }
1917-
1918- // / Convert CTLZ / CTTZ idiom loop into countable loop.
1919- // / If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
1920- // / returns false.
1921- bool LoopIdiomRecognize::insertFFSIfProfitable (Intrinsic::ID IntrinID,
1922- Value *InitX, Instruction *DefX,
1923- PHINode *CntPhi,
1924- Instruction *CntInst) {
19251780 bool IsCntPhiUsedOutsideLoop = false ;
19261781 for (User *U : CntPhi->users ())
19271782 if (!CurLoop->contains (cast<Instruction>(U))) {
@@ -1963,107 +1818,35 @@ bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
19631818 ZeroCheck = true ;
19641819 }
19651820
1966- // FFS idiom loop has only 6 instructions:
1821+ // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1822+ // profitable if we delete the loop.
1823+
1824+ // the loop has only 6 instructions:
19671825 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
19681826 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
19691827 // %shr = ashr %n.addr.0, 1
19701828 // %tobool = icmp eq %shr, 0
19711829 // %inc = add nsw %i.0, 1
19721830 // br i1 %tobool
1973- size_t IdiomCanonicalSize = 6 ;
1974- if (!isProfitableToInsertFFS (IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1975- return false ;
1976-
1977- transformLoopToCountable (IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1978- DefX->getDebugLoc (), ZeroCheck,
1979- IsCntPhiUsedOutsideLoop);
1980- return true ;
1981- }
1982-
1983- // / Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1984- // / to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1985- // / trip count returns true; otherwise, returns false.
1986- bool LoopIdiomRecognize::recognizeAndInsertFFS () {
1987- // Give up if the loop has multiple blocks or multiple backedges.
1988- if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 )
1989- return false ;
1990-
1991- Intrinsic::ID IntrinID;
1992- Value *InitX;
1993- Instruction *DefX = nullptr ;
1994- PHINode *CntPhi = nullptr ;
1995- Instruction *CntInst = nullptr ;
1996-
1997- if (!detectShiftUntilZeroIdiom (CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
1998- DefX))
1999- return false ;
20001831
2001- return insertFFSIfProfitable (IntrinID, InitX, DefX, CntPhi, CntInst);
2002- }
2003-
2004- bool LoopIdiomRecognize::recognizeShiftUntilLessThan () {
2005- // Give up if the loop has multiple blocks or multiple backedges.
2006- if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 )
2007- return false ;
2008-
2009- Intrinsic::ID IntrinID;
2010- Value *InitX;
2011- Instruction *DefX = nullptr ;
2012- PHINode *CntPhi = nullptr ;
2013- Instruction *CntInst = nullptr ;
2014-
2015- uint64_t LoopThreshold;
2016- if (!detectShiftUntilLessThanIdiom (CurLoop, *DL, IntrinID, InitX, CntInst,
2017- CntPhi, DefX, LoopThreshold))
2018- return false ;
2019-
2020- if (LoopThreshold == 2 ) {
2021- // Treat as regular FFS.
2022- return insertFFSIfProfitable (IntrinID, InitX, DefX, CntPhi, CntInst);
2023- }
2024-
2025- // Look for Floor Log2 Idiom.
2026- if (LoopThreshold != 4 )
2027- return false ;
2028-
2029- // Abort if CntPhi is used outside of the loop.
2030- for (User *U : CntPhi->users ())
2031- if (!CurLoop->contains (cast<Instruction>(U)))
2032- return false ;
2033-
2034- // It is safe to assume Preheader exist as it was checked in
2035- // parent function RunOnLoop.
2036- BasicBlock *PH = CurLoop->getLoopPreheader ();
2037- auto *PreCondBB = PH->getSinglePredecessor ();
2038- if (!PreCondBB)
2039- return false ;
2040- auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator ());
2041- if (!PreCondBI)
2042- return false ;
2043-
2044- uint64_t PreLoopThreshold;
2045- if (matchShiftULTCondition (PreCondBI, PH, PreLoopThreshold) != InitX ||
2046- PreLoopThreshold != 2 )
2047- return false ;
1832+ const Value *Args[] = {InitX,
1833+ ConstantInt::getBool (InitX->getContext (), ZeroCheck)};
20481834
2049- bool ZeroCheck = true ;
1835+ // @llvm.dbg doesn't count as they have no semantic effect.
1836+ auto InstWithoutDebugIt = CurLoop->getHeader ()->instructionsWithoutDebug ();
1837+ uint32_t HeaderSize =
1838+ std::distance (InstWithoutDebugIt.begin (), InstWithoutDebugIt.end ());
20501839
2051- // the loop has only 6 instructions:
2052- // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2053- // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2054- // %shr = ashr %n.addr.0, 1
2055- // %tobool = icmp ult %n.addr.0, C
2056- // %inc = add nsw %i.0, 1
2057- // br i1 %tobool
2058- size_t IdiomCanonicalSize = 6 ;
2059- if (!isProfitableToInsertFFS (IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1840+ IntrinsicCostAttributes Attrs (IntrinID, InitX->getType (), Args);
1841+ InstructionCost Cost =
1842+ TTI->getIntrinsicInstrCost (Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1843+ if (HeaderSize != IdiomCanonicalSize &&
1844+ Cost > TargetTransformInfo::TCC_Basic)
20601845 return false ;
20611846
2062- // log2(x) = w − 1 − clz(x)
20631847 transformLoopToCountable (IntrinID, PH, CntInst, CntPhi, InitX, DefX,
20641848 DefX->getDebugLoc (), ZeroCheck,
2065- /* IsCntPhiUsedOutsideLoop=*/ false ,
2066- /* InsertSub=*/ true );
1849+ IsCntPhiUsedOutsideLoop);
20671850 return true ;
20681851}
20691852
@@ -2178,7 +1961,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
21781961void LoopIdiomRecognize::transformLoopToCountable (
21791962 Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
21801963 PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
2181- bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub ) {
1964+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
21821965 BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator ());
21831966
21841967 // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -2208,8 +1991,6 @@ void LoopIdiomRecognize::transformLoopToCountable(
22081991 Type *CountTy = Count->getType ();
22091992 Count = Builder.CreateSub (
22101993 ConstantInt::get (CountTy, CountTy->getIntegerBitWidth ()), Count);
2211- if (InsertSub)
2212- Count = Builder.CreateSub (Count, ConstantInt::get (CountTy, 1 ));
22131994 Value *NewCount = Count;
22141995 if (IsCntPhiUsedOutsideLoop)
22151996 Count = Builder.CreateAdd (Count, ConstantInt::get (CountTy, 1 ));
0 commit comments