@@ -231,12 +231,19 @@ class LoopIdiomRecognize {
231231 bool recognizePopcount ();
232232 void transformLoopToPopcount (BasicBlock *PreCondBB, Instruction *CntInst,
233233 PHINode *CntPhi, Value *Var);
234+ bool isProfitableToInsertFFS (Intrinsic::ID IntrinID, Value *InitX,
235+ bool ZeroCheck, size_t CanonicalSize);
236+ bool insertFFSIfProfitable (Intrinsic::ID IntrinID, Value *InitX,
237+ Instruction *DefX, PHINode *CntPhi,
238+ Instruction *CntInst);
234239 bool recognizeAndInsertFFS (); // / Find First Set: ctlz or cttz
240+ bool recognizeShiftUntilLessThan ();
235241 void transformLoopToCountable (Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
236242 Instruction *CntInst, PHINode *CntPhi,
237243 Value *Var, Instruction *DefX,
238244 const DebugLoc &DL, bool ZeroCheck,
239- bool IsCntPhiUsedOutsideLoop);
245+ bool IsCntPhiUsedOutsideLoop,
246+ bool InsertSub = false );
240247
241248 bool recognizeShiftUntilBitTest ();
242249 bool recognizeShiftUntilZero ();
@@ -1482,7 +1489,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
14821489 << CurLoop->getHeader ()->getName () << " \n " );
14831490
14841491 return recognizePopcount () || recognizeAndInsertFFS () ||
1485- recognizeShiftUntilBitTest () || recognizeShiftUntilZero ();
1492+ recognizeShiftUntilBitTest () || recognizeShiftUntilZero () ||
1493+ recognizeShiftUntilLessThan ();
14861494}
14871495
14881496// / Check if the given conditional branch is based on the comparison between
@@ -1517,6 +1525,34 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15171525 return nullptr ;
15181526}
15191527
1528+ // / Match a conditional branch on an unsigned less-than comparison between a
1529+ // / variable and a constant whose false successor is \p LoopEntry. On a match,
1530+ // / return the variable and set \p Threshold to the constant, saturated to
1531+ // / UINT64_MAX if it needs more than 64 bits. Otherwise return nullptr.
1532+ static Value *matchShiftULTCondition (BranchInst *BI, BasicBlock *LoopEntry,
1533+ uint64_t &Threshold) {
1534+ if (!BI || !BI->isConditional ())
1535+ return nullptr ;
1536+
1537+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition ());
1538+ if (!Cond)
1539+ return nullptr ;
1540+
1541+ ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1542+ if (!CmpConst)
1543+ return nullptr ;
1544+
1545+ BasicBlock *FalseSucc = BI->getSuccessor (1 );
1546+ ICmpInst::Predicate Pred = Cond->getPredicate ();
1547+
1548+ if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
1549+ Threshold = CmpConst->getLimitedValue (); // getZExtValue asserts on constants wider than 64 active bits
1550+ return Cond->getOperand (0 );
1551+ }
1552+
1553+ return nullptr ;
1554+ }
1555+
15201556// Check if the recurrence variable `VarX` is in the right form to create
15211557// the idiom. Returns the value coerced to a PHINode if so.
15221558static PHINode *getRecurrenceVar (Value *VarX, Instruction *DefX,
@@ -1528,6 +1564,107 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
15281564 return nullptr ;
15291565}
15301566
1567+ // / Return true if the idiom is detected in the loop.
1568+ // /
1569+ // / Additionally:
1570+ // / 1) \p CntInst is set to the instruction updating the counter (cnt.next)
1571+ // / or nullptr if there is no such.
1572+ // / 2) \p CntPhi is set to the corresponding phi node
1573+ // / or nullptr if there is no such.
1574+ // / 3) \p InitX is set to the value whose CTLZ could be used.
1575+ // / 4) \p DefX is set to the instruction computing x.next (the lshr by one).
1576+ // / 5) \p Threshold is set to the constant involved in the unsigned less-than
1577+ // / comparison.
1578+ // /
1579+ // / The core idiom we are trying to detect is:
1580+ // / \code
1581+ // / if (x0 < 2)
1582+ // / goto loop-exit // the precondition of the loop
1583+ // / cnt0 = init-val
1584+ // / do {
1585+ // / x = phi (x0, x.next); //PhiX
1586+ // / cnt = phi (cnt0, cnt.next)
1587+ // /
1588+ // / cnt.next = cnt + 1;
1589+ // / ...
1590+ // / x.next = x >> 1; // DefX
1591+ // / } while (x >= 4)
1592+ // / loop-exit:
1593+ // / \endcode
1594+ static bool detectShiftUntilLessThanIdiom (Loop *CurLoop, const DataLayout &DL,
1595+ Intrinsic::ID &IntrinID,
1596+ Value *&InitX, Instruction *&CntInst,
1597+ PHINode *&CntPhi, Instruction *&DefX,
1598+ uint64_t &Threshold) {
1599+ BasicBlock *LoopEntry;
1600+
1601+ DefX = nullptr ;
1602+ CntInst = nullptr ;
1603+ CntPhi = nullptr ;
1604+ LoopEntry = *(CurLoop->block_begin ());
1605+
1606+ // step 1: Check if the loop-back branch is in desirable form.
1607+ if (Value *T = matchShiftULTCondition (
1608+ dyn_cast<BranchInst>(LoopEntry->getTerminator ()), LoopEntry,
1609+ Threshold))
1610+ DefX = dyn_cast<Instruction>(T);
1611+ else
1612+ return false ;
1613+
1614+ // step 2: Check the recurrence of variable X
1615+ if (!DefX || !isa<PHINode>(DefX))
1616+ return false ;
1617+
1618+ PHINode *VarPhi = cast<PHINode>(DefX);
1619+ int Idx = VarPhi->getBasicBlockIndex (LoopEntry);
1620+ if (Idx == -1 )
1621+ return false ;
1622+
1623+ DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue (Idx));
1624+ if (!DefX || DefX->getNumOperands () == 0 || DefX->getOperand (0 ) != VarPhi)
1625+ return false ;
1626+
1627+ // step 3: detect instructions corresponding to "x.next = x >> 1"
1628+ if (DefX->getOpcode () != Instruction::LShr)
1629+ return false ;
1630+
1631+ IntrinID = Intrinsic::ctlz;
1632+ ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand (1 ));
1633+ if (!Shft || !Shft->isOne ())
1634+ return false ;
1635+
1636+ InitX = VarPhi->getIncomingValueForBlock (CurLoop->getLoopPreheader ());
1637+
1638+ // step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
1639+ // or cnt.next = cnt + -1.
1640+ // TODO: We can skip the step. If loop trip count is known (CTLZ),
1641+ // then all uses of "cnt.next" could be optimized to the trip count
1642+ // plus "cnt0". Currently it is not optimized.
1643+ // This step could be used to detect POPCNT instruction:
1644+ // cnt.next = cnt + (x.next & 1)
1645+ for (Instruction &Inst : llvm::make_range (
1646+ LoopEntry->getFirstNonPHI ()->getIterator (), LoopEntry->end ())) {
1647+ if (Inst.getOpcode () != Instruction::Add)
1648+ continue ;
1649+
1650+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand (1 ));
1651+ if (!Inc || (!Inc->isOne () && !Inc->isMinusOne ()))
1652+ continue ;
1653+
1654+ PHINode *Phi = getRecurrenceVar (Inst.getOperand (0 ), &Inst, LoopEntry);
1655+ if (!Phi)
1656+ continue ;
1657+
1658+ CntInst = &Inst;
1659+ CntPhi = Phi;
1660+ break ;
1661+ }
1662+ if (!CntInst)
1663+ return false ;
1664+
1665+ return true ;
1666+ }
1667+
15311668// / Return true iff the idiom is detected in the loop.
15321669// /
15331670// / Additionally:
@@ -1756,27 +1893,35 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
17561893 return true ;
17571894}
17581895
1759- // / Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1760- // / to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1761- // / trip count returns true; otherwise, returns false.
1762- bool LoopIdiomRecognize::recognizeAndInsertFFS () {
1763- // Give up if the loop has multiple blocks or multiple backedges.
1764- if (CurLoop-> getNumBackEdges () != 1 || CurLoop-> getNumBlocks () != 1 )
1765- return false ;
1896+ // Check if inserting the CTLZ / CTTZ intrinsic is profitable. Assume it is
1897+ // always profitable if we delete the loop.
1898+ bool LoopIdiomRecognize::isProfitableToInsertFFS (Intrinsic::ID IntrinID,
1899+ Value *InitX, bool ZeroCheck,
1900+ size_t CanonicalSize) {
1901+ const Value *Args[] = {InitX,
1902+ ConstantInt::getBool (InitX-> getContext (), ZeroCheck)} ;
17661903
1767- Intrinsic::ID IntrinID;
1768- Value *InitX;
1769- Instruction *DefX = nullptr ;
1770- PHINode *CntPhi = nullptr ;
1771- Instruction *CntInst = nullptr ;
1772- // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1773- // this is always 6.
1774- size_t IdiomCanonicalSize = 6 ;
1904+ // @llvm.dbg intrinsics don't count as they have no semantic effect.
1905+ auto InstWithoutDebugIt = CurLoop->getHeader ()->instructionsWithoutDebug ();
1906+ uint32_t HeaderSize =
1907+ std::distance (InstWithoutDebugIt.begin (), InstWithoutDebugIt.end ());
17751908
1776- if (!detectShiftUntilZeroIdiom (CurLoop, *DL, IntrinID, InitX,
1777- CntInst, CntPhi, DefX))
1909+ IntrinsicCostAttributes Attrs (IntrinID, InitX->getType (), Args);
1910+ InstructionCost Cost = TTI->getIntrinsicInstrCost (
1911+ Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1912+ if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
17781913 return false ;
17791914
1915+ return true ;
1916+ }
1917+
1918+ // / Convert CTLZ / CTTZ idiom loop into countable loop.
1919+ // / If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
1920+ // / returns false.
1921+ bool LoopIdiomRecognize::insertFFSIfProfitable (Intrinsic::ID IntrinID,
1922+ Value *InitX, Instruction *DefX,
1923+ PHINode *CntPhi,
1924+ Instruction *CntInst) {
17801925 bool IsCntPhiUsedOutsideLoop = false ;
17811926 for (User *U : CntPhi->users ())
17821927 if (!CurLoop->contains (cast<Instruction>(U))) {
@@ -1818,35 +1963,107 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
18181963 ZeroCheck = true ;
18191964 }
18201965
1821- // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1822- // profitable if we delete the loop.
1823-
1824- // the loop has only 6 instructions:
1966+ // FFS idiom loop has only 6 instructions:
18251967 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
18261968 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
18271969 // %shr = ashr %n.addr.0, 1
18281970 // %tobool = icmp eq %shr, 0
18291971 // %inc = add nsw %i.0, 1
18301972 // br i1 %tobool
1973+ size_t IdiomCanonicalSize = 6 ;
1974+ if (!isProfitableToInsertFFS (IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1975+ return false ;
18311976
1832- const Value *Args[] = {InitX,
1833- ConstantInt::getBool (InitX->getContext (), ZeroCheck)};
1977+ transformLoopToCountable (IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1978+ DefX->getDebugLoc (), ZeroCheck,
1979+ IsCntPhiUsedOutsideLoop);
1980+ return true ;
1981+ }
18341982
1835- // @llvm.dbg doesn't count as they have no semantic effect.
1836- auto InstWithoutDebugIt = CurLoop->getHeader ()->instructionsWithoutDebug ();
1837- uint32_t HeaderSize =
1838- std::distance (InstWithoutDebugIt.begin (), InstWithoutDebugIt.end ());
1983+ // / Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1984+ // / to countable (with CTLZ / CTTZ trip count). Returns true if CTLZ / CTTZ was
1985+ // / inserted as the new trip count; otherwise, returns false.
1986+ bool LoopIdiomRecognize::recognizeAndInsertFFS () {
1987+ // Give up if the loop has multiple blocks or multiple backedges.
1988+ if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 )
1989+ return false ;
18391990
1840- IntrinsicCostAttributes Attrs (IntrinID, InitX->getType (), Args);
1841- InstructionCost Cost =
1842- TTI->getIntrinsicInstrCost (Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1843- if (HeaderSize != IdiomCanonicalSize &&
1844- Cost > TargetTransformInfo::TCC_Basic)
1991+ Intrinsic::ID IntrinID;
1992+ Value *InitX;
1993+ Instruction *DefX = nullptr ;
1994+ PHINode *CntPhi = nullptr ;
1995+ Instruction *CntInst = nullptr ;
1996+
1997+ if (!detectShiftUntilZeroIdiom (CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
1998+ DefX))
1999+ return false ;
2000+
2001+ return insertFFSIfProfitable (IntrinID, InitX, DefX, CntPhi, CntInst);
2002+ }
2003+
2004+ bool LoopIdiomRecognize::recognizeShiftUntilLessThan () {
2005+ // Give up if the loop has multiple blocks or multiple backedges.
2006+ if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 )
2007+ return false ;
2008+
2009+ Intrinsic::ID IntrinID;
2010+ Value *InitX;
2011+ Instruction *DefX = nullptr ;
2012+ PHINode *CntPhi = nullptr ;
2013+ Instruction *CntInst = nullptr ;
2014+
2015+ uint64_t LoopThreshold;
2016+ if (!detectShiftUntilLessThanIdiom (CurLoop, *DL, IntrinID, InitX, CntInst,
2017+ CntPhi, DefX, LoopThreshold))
2018+ return false ;
2019+
2020+ if (LoopThreshold == 2 ) {
2021+ // Treat as regular FFS.
2022+ return insertFFSIfProfitable (IntrinID, InitX, DefX, CntPhi, CntInst);
2023+ }
2024+
2025+ // Look for Floor Log2 Idiom.
2026+ if (LoopThreshold != 4 )
2027+ return false ;
2028+
2029+ // Abort if CntPhi is used outside of the loop.
2030+ for (User *U : CntPhi->users ())
2031+ if (!CurLoop->contains (cast<Instruction>(U)))
2032+ return false ;
2033+
2034+ // It is safe to assume the Preheader exists as it was checked in the
2035+ // parent function RunOnLoop.
2036+ BasicBlock *PH = CurLoop->getLoopPreheader ();
2037+ auto *PreCondBB = PH->getSinglePredecessor ();
2038+ if (!PreCondBB)
2039+ return false ;
2040+ auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator ());
2041+ if (!PreCondBI)
2042+ return false ;
2043+
2044+ uint64_t PreLoopThreshold;
2045+ if (matchShiftULTCondition (PreCondBI, PH, PreLoopThreshold) != InitX ||
2046+ PreLoopThreshold != 2 )
18452047 return false ;
18462048
2049+ bool ZeroCheck = true ;
2050+
2051+ // The canonical idiom loop has only 6 instructions:
2052+ // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2053+ // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2054+ // %shr = lshr %n.addr.0, 1
2055+ // %tobool = icmp ult %n.addr.0, C
2056+ // %inc = add nsw %i.0, 1
2057+ // br i1 %tobool
2058+ size_t IdiomCanonicalSize = 6 ;
2059+ if (!isProfitableToInsertFFS (IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2060+ return false ;
2061+
2062+ // floor(log2(x)) = w - 1 - ctlz(x), where w is the bit width of x
18472063 transformLoopToCountable (IntrinID, PH, CntInst, CntPhi, InitX, DefX,
18482064 DefX->getDebugLoc (), ZeroCheck,
1849- IsCntPhiUsedOutsideLoop);
2065+ /* IsCntPhiUsedOutsideLoop=*/ false ,
2066+ /* InsertSub=*/ true );
18502067 return true ;
18512068}
18522069
@@ -1961,7 +2178,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
19612178void LoopIdiomRecognize::transformLoopToCountable (
19622179 Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
19632180 PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
1964- bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
2181+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub ) {
19652182 BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator ());
19662183
19672184 // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -1991,6 +2208,8 @@ void LoopIdiomRecognize::transformLoopToCountable(
19912208 Type *CountTy = Count->getType ();
19922209 Count = Builder.CreateSub (
19932210 ConstantInt::get (CountTy, CountTy->getIntegerBitWidth ()), Count);
2211+ if (InsertSub)
2212+ Count = Builder.CreateSub (Count, ConstantInt::get (CountTy, 1 ));
19942213 Value *NewCount = Count;
19952214 if (IsCntPhiUsedOutsideLoop)
19962215 Count = Builder.CreateAdd (Count, ConstantInt::get (CountTy, 1 ));
0 commit comments