2020//
2121// TODO List:
2222//
23- // Future loop memory idioms to recognize:
24- // memcmp, strlen, etc.
23+ // Future loop memory idioms to recognize: memcmp, etc.
2524//
2625// This could recognize common matrix multiplies and dot product idioms and
2726// replace them with calls to BLAS (if linked in??).
3332#include " llvm/ADT/ArrayRef.h"
3433#include " llvm/ADT/DenseMap.h"
3534#include " llvm/ADT/MapVector.h"
35+ #include " llvm/ADT/STLExtras.h"
3636#include " llvm/ADT/SetVector.h"
3737#include " llvm/ADT/SmallPtrSet.h"
3838#include " llvm/ADT/SmallVector.h"
@@ -97,6 +97,7 @@ using namespace llvm;
// Statistic counters for this pass, one per kind of idiom; the description
// string of each entry states what is being counted. NumStrLen is bumped once
// per loop that recognizeAndInsertStrLen() rewrites into a strlen/wcslen call.
9797STATISTIC (NumMemSet, " Number of memset's formed from loop stores" );
9898STATISTIC (NumMemCpy, " Number of memcpy's formed from loop load+stores" );
9999STATISTIC (NumMemMove, " Number of memmove's formed from loop load+stores" );
100+ STATISTIC (NumStrLen, " Number of strlen's and wcslen's formed from loop loads" );
100101STATISTIC (
101102 NumShiftUntilBitTest,
102103 " Number of uncountable loops recognized as 'shift until bitttest' idiom" );
@@ -126,6 +127,22 @@ static cl::opt<bool, true>
126127 cl::location(DisableLIRP::Memcpy), cl::init(false ),
127128 cl::ReallyHidden);
128129
130+ bool DisableLIRP::Strlen;
131+ static cl::opt<bool , true >
132+ DisableLIRPStrlen (" disable-loop-idiom-strlen" ,
133+ cl::desc (" Proceed with loop idiom recognize pass, but do "
134+ " not convert loop(s) to strlen." ),
135+ cl::location(DisableLIRP::Strlen), cl::init(false ),
136+ cl::ReallyHidden);
137+
138+ bool DisableLIRP::Wcslen;
139+ static cl::opt<bool , true >
140+ EnableLIRPWcslen (" disable-loop-idiom-wcslen" ,
141+ cl::desc (" Proceed with loop idiom recognize pass, "
142+ " enable conversion of loop(s) to wcslen." ),
143+ cl::location(DisableLIRP::Wcslen), cl::init(false ),
144+ cl::ReallyHidden);
145+
129146static cl::opt<bool > UseLIRCodeSizeHeurs (
130147 " use-lir-code-size-heurs" ,
131148 cl::desc (" Use loop idiom recognition code size heuristics when compiling "
@@ -246,6 +263,7 @@ class LoopIdiomRecognize {
246263
247264 bool recognizeShiftUntilBitTest ();
248265 bool recognizeShiftUntilZero ();
266+ bool recognizeAndInsertStrLen ();
249267
250268 // / @}
251269};
@@ -295,7 +313,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
295313
296314 // Disable loop idiom recognition if the function's name is a common idiom.
297315 StringRef Name = L->getHeader ()->getParent ()->getName ();
298- if (Name == " memset" || Name == " memcpy" )
316+ if (Name == " memset" || Name == " memcpy" || Name == " strlen" ||
317+ Name == " wcslen" )
299318 return false ;
300319
301320 // Determine if code size heuristics need to be applied.
@@ -1494,7 +1513,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
14941513
14951514 return recognizePopcount () || recognizeAndInsertFFS () ||
14961515 recognizeShiftUntilBitTest () || recognizeShiftUntilZero () ||
1497- recognizeShiftUntilLessThan ();
1516+ recognizeShiftUntilLessThan () || recognizeAndInsertStrLen () ;
14981517}
14991518
15001519// / Check if the given conditional branch is based on the comparison between
@@ -1512,7 +1531,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15121531 if (!Cond)
15131532 return nullptr ;
15141533
1515- ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1534+ auto *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
15161535 if (!CmpZero || !CmpZero->isZero ())
15171536 return nullptr ;
15181537
@@ -1529,6 +1548,275 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15291548 return nullptr ;
15301549}
15311550
1551+ namespace {
1552+
1553+ class StrlenVerifier {
1554+ public:
1555+ explicit StrlenVerifier (const Loop *CurLoop, ScalarEvolution *SE,
1556+ const TargetLibraryInfo *TLI)
1557+ : CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1558+
1559+ bool isValidStrlenIdiom () {
1560+ // Give up if the loop has multiple blocks, multiple backedges, or
1561+ // multiple exit blocks
1562+ if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 ||
1563+ !CurLoop->getUniqueExitBlock ())
1564+ return false ;
1565+
1566+ // It should have a preheader and a branch instruction.
1567+ BasicBlock *Preheader = CurLoop->getLoopPreheader ();
1568+ if (!Preheader)
1569+ return false ;
1570+
1571+ BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator ());
1572+ if (!EntryBI)
1573+ return false ;
1574+
1575+ // The loop exit must be conditioned on an icmp with 0 the null terminator.
1576+ // The icmp operand has to be a load on some SSA reg that increments
1577+ // by 1 in the loop.
1578+ BasicBlock *LoopBody = *CurLoop->block_begin ();
1579+
1580+ // Skip if the body is too big as it most likely is not a strlen idiom.
1581+ if (!LoopBody || LoopBody->size () >= 15 )
1582+ return false ;
1583+
1584+ BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator ());
1585+ Value *LoopCond = matchCondition (LoopTerm, LoopBody);
1586+ if (!LoopCond)
1587+ return false ;
1588+
1589+ LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1590+ if (!LoopLoad || LoopLoad->getPointerAddressSpace () != 0 )
1591+ return false ;
1592+
1593+ OperandType = LoopLoad->getType ();
1594+ if (!OperandType || !OperandType->isIntegerTy ())
1595+ return false ;
1596+
1597+ // See if the pointer expression is an AddRec with constant step a of form
1598+ // ({n,+,a}) where a is the width of the char type.
1599+ Value *IncPtr = LoopLoad->getPointerOperand ();
1600+ const SCEVAddRecExpr *LoadEv =
1601+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV (IncPtr));
1602+ if (!LoadEv || LoadEv->getLoop () != CurLoop || !LoadEv->isAffine ())
1603+ return false ;
1604+ LoadBaseEv = LoadEv->getStart ();
1605+
1606+ LLVM_DEBUG (dbgs () << " pointer load scev: " << *LoadEv << " \n " );
1607+
1608+ const SCEVConstant *Step =
1609+ dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence (*SE));
1610+ if (!Step)
1611+ return false ;
1612+
1613+ unsigned StepSize = 0 ;
1614+ StepSizeCI = dyn_cast<ConstantInt>(Step->getValue ());
1615+ if (!StepSizeCI)
1616+ return false ;
1617+ StepSize = StepSizeCI->getZExtValue ();
1618+
1619+ // Verify that StepSize is consistent with platform char width.
1620+ OpWidth = OperandType->getIntegerBitWidth ();
1621+ unsigned WcharSize = TLI->getWCharSize (*LoopLoad->getModule ());
1622+ if (OpWidth != StepSize * 8 )
1623+ return false ;
1624+ if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32 )
1625+ return false ;
1626+ if (OpWidth >= 16 )
1627+ if (OpWidth != WcharSize * 8 )
1628+ return false ;
1629+
1630+ // Scan every instruction in the loop to ensure there are no side effects.
1631+ for (Instruction &I : *LoopBody)
1632+ if (I.mayHaveSideEffects ())
1633+ return false ;
1634+
1635+ BasicBlock *LoopExitBB = CurLoop->getExitBlock ();
1636+ if (!LoopExitBB)
1637+ return false ;
1638+
1639+ for (PHINode &PN : LoopExitBB->phis ()) {
1640+ if (!SE->isSCEVable (PN.getType ()))
1641+ return false ;
1642+
1643+ const SCEV *Ev = SE->getSCEV (&PN);
1644+ if (!Ev)
1645+ return false ;
1646+
1647+ LLVM_DEBUG (dbgs () << " loop exit phi scev: " << *Ev << " \n " );
1648+
1649+ // Since we verified that the loop trip count will be a valid strlen
1650+ // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1651+ // SCEVExpander materialize the loop output.
1652+ const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1653+ if (!AddRecEv || !AddRecEv->isAffine ())
1654+ return false ;
1655+
1656+ // We only want RecAddExpr with recurrence step that is constant. This
1657+ // is good enough for all the idioms we want to recognize. Later we expand
1658+ // and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1659+ if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence (*SE)))
1660+ return false ;
1661+ }
1662+
1663+ return true ;
1664+ }
1665+
1666+ public:
1667+ const Loop *CurLoop;
1668+ ScalarEvolution *SE;
1669+ const TargetLibraryInfo *TLI;
1670+
1671+ unsigned OpWidth;
1672+ ConstantInt *StepSizeCI;
1673+ const SCEV *LoadBaseEv;
1674+ Type *OperandType;
1675+ };
1676+
1677+ } // namespace
1678+
1679+ // / The Strlen Idiom we are trying to detect has the following structure
1680+ // /
1681+ // / preheader:
1682+ // / ...
1683+ // / br label %body, ...
1684+ // /
1685+ // / body:
1686+ // / ... ; %0 is incremented by a gep
1687+ // / %1 = load i8, ptr %0, align 1
1688+ // / %2 = icmp eq i8 %1, 0
1689+ // / br i1 %2, label %exit, label %body
1690+ // /
1691+ // / exit:
1692+ // / %lcssa = phi [%0, %body], ...
1693+ // /
1694+ // / We expect the strlen idiom to have a load of a character type that
1695+ // / is compared against '\0', and such load pointer operand must have scev
1696+ // / expression of the form {%str,+,c} where c is a ConstantInt of the
1697+ // / appropiate character width for the idiom, and %str is the base of the string
1698+ // / And, that all lcssa phis have the form {...,+,n} where n is a constant,
1699+ // /
1700+ // / When transforming the output of the strlen idiom, the lccsa phi are
1701+ // / expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1702+ // / and all subsequent uses are replaced. For example,
1703+ // /
1704+ // / \code{.c}
1705+ // / const char* base = str;
1706+ // / while (*str != '\0')
1707+ // / ++str;
1708+ // / size_t result = str - base;
1709+ // / \endcode
1710+ // /
1711+ // / will be transformed as follows: The idiom will be replaced by a strlen
1712+ // / computation to compute the address of the null terminator of the string.
1713+ // /
1714+ // / \code{.c}
1715+ // / const char* base = str;
1716+ // / const char* end = base + strlen(str);
1717+ // / size_t result = end - base;
1718+ // / \endcode
1719+ // /
1720+ // / In the case we index by an induction variable, as long as the induction
1721+ // / variable has a constant int increment, we can replace all such indvars
1722+ // / with the closed form computation of strlen
1723+ // /
1724+ // / \code{.c}
1725+ // / size_t i = 0;
1726+ // / while (str[i] != '\0')
1727+ // / ++i;
1728+ // / size_t result = i;
1729+ // / \endcode
1730+ // /
1731+ // / Will be replaced by
1732+ // /
1733+ // / \code{.c}
1734+ // / size_t i = 0 + strlen(str);
1735+ // / size_t result = i;
1736+ // / \endcode
1737+ // /
1738+ bool LoopIdiomRecognize::recognizeAndInsertStrLen () {
1739+ if (DisableLIRP::All)
1740+ return false ;
1741+
1742+ StrlenVerifier Verifier (CurLoop, SE, TLI);
1743+
1744+ if (!Verifier.isValidStrlenIdiom ())
1745+ return false ;
1746+
1747+ BasicBlock *Preheader = CurLoop->getLoopPreheader ();
1748+ BasicBlock *LoopExitBB = CurLoop->getExitBlock ();
1749+
1750+ if (Verifier.OpWidth == 8 ) {
1751+ if (DisableLIRP::Strlen)
1752+ return false ;
1753+ if (!isLibFuncEmittable (Preheader->getModule (), TLI, LibFunc_strlen))
1754+ return false ;
1755+ } else {
1756+ if (DisableLIRP::Wcslen)
1757+ return false ;
1758+ if (!isLibFuncEmittable (Preheader->getModule (), TLI, LibFunc_wcslen))
1759+ return false ;
1760+ }
1761+
1762+ IRBuilder<> Builder (Preheader->getTerminator ());
1763+ SCEVExpander Expander (*SE, Preheader->getModule ()->getDataLayout (),
1764+ " strlen_idiom" );
1765+ Value *MaterialzedBase = Expander.expandCodeFor (
1766+ Verifier.LoadBaseEv , Verifier.LoadBaseEv ->getType (),
1767+ Builder.GetInsertPoint ());
1768+
1769+ Value *StrLenFunc = nullptr ;
1770+ if (Verifier.OpWidth == 8 ) {
1771+ StrLenFunc = emitStrLen (MaterialzedBase, Builder, *DL, TLI);
1772+ } else {
1773+ StrLenFunc = emitWcsLen (MaterialzedBase, Builder, *DL, TLI);
1774+ }
1775+ assert (StrLenFunc && " Failed to emit strlen function." );
1776+
1777+ const SCEV *StrlenEv = SE->getSCEV (StrLenFunc);
1778+ SmallVector<PHINode *, 4 > Cleanup;
1779+ for (PHINode &PN : LoopExitBB->phis ()) {
1780+ // We can now materialize the loop output as all phi have scev {base,+,a}.
1781+ // We expand the phi as:
1782+ // %strlen = call i64 @strlen(%str)
1783+ // %phi.new = base expression + step * %strlen
1784+ const SCEV *Ev = SE->getSCEV (&PN);
1785+ const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1786+ const SCEVConstant *Step =
1787+ dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence (*SE));
1788+ const SCEV *Base = AddRecEv->getStart ();
1789+
1790+ // It is safe to truncate to base since if base is narrower than size_t
1791+ // the equivalent user code will have to truncate anyways.
1792+ const SCEV *NewEv = SE->getAddExpr (
1793+ Base, SE->getMulExpr (Step, SE->getTruncateOrSignExtend (
1794+ StrlenEv, Base->getType ())));
1795+
1796+ Value *MaterializedPHI = Expander.expandCodeFor (NewEv, NewEv->getType (),
1797+ Builder.GetInsertPoint ());
1798+ Expander.clear ();
1799+ PN.replaceAllUsesWith (MaterializedPHI);
1800+ Cleanup.push_back (&PN);
1801+ }
1802+
1803+ // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
1804+ // up by later passes
1805+ for (PHINode *PN : Cleanup)
1806+ RecursivelyDeleteDeadPHINode (PN);
1807+ SE->forgetLoop (CurLoop);
1808+
1809+ ++NumStrLen;
1810+ LLVM_DEBUG (dbgs () << " Formed strlen idiom: " << *StrLenFunc << " \n " );
1811+ ORE.emit ([&]() {
1812+ return OptimizationRemark (DEBUG_TYPE, " recognizeAndInsertStrLen" ,
1813+ CurLoop->getStartLoc (), Preheader)
1814+ << " Transformed " << StrLenFunc->getName () << " loop idiom" ;
1815+ });
1816+
1817+ return true ;
1818+ }
1819+
15321820// / Check if the given conditional branch is based on an unsigned less-than
15331821// / comparison between a variable and a constant, and if the comparison is false
15341822// / the control yields to the loop entry. If the branch matches the behaviour,
0 commit comments