2020//
2121// TODO List:
2222//
23- // Future loop memory idioms to recognize: memcmp, etc.
23+ // Future loop memory idioms to recognize:
24+ // memcmp, strlen, etc.
2425//
2526// This could recognize common matrix multiplies and dot product idioms and
2627// replace them with calls to BLAS (if linked in??).
3233#include " llvm/ADT/ArrayRef.h"
3334#include " llvm/ADT/DenseMap.h"
3435#include " llvm/ADT/MapVector.h"
35- #include " llvm/ADT/STLExtras.h"
3636#include " llvm/ADT/SetVector.h"
3737#include " llvm/ADT/SmallPtrSet.h"
3838#include " llvm/ADT/SmallVector.h"
@@ -97,7 +97,6 @@ using namespace llvm;
9797STATISTIC (NumMemSet, " Number of memset's formed from loop stores" );
9898STATISTIC (NumMemCpy, " Number of memcpy's formed from loop load+stores" );
9999STATISTIC (NumMemMove, " Number of memmove's formed from loop load+stores" );
100- STATISTIC (NumStrLen, " Number of strlen's and wcslen's formed from loop loads" );
101100STATISTIC (
102101 NumShiftUntilBitTest,
103102 " Number of uncountable loops recognized as 'shift until bitttest' idiom" );
@@ -127,22 +126,6 @@ static cl::opt<bool, true>
127126 cl::location(DisableLIRP::Memcpy), cl::init(false ),
128127 cl::ReallyHidden);
129128
130- bool DisableLIRP::Strlen;
131- static cl::opt<bool , true >
132- DisableLIRPStrlen (" disable-loop-idiom-strlen" ,
133- cl::desc (" Proceed with loop idiom recognize pass, but do "
134- " not convert loop(s) to strlen." ),
135- cl::location(DisableLIRP::Strlen), cl::init(false ),
136- cl::ReallyHidden);
137-
138- bool DisableLIRP::Wcslen;
139- static cl::opt<bool , true >
140- EnableLIRPWcslen (" disable-loop-idiom-wcslen" ,
141- cl::desc (" Proceed with loop idiom recognize pass, "
142- " enable conversion of loop(s) to wcslen." ),
143- cl::location(DisableLIRP::Wcslen), cl::init(false ),
144- cl::ReallyHidden);
145-
146129static cl::opt<bool > UseLIRCodeSizeHeurs (
147130 " use-lir-code-size-heurs" ,
148131 cl::desc (" Use loop idiom recognition code size heuristics when compiling "
@@ -263,7 +246,6 @@ class LoopIdiomRecognize {
263246
264247 bool recognizeShiftUntilBitTest ();
265248 bool recognizeShiftUntilZero ();
266- bool recognizeAndInsertStrLen ();
267249
268250 // / @}
269251};
@@ -1512,17 +1494,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
15121494
15131495 return recognizePopcount () || recognizeAndInsertFFS () ||
15141496 recognizeShiftUntilBitTest () || recognizeShiftUntilZero () ||
1515- recognizeShiftUntilLessThan () || recognizeAndInsertStrLen ();
1516- }
1517-
1518- // / Check if a Value is either a nullptr or a constant int zero
1519- static bool isZeroConstant (const Value *Val) {
1520- if (isa<ConstantPointerNull>(Val))
1521- return true ;
1522- const ConstantInt *CmpZero = dyn_cast<ConstantInt>(Val);
1523- if (!CmpZero || !CmpZero->isZero ())
1524- return false ;
1525- return true ;
1497+ recognizeShiftUntilLessThan ();
15261498}
15271499
15281500// / Check if the given conditional branch is based on the comparison between
@@ -1540,7 +1512,8 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15401512 if (!Cond)
15411513 return nullptr ;
15421514
1543- if (!isZeroConstant (Cond->getOperand (1 )))
1515+ ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1516+ if (!CmpZero || !CmpZero->isZero ())
15441517 return nullptr ;
15451518
15461519 BasicBlock *TrueSucc = BI->getSuccessor (0 );
@@ -1556,279 +1529,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15561529 return nullptr ;
15571530}
15581531
1559- namespace {
1560-
1561- class StrlenVerifier {
1562- public:
1563- explicit StrlenVerifier (const Loop *CurLoop, ScalarEvolution *SE,
1564- const TargetLibraryInfo *TLI)
1565- : CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1566-
1567- bool isValidStrlenIdiom () {
1568- // Give up if the loop has multiple blocks, multiple backedges, or
1569- // multiple exit blocks
1570- if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 ||
1571- !CurLoop->getUniqueExitBlock ())
1572- return false ;
1573-
1574- // It should have a preheader and a branch instruction.
1575- BasicBlock *Preheader = CurLoop->getLoopPreheader ();
1576- if (!Preheader)
1577- return false ;
1578-
1579- BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator ());
1580- if (!EntryBI)
1581- return false ;
1582-
1583- // The loop exit must be conditioned on an icmp with 0 the null terminator.
1584- // The icmp operand has to be a load on some SSA reg that increments
1585- // by 1 in the loop.
1586- BasicBlock *LoopBody = *CurLoop->block_begin ();
1587-
1588- // Skip if the body is too big as it most likely is not a strlen idiom.
1589- if (!LoopBody || LoopBody->size () >= 15 )
1590- return false ;
1591-
1592- BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator ());
1593- Value *LoopCond = matchCondition (LoopTerm, LoopBody);
1594- if (!LoopCond)
1595- return false ;
1596-
1597- LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1598- if (!LoopLoad || LoopLoad->getPointerAddressSpace () != 0 )
1599- return false ;
1600-
1601- OperandType = LoopLoad->getType ();
1602- if (!OperandType || !OperandType->isIntegerTy ())
1603- return false ;
1604-
1605- // See if the pointer expression is an AddRec with constant step a of form
1606- // ({n,+,a}) where a is the width of the char type.
1607- Value *IncPtr = LoopLoad->getPointerOperand ();
1608- const SCEVAddRecExpr *LoadEv =
1609- dyn_cast<SCEVAddRecExpr>(SE->getSCEV (IncPtr));
1610- if (!LoadEv || LoadEv->getLoop () != CurLoop || !LoadEv->isAffine ())
1611- return false ;
1612- LoadBaseEv = LoadEv->getStart ();
1613-
1614- LLVM_DEBUG ({
1615- dbgs () << " pointer load scev: " ;
1616- LoadEv->print (outs ());
1617- dbgs () << " \n " ;
1618- });
1619-
1620- const SCEVConstant *Step =
1621- dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence (*SE));
1622- if (!Step)
1623- return false ;
1624-
1625- unsigned StepSize = 0 ;
1626- StepSizeCI = dyn_cast<ConstantInt>(Step->getValue ());
1627- if (!StepSizeCI)
1628- return false ;
1629- StepSize = StepSizeCI->getZExtValue ();
1630-
1631- // Verify that StepSize is consistent with platform char width.
1632- OpWidth = OperandType->getIntegerBitWidth ();
1633- unsigned WcharSize = TLI->getWCharSize (*LoopLoad->getModule ());
1634- if (OpWidth != StepSize * 8 )
1635- return false ;
1636- if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32 )
1637- return false ;
1638- if (OpWidth >= 16 )
1639- if (OpWidth != WcharSize * 8 )
1640- return false ;
1641-
1642- // Scan every instruction in the loop to ensure there are no side effects.
1643- for (Instruction &I : *LoopBody)
1644- if (I.mayHaveSideEffects ())
1645- return false ;
1646-
1647- BasicBlock *LoopExitBB = CurLoop->getExitBlock ();
1648- if (!LoopExitBB)
1649- return false ;
1650-
1651- for (PHINode &PN : LoopExitBB->phis ()) {
1652- if (!SE->isSCEVable (PN.getType ()))
1653- return false ;
1654-
1655- const SCEV *Ev = SE->getSCEV (&PN);
1656- if (!Ev)
1657- return false ;
1658-
1659- LLVM_DEBUG ({
1660- dbgs () << " loop exit phi scev: " ;
1661- Ev->print (dbgs ());
1662- dbgs () << " \n " ;
1663- });
1664-
1665- // Since we verified that the loop trip count will be a valid strlen
1666- // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1667- // SCEVExpander materialize the loop output.
1668- const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1669- if (!AddRecEv || !AddRecEv->isAffine ())
1670- return false ;
1671-
1672- // We only want RecAddExpr with recurrence step that is constant. This
1673- // is good enough for all the idioms we want to recognize. Later we expand
1674- // and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1675- if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence (*SE)))
1676- return false ;
1677- }
1678-
1679- return true ;
1680- }
1681-
1682- public:
1683- const Loop *CurLoop;
1684- ScalarEvolution *SE;
1685- const TargetLibraryInfo *TLI;
1686-
1687- unsigned OpWidth;
1688- ConstantInt *StepSizeCI;
1689- const SCEV *LoadBaseEv;
1690- Type *OperandType;
1691- };
1692-
1693- } // namespace
1694-
1695- // / The Strlen Idiom we are trying to detect has the following structure
1696- // /
1697- // / preheader:
1698- // / ...
1699- // / br label %body, ...
1700- // /
1701- // / body:
1702- // / ... ; %0 is incremented by a gep
1703- // / %1 = load i8, ptr %0, align 1
1704- // / %2 = icmp eq i8 %1, 0
1705- // / br i1 %2, label %exit, label %body
1706- // /
1707- // / exit:
1708- // / %lcssa = phi [%0, %body], ...
1709- // /
1710- // / We expect the strlen idiom to have a load of a character type that
1711- // / is compared against '\0', and such load pointer operand must have scev
1712- // / expression of the form {%str,+,c} where c is a ConstantInt of the
1713- // / appropriate character width for the idiom, and %str is the base of the string.
1714- // / In addition, all lcssa phis must have the form {...,+,n} where n is a constant.
1715- // /
1716- // / When transforming the output of the strlen idiom, the lcssa phis are
1717- // / expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1718- // / and all subsequent uses are replaced. For example,
1719- // /
1720- // / \code{.c}
1721- // / const char* base = str;
1722- // / while (*str != '\0')
1723- // / ++str;
1724- // / size_t result = str - base;
1725- // / \endcode
1726- // /
1727- // / will be transformed as follows: The idiom will be replaced by a strlen
1728- // / computation to compute the address of the null terminator of the string.
1729- // /
1730- // / \code{.c}
1731- // / const char* base = str;
1732- // / const char* end = base + strlen(str);
1733- // / size_t result = end - base;
1734- // / \endcode
1735- // /
1736- // / In the case we index by an induction variable, as long as the induction
1737- // / variable has a constant int increment, we can replace all such indvars
1738- // / with the closed form computation of strlen
1739- // /
1740- // / \code{.c}
1741- // / size_t i = 0;
1742- // / while (str[i] != '\0')
1743- // / ++i;
1744- // / size_t result = i;
1745- // / \endcode
1746- // /
1747- // / Will be replaced by
1748- // /
1749- // / \code{.c}
1750- // / size_t i = 0 + strlen(str);
1751- // / size_t result = i;
1752- // / \endcode
1753- // /
1754- bool LoopIdiomRecognize::recognizeAndInsertStrLen () {
1755- if (DisableLIRP::All)
1756- return false ;
1757-
1758- StrlenVerifier Verifier (CurLoop, SE, TLI);
1759-
1760- if (!Verifier.isValidStrlenIdiom ())
1761- return false ;
1762-
1763- BasicBlock *Preheader = CurLoop->getLoopPreheader ();
1764- BasicBlock *LoopExitBB = CurLoop->getExitBlock ();
1765-
1766- IRBuilder<> Builder (Preheader->getTerminator ());
1767- SCEVExpander Expander (*SE, Preheader->getModule ()->getDataLayout (),
1768- " strlen_idiom" );
1769- Value *MaterialzedBase = Expander.expandCodeFor (
1770- Verifier.LoadBaseEv , Verifier.LoadBaseEv ->getType (),
1771- Builder.GetInsertPoint ());
1772-
1773- Value *StrLenFunc = nullptr ;
1774- if (Verifier.OpWidth == 8 ) {
1775- if (DisableLIRP::Strlen)
1776- return false ;
1777- if (!isLibFuncEmittable (Preheader->getModule (), TLI, LibFunc_strlen))
1778- return false ;
1779- StrLenFunc = emitStrLen (MaterialzedBase, Builder, *DL, TLI);
1780- } else {
1781- if (DisableLIRP::Wcslen)
1782- return false ;
1783- if (!isLibFuncEmittable (Preheader->getModule (), TLI, LibFunc_wcslen))
1784- return false ;
1785- StrLenFunc = emitWcsLen (MaterialzedBase, Builder, *DL, TLI);
1786- }
1787- assert (StrLenFunc && " Failed to emit strlen function." );
1788-
1789- const SCEV *StrlenEv = SE->getSCEV (StrLenFunc);
1790- SmallVector<PHINode *, 4 > Cleanup;
1791- for (PHINode &PN : LoopExitBB->phis ()) {
1792- // We can now materialize the loop output as all phi have scev {base,+,a}.
1793- // We expand the phi as:
1794- // %strlen = call i64 @strlen(%str)
1795- // %phi.new = base expression + step * %strlen
1796- const SCEV *Ev = SE->getSCEV (&PN);
1797- const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1798- const SCEVConstant *Step =
1799- dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence (*SE));
1800- const SCEV *Base = AddRecEv->getStart ();
1801-
1802- // It is safe to truncate to base since if base is narrower than size_t
1803- // the equivalent user code will have to truncate anyways.
1804- const SCEV *NewEv = SE->getAddExpr (
1805- Base, SE->getMulExpr (Step, SE->getTruncateOrSignExtend (
1806- StrlenEv, Base->getType ())));
1807-
1808- Value *MaterializedPHI = Expander.expandCodeFor (NewEv, NewEv->getType (),
1809- Builder.GetInsertPoint ());
1810- Expander.clear ();
1811- PN.replaceAllUsesWith (MaterializedPHI);
1812- Cleanup.push_back (&PN);
1813- }
1814-
1815- // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
1816- // up by later passes
1817- for (PHINode *PN : Cleanup)
1818- RecursivelyDeleteDeadPHINode (PN);
1819- SE->forgetLoop (CurLoop);
1820-
1821- ++NumStrLen;
1822- LLVM_DEBUG (dbgs () << " Formed strlen idiom: " << *StrLenFunc << " \n " );
1823- ORE.emit ([&]() {
1824- return OptimizationRemark (DEBUG_TYPE, " recognizeAndInsertStrLen" ,
1825- CurLoop->getStartLoc (), Preheader)
1826- << " Transformed " << StrLenFunc->getName () << " loop idiom" ;
1827- });
1828-
1829- return true ;
1830- }
1831-
18321532// / Check if the given conditional branch is based on an unsigned less-than
18331533// / comparison between a variable and a constant, and if the comparison is false
18341534// / the control yields to the loop entry. If the branch matches the behaviour,
0 commit comments