-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Reland "[Transforms] LoopIdiomRecognize recognize strlen and wcslen (#108985)" #131412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
afdca59
Initial upstreaming of strlen8 LIR 1 out of 3
mustartt 7ae690a
enable strlen insert
mustartt f752baa
replace LCSSA with null term ptr
mustartt f8057ff
update tests
mustartt 280bc11
Add wcslen idiom
mustartt 86497d2
refactor with SCEV Expander
mustartt 34fefa5
Add more tests
mustartt 35a2fc6
Refactor strlen detection
mustartt a4b7cd6
update tests
mustartt 05e2124
improve comments
mustartt e6faac5
revert formatting
mustartt 40ee5fa
remove DEBUG_TYPE from disable flags
mustartt 4e8851b
add more documentation
mustartt dee2518
address review comments
mustartt 528daea
address fmt
mustartt f0091d8
Fix isLibFuncEmittable logic error
mustartt 0e951d8
fix enable wcslen flag
mustartt 72440d0
invert wcslen condition
mustartt File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,8 +20,7 @@ | |
| // | ||
| // TODO List: | ||
| // | ||
| // Future loop memory idioms to recognize: | ||
| // memcmp, strlen, etc. | ||
| // Future loop memory idioms to recognize: memcmp, etc. | ||
| // | ||
| // This could recognize common matrix multiplies and dot product idioms and | ||
| // replace them with calls to BLAS (if linked in??). | ||
|
|
@@ -33,6 +32,7 @@ | |
| #include "llvm/ADT/ArrayRef.h" | ||
| #include "llvm/ADT/DenseMap.h" | ||
| #include "llvm/ADT/MapVector.h" | ||
| #include "llvm/ADT/STLExtras.h" | ||
| #include "llvm/ADT/SetVector.h" | ||
| #include "llvm/ADT/SmallPtrSet.h" | ||
| #include "llvm/ADT/SmallVector.h" | ||
|
|
@@ -97,6 +97,7 @@ using namespace llvm; | |
| STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); | ||
| STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); | ||
| STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores"); | ||
| STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads"); | ||
| STATISTIC( | ||
| NumShiftUntilBitTest, | ||
| "Number of uncountable loops recognized as 'shift until bitttest' idiom"); | ||
|
|
@@ -126,6 +127,22 @@ static cl::opt<bool, true> | |
| cl::location(DisableLIRP::Memcpy), cl::init(false), | ||
| cl::ReallyHidden); | ||
|
|
||
| bool DisableLIRP::Strlen; | ||
| static cl::opt<bool, true> | ||
| DisableLIRPStrlen("disable-loop-idiom-strlen", | ||
| cl::desc("Proceed with loop idiom recognize pass, but do " | ||
| "not convert loop(s) to strlen."), | ||
| cl::location(DisableLIRP::Strlen), cl::init(false), | ||
| cl::ReallyHidden); | ||
|
|
||
| bool DisableLIRP::Wcslen; | ||
| static cl::opt<bool, true> | ||
| EnableLIRPWcslen("disable-loop-idiom-wcslen", | ||
| cl::desc("Proceed with loop idiom recognize pass, " | ||
| "enable conversion of loop(s) to wcslen."), | ||
| cl::location(DisableLIRP::Wcslen), cl::init(false), | ||
| cl::ReallyHidden); | ||
|
|
||
| static cl::opt<bool> UseLIRCodeSizeHeurs( | ||
| "use-lir-code-size-heurs", | ||
| cl::desc("Use loop idiom recognition code size heuristics when compiling " | ||
|
|
@@ -246,6 +263,7 @@ class LoopIdiomRecognize { | |
|
|
||
| bool recognizeShiftUntilBitTest(); | ||
| bool recognizeShiftUntilZero(); | ||
| bool recognizeAndInsertStrLen(); | ||
|
|
||
| /// @} | ||
| }; | ||
|
|
@@ -1494,7 +1512,17 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { | |
|
|
||
| return recognizePopcount() || recognizeAndInsertFFS() || | ||
| recognizeShiftUntilBitTest() || recognizeShiftUntilZero() || | ||
| recognizeShiftUntilLessThan(); | ||
| recognizeShiftUntilLessThan() || recognizeAndInsertStrLen(); | ||
| } | ||
|
|
||
| /// Check if a Value is either a nullptr or a constant int zero | ||
| static bool isZeroConstant(const Value *Val) { | ||
| if (isa<ConstantPointerNull>(Val)) | ||
| return true; | ||
| const ConstantInt *CmpZero = dyn_cast<ConstantInt>(Val); | ||
| if (!CmpZero || !CmpZero->isZero()) | ||
| return false; | ||
| return true; | ||
| } | ||
|
|
||
| /// Check if the given conditional branch is based on the comparison between | ||
|
|
@@ -1512,8 +1540,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, | |
| if (!Cond) | ||
| return nullptr; | ||
|
|
||
| ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1)); | ||
| if (!CmpZero || !CmpZero->isZero()) | ||
| if (!isZeroConstant(Cond->getOperand(1))) | ||
| return nullptr; | ||
|
|
||
| BasicBlock *TrueSucc = BI->getSuccessor(0); | ||
|
|
@@ -1529,6 +1556,279 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, | |
| return nullptr; | ||
| } | ||
|
|
||
| namespace { | ||
|
|
||
| class StrlenVerifier { | ||
| public: | ||
| explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE, | ||
| const TargetLibraryInfo *TLI) | ||
| : CurLoop(CurLoop), SE(SE), TLI(TLI) {} | ||
|
|
||
| bool isValidStrlenIdiom() { | ||
| // Give up if the loop has multiple blocks, multiple backedges, or | ||
| // multiple exit blocks | ||
| if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 || | ||
| !CurLoop->getUniqueExitBlock()) | ||
| return false; | ||
|
|
||
| // It should have a preheader and a branch instruction. | ||
| BasicBlock *Preheader = CurLoop->getLoopPreheader(); | ||
| if (!Preheader) | ||
| return false; | ||
|
|
||
| BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator()); | ||
| if (!EntryBI) | ||
| return false; | ||
|
|
||
| // The loop exit must be conditioned on an icmp with 0 the null terminator. | ||
| // The icmp operand has to be a load on some SSA reg that increments | ||
| // by 1 in the loop. | ||
| BasicBlock *LoopBody = *CurLoop->block_begin(); | ||
|
|
||
| // Skip if the body is too big as it most likely is not a strlen idiom. | ||
| if (!LoopBody || LoopBody->size() >= 15) | ||
| return false; | ||
|
|
||
| BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator()); | ||
| Value *LoopCond = matchCondition(LoopTerm, LoopBody); | ||
| if (!LoopCond) | ||
| return false; | ||
|
|
||
| LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond); | ||
| if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0) | ||
| return false; | ||
|
|
||
| OperandType = LoopLoad->getType(); | ||
| if (!OperandType || !OperandType->isIntegerTy()) | ||
| return false; | ||
|
|
||
| // See if the pointer expression is an AddRec with constant step a of form | ||
| // ({n,+,a}) where a is the width of the char type. | ||
| Value *IncPtr = LoopLoad->getPointerOperand(); | ||
| const SCEVAddRecExpr *LoadEv = | ||
| dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr)); | ||
| if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) | ||
| return false; | ||
| LoadBaseEv = LoadEv->getStart(); | ||
|
|
||
| LLVM_DEBUG({ | ||
| dbgs() << "pointer load scev: "; | ||
| LoadEv->print(outs()); | ||
| dbgs() << "\n"; | ||
| }); | ||
|
|
||
| const SCEVConstant *Step = | ||
| dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE)); | ||
| if (!Step) | ||
| return false; | ||
|
|
||
| unsigned StepSize = 0; | ||
| StepSizeCI = dyn_cast<ConstantInt>(Step->getValue()); | ||
| if (!StepSizeCI) | ||
| return false; | ||
| StepSize = StepSizeCI->getZExtValue(); | ||
|
|
||
| // Verify that StepSize is consistent with platform char width. | ||
| OpWidth = OperandType->getIntegerBitWidth(); | ||
| unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule()); | ||
| if (OpWidth != StepSize * 8) | ||
| return false; | ||
| if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32) | ||
| return false; | ||
| if (OpWidth >= 16) | ||
| if (OpWidth != WcharSize * 8) | ||
| return false; | ||
|
|
||
| // Scan every instruction in the loop to ensure there are no side effects. | ||
| for (Instruction &I : *LoopBody) | ||
| if (I.mayHaveSideEffects()) | ||
| return false; | ||
|
|
||
| BasicBlock *LoopExitBB = CurLoop->getExitBlock(); | ||
| if (!LoopExitBB) | ||
| return false; | ||
|
|
||
| for (PHINode &PN : LoopExitBB->phis()) { | ||
| if (!SE->isSCEVable(PN.getType())) | ||
| return false; | ||
|
|
||
| const SCEV *Ev = SE->getSCEV(&PN); | ||
| if (!Ev) | ||
| return false; | ||
|
|
||
| LLVM_DEBUG({ | ||
| dbgs() << "loop exit phi scev: "; | ||
| Ev->print(dbgs()); | ||
| dbgs() << "\n"; | ||
|
Comment on lines
+1660
to
+1662
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can use |
||
| }); | ||
|
|
||
| // Since we verified that the loop trip count will be a valid strlen | ||
| // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use | ||
| // SCEVExpander materialize the loop output. | ||
| const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev); | ||
| if (!AddRecEv || !AddRecEv->isAffine()) | ||
| return false; | ||
|
|
||
| // We only want RecAddExpr with recurrence step that is constant. This | ||
| // is good enough for all the idioms we want to recognize. Later we expand | ||
| // and materialize the recurrence as {base,+,a} -> (base + a * strlen) | ||
| if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE))) | ||
| return false; | ||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| public: | ||
| const Loop *CurLoop; | ||
| ScalarEvolution *SE; | ||
| const TargetLibraryInfo *TLI; | ||
|
|
||
| unsigned OpWidth; | ||
| ConstantInt *StepSizeCI; | ||
| const SCEV *LoadBaseEv; | ||
| Type *OperandType; | ||
| }; | ||
|
|
||
| } // namespace | ||
|
|
||
| /// The Strlen Idiom we are trying to detect has the following structure | ||
| /// | ||
| /// preheader: | ||
| /// ... | ||
| /// br label %body, ... | ||
| /// | ||
| /// body: | ||
| /// ... ; %0 is incremented by a gep | ||
| /// %1 = load i8, ptr %0, align 1 | ||
| /// %2 = icmp eq i8 %1, 0 | ||
| /// br i1 %2, label %exit, label %body | ||
| /// | ||
| /// exit: | ||
| /// %lcssa = phi [%0, %body], ... | ||
| /// | ||
| /// We expect the strlen idiom to have a load of a character type that | ||
| /// is compared against '\0', and such load pointer operand must have scev | ||
| /// expression of the form {%str,+,c} where c is a ConstantInt of the | ||
| /// appropiate character width for the idiom, and %str is the base of the string | ||
| /// And, that all lcssa phis have the form {...,+,n} where n is a constant, | ||
| /// | ||
| /// When transforming the output of the strlen idiom, the lccsa phi are | ||
| /// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen) | ||
| /// and all subsequent uses are replaced. For example, | ||
| /// | ||
| /// \code{.c} | ||
| /// const char* base = str; | ||
| /// while (*str != '\0') | ||
| /// ++str; | ||
| /// size_t result = str - base; | ||
| /// \endcode | ||
| /// | ||
| /// will be transformed as follows: The idiom will be replaced by a strlen | ||
| /// computation to compute the address of the null terminator of the string. | ||
| /// | ||
| /// \code{.c} | ||
| /// const char* base = str; | ||
| /// const char* end = base + strlen(str); | ||
| /// size_t result = end - base; | ||
| /// \endcode | ||
| /// | ||
| /// In the case we index by an induction variable, as long as the induction | ||
| /// variable has a constant int increment, we can replace all such indvars | ||
| /// with the closed form computation of strlen | ||
| /// | ||
| /// \code{.c} | ||
| /// size_t i = 0; | ||
| /// while (str[i] != '\0') | ||
| /// ++i; | ||
| /// size_t result = i; | ||
| /// \endcode | ||
| /// | ||
| /// Will be replaced by | ||
| /// | ||
| /// \code{.c} | ||
| /// size_t i = 0 + strlen(str); | ||
| /// size_t result = i; | ||
| /// \endcode | ||
| /// | ||
| bool LoopIdiomRecognize::recognizeAndInsertStrLen() { | ||
| if (DisableLIRP::All) | ||
| return false; | ||
|
|
||
| StrlenVerifier Verifier(CurLoop, SE, TLI); | ||
|
|
||
| if (!Verifier.isValidStrlenIdiom()) | ||
| return false; | ||
|
|
||
| BasicBlock *Preheader = CurLoop->getLoopPreheader(); | ||
| BasicBlock *LoopExitBB = CurLoop->getExitBlock(); | ||
|
|
||
| IRBuilder<> Builder(Preheader->getTerminator()); | ||
| SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(), | ||
| "strlen_idiom"); | ||
| Value *MaterialzedBase = Expander.expandCodeFor( | ||
| Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(), | ||
| Builder.GetInsertPoint()); | ||
|
|
||
| Value *StrLenFunc = nullptr; | ||
| if (Verifier.OpWidth == 8) { | ||
| if (DisableLIRP::Strlen) | ||
| return false; | ||
| if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen)) | ||
| return false; | ||
| StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI); | ||
| } else { | ||
| if (DisableLIRP::Wcslen) | ||
| return false; | ||
| if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen)) | ||
| return false; | ||
| StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI); | ||
| } | ||
| assert(StrLenFunc && "Failed to emit strlen function."); | ||
|
|
||
| const SCEV *StrlenEv = SE->getSCEV(StrLenFunc); | ||
| SmallVector<PHINode *, 4> Cleanup; | ||
| for (PHINode &PN : LoopExitBB->phis()) { | ||
| // We can now materialize the loop output as all phi have scev {base,+,a}. | ||
| // We expand the phi as: | ||
| // %strlen = call i64 @strlen(%str) | ||
| // %phi.new = base expression + step * %strlen | ||
| const SCEV *Ev = SE->getSCEV(&PN); | ||
| const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev); | ||
| const SCEVConstant *Step = | ||
| dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)); | ||
| const SCEV *Base = AddRecEv->getStart(); | ||
|
|
||
| // It is safe to truncate to base since if base is narrower than size_t | ||
| // the equivalent user code will have to truncate anyways. | ||
| const SCEV *NewEv = SE->getAddExpr( | ||
| Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend( | ||
| StrlenEv, Base->getType()))); | ||
|
|
||
| Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(), | ||
| Builder.GetInsertPoint()); | ||
| Expander.clear(); | ||
| PN.replaceAllUsesWith(MaterializedPHI); | ||
| Cleanup.push_back(&PN); | ||
| } | ||
|
|
||
| // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned | ||
| // up by later passes | ||
| for (PHINode *PN : Cleanup) | ||
| RecursivelyDeleteDeadPHINode(PN); | ||
| SE->forgetLoop(CurLoop); | ||
|
|
||
| ++NumStrLen; | ||
| LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n"); | ||
| ORE.emit([&]() { | ||
| return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen", | ||
| CurLoop->getStartLoc(), Preheader) | ||
| << "Transformed " << StrLenFunc->getName() << " loop idiom"; | ||
| }); | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| /// Check if the given conditional branch is based on an unsigned less-than | ||
| /// comparison between a variable and a constant, and if the comparison is false | ||
| /// the control yields to the loop entry. If the branch matches the behaviour, | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like
match(Val, m_Zero()). Though I'm not super clear on why this patch has to add the pointer handling at all.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for taking. You are right, it's no longer needed. We used to use this to condition the loop preheader on a
nullptrcomparison for the base of the string. I will clean this up