-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[IndVarSimplify] Allow predicateLoopExit on some loops with thread-local writes #155901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
fmayer
merged 18 commits into
main
from
users/fmayer/spr/indvarsimplify-allow-predicateloopexit-on-some-loops-with-local-writes
Oct 16, 2025
Merged
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
70490db
[𝘀𝗽𝗿] initial version
fmayer 78851f9
comment
fmayer 5d1e1ab
add flag
fmayer f896688
atomic and volatile
fmayer a4f857b
TODO
fmayer c278bd4
style
fmayer 6d51827
change
fmayer 3992eac
more test
fmayer 4817d7c
address comments
fmayer a0bfcb0
do not hardcode traps
fmayer f1d9f8e
fixup
fmayer 8e07cd7
update
fmayer 7f6c044
fmt
fmayer 0aaf4d3
review comments
fmayer 8133497
rebase
fmayer aecd38d
comments
fmayer 97ab69d
test update
fmayer e17b5a7
comment
fmayer File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,6 +53,7 @@ | |
#include "llvm/IR/InstrTypes.h" | ||
#include "llvm/IR/Instruction.h" | ||
#include "llvm/IR/Instructions.h" | ||
#include "llvm/IR/IntrinsicInst.h" | ||
#include "llvm/IR/Intrinsics.h" | ||
#include "llvm/IR/PassManager.h" | ||
#include "llvm/IR/PatternMatch.h" | ||
|
@@ -117,6 +118,10 @@ static cl::opt<bool> | |
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), | ||
cl::desc("Predicate conditions in read only loops")); | ||
|
||
// Hidden flag, enabled by default. When it is disabled, predicateLoopExits
// gives up as soon as it finds any instruction in the loop that may have side
// effects, instead of tolerating simple stores in loops whose exit traps.
static cl::opt<bool> LoopPredicationTraps( | ||
"indvars-predicate-loop-traps", cl::Hidden, cl::init(true), | ||
cl::desc("Predicate conditions that trap in loops with only local writes")); | ||
|
||
// Hidden flag, enabled by default, that permits widening induction variables
// in order to eliminate sign/zero extensions (see the option description).
static cl::opt<bool> | ||
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), | ||
cl::desc("Allow widening of indvars to eliminate s/zext")); | ||
|
@@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { | |
return Changed; | ||
} | ||
|
||
static bool crashingBBWithoutEffect(const BasicBlock &BB) { | ||
return llvm::all_of(BB, [](const Instruction &I) { | ||
// TODO: for now this is overly restrictive, to make sure nothing in this | ||
// BB can depend on the loop body. | ||
// It's not enough to check for !I.mayHaveSideEffects(), because e.g. a | ||
// load does not have a side effect, but we could have | ||
// %a = load ptr, ptr %ptr | ||
// %b = load i32, ptr %a | ||
// Now if the loop stored a non-nullptr to %a, we could cause a nullptr | ||
// dereference by skipping over loop iterations. | ||
if (const auto *CB = dyn_cast<CallBase>(&I)) { | ||
if (CB->onlyAccessesInaccessibleMemory()) | ||
return true; | ||
} | ||
return isa<UnreachableInst>(I); | ||
}); | ||
} | ||
|
||
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { | ||
SmallVector<BasicBlock*, 16> ExitingBlocks; | ||
L->getExitingBlocks(ExitingBlocks); | ||
|
@@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { | |
// suggestions on how to improve this? I can obviously bail out for outer | ||
// loops, but that seems less than ideal. MemorySSA can find memory writes, | ||
// is that enough for *all* side effects? | ||
bool HasThreadLocalSideEffects = false; | ||
for (BasicBlock *BB : L->blocks()) | ||
for (auto &I : *BB) | ||
// TODO:isGuaranteedToTransfer | ||
if (I.mayHaveSideEffects()) | ||
return false; | ||
if (I.mayHaveSideEffects()) { | ||
if (!LoopPredicationTraps) | ||
return false; | ||
HasThreadLocalSideEffects = true; | ||
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { | ||
// Simple stores cannot be observed by other threads. | ||
// If HasThreadLocalSideEffects is set, we check | ||
// crashingBBWithoutEffect to make sure that the crashing BB cannot | ||
// observe them either. | ||
if (!SI->isSimple()) | ||
return false; | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
bool Changed = false; | ||
// Finally, do the actual predication for all predicatable blocks. A couple | ||
|
@@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { | |
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); | ||
|
||
auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); | ||
if (HasThreadLocalSideEffects) { | ||
const BasicBlock *Unreachable = nullptr; | ||
for (const BasicBlock *Succ : BI->successors()) { | ||
if (isa<UnreachableInst>(Succ->getTerminator())) | ||
Unreachable = Succ; | ||
} | ||
// Exit BB which have one branch back into the loop and another one to | ||
// a trap can still be optimized, because local side effects cannot | ||
// be observed in the exit case (the trap). We could be smarter about | ||
// this, but for now lets pattern match common cases that directly trap. | ||
if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable)) | ||
return Changed; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you please add tests to check this does the right thing when there are multiple exits (some of which trap and some of which don't)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done |
||
Value *NewCond; | ||
if (ExitCount == ExactBTC) { | ||
NewCond = L->contains(BI->getSuccessor(0)) ? | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably makes sense to skip doesNotAccessMemory() instructions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See the comment below. We have to be very careful not to depend on anything in the loop. I would rather support more sophisticated trap handlers in a follow-up, given that both libcxx hardening and UBSan simply trap directly.