5454#include " llvm/IR/InstrTypes.h"
5555#include " llvm/IR/Instruction.h"
5656#include " llvm/IR/Instructions.h"
57+ #include " llvm/IR/IntrinsicInst.h"
5758#include " llvm/IR/Intrinsics.h"
5859#include " llvm/IR/PassManager.h"
5960#include " llvm/IR/PatternMatch.h"
@@ -118,6 +119,10 @@ static cl::opt<bool>
118119LoopPredication (" indvars-predicate-loops" , cl::Hidden, cl::init(true ),
119120 cl::desc(" Predicate conditions in read only loops" ));
120121
122+ static cl::opt<bool > LoopPredicationTraps (
123+ " indvars-predicate-loop-traps" , cl::Hidden, cl::init(true ),
124+ cl::desc(" Predicate conditions that trap in loops with only local writes" ));
125+
121126static cl::opt<bool >
122127AllowIVWidening (" indvars-widen-indvars" , cl::Hidden, cl::init(true ),
123128 cl::desc(" Allow widening of indvars to eliminate s/zext" ));
@@ -1705,6 +1710,27 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
17051710 return Changed;
17061711}
17071712
1713+ static bool crashingBBWithoutEffect (const BasicBlock &BB) {
1714+ return llvm::all_of (BB, [](const Instruction &I) {
1715+ // TODO: for now this is overly restrictive, to make sure nothing in this
1716+ // BB can depend on the loop body.
1717+ // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
1718+ // load does not have a side effect, but we could have
1719+ // %a = load ptr, ptr %ptr
1720+ // %b = load i32, ptr %a
1721+ // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
1722+ // dereference by skipping over loop iterations.
1723+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
1724+ if (CB->onlyAccessesInaccessibleMemory () &&
1725+ llvm::all_of (CB->args (), [](const llvm::Use &U) {
1726+ return isa<Constant>(U.get ());
1727+ }))
1728+ return true ;
1729+ }
1730+ return isa<UnreachableInst>(I);
1731+ });
1732+ }
1733+
17081734bool IndVarSimplify::predicateLoopExits (Loop *L, SCEVExpander &Rewriter) {
17091735 SmallVector<BasicBlock*, 16 > ExitingBlocks;
17101736 L->getExitingBlocks (ExitingBlocks);
@@ -1817,11 +1843,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
18171843 // suggestions on how to improve this? I can obviously bail out for outer
18181844 // loops, but that seems less than ideal. MemorySSA can find memory writes,
18191845 // is that enough for *all* side effects?
1846+ bool HasThreadLocalSideEffects = false ;
18201847 for (BasicBlock *BB : L->blocks ())
18211848 for (auto &I : *BB)
18221849 // TODO:isGuaranteedToTransfer
1823- if (I.mayHaveSideEffects ())
1824- return false ;
1850+ if (I.mayHaveSideEffects ()) {
1851+ if (!LoopPredicationTraps)
1852+ return false ;
1853+ HasThreadLocalSideEffects = true ;
1854+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
1855+ // Simple stores cannot be observed by other threads.
1856+ // If HasThreadLocalSideEffects is set, we check
1857+ // crashingBBWithoutEffect to make sure that the crashing BB cannot
1858+ // observe them either.
1859+ if (!SI->isSimple ())
1860+ return false ;
1861+ } else {
1862+ return false ;
1863+ }
1864+ }
18251865
18261866 bool Changed = false ;
18271867 // Finally, do the actual predication for all predicatable blocks. A couple
@@ -1841,6 +1881,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
18411881 const SCEV *ExitCount = SE->getExitCount (L, ExitingBB);
18421882
18431883 auto *BI = cast<BranchInst>(ExitingBB->getTerminator ());
1884+ if (HasThreadLocalSideEffects) {
1885+ const BasicBlock *Unreachable = nullptr ;
1886+ for (const BasicBlock *Succ : BI->successors ()) {
1887+ if (isa<UnreachableInst>(Succ->getTerminator ()))
1888+ Unreachable = Succ;
1889+ }
1890+ // Exit BB which have one branch back into the loop and another one to
1891+ // a trap can still be optimized, because local side effects cannot
1892+ // be observed in the exit case (the trap). We could be smarter about
1893+ // this, but for now lets pattern match common cases that directly trap.
1894+ if (Unreachable == nullptr || !crashingBBWithoutEffect (*Unreachable))
1895+ return Changed;
1896+ }
18441897 Value *NewCond;
18451898 if (ExitCount == ExactBTC) {
18461899 NewCond = L->contains (BI->getSuccessor (0 )) ?
0 commit comments