Skip to content
57 changes: 55 additions & 2 deletions llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
Expand Down Expand Up @@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool> LoopPredicationTraps(
"indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions that trap in loops with only local writes"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
Expand Down Expand Up @@ -1816,11 +1821,31 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
bool HasLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
if (I.mayHaveSideEffects())
return false;
if (I.mayHaveSideEffects()) {
if (!LoopPredicationTraps)
return false;
HasLocalSideEffects = true;
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// The local could have leaked out of the function, so we need to
// consider atomic operations as effects.
// Because we need to preserve the relative order of volatile
// accesses, turn off this optimization if we see any of them.
// TODO:
// We could be smarter about volatile, and check whether the
// reordering is valid.
// We also could be smarter about atomic, and check whether the
// local has leaked.
if (SI->isAtomic() || SI->isVolatile() ||
findAllocaForValue(SI->getPointerOperand(), false) == nullptr)
return false;
} else {
return false;
}
}

bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
Expand All @@ -1840,6 +1865,34 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
if (HasLocalSideEffects) {
BasicBlock *Unreachable = nullptr;
BasicBlock *InLoop = nullptr;
for (BasicBlock *Succ : BI->successors()) {
if (isa<UnreachableInst>(Succ->getTerminator()))
Unreachable = Succ;
else if (L->contains(Succ))
InLoop = Succ;
}
// Exit BB which have one branch back into the loop and another one to
// a trap can still be optimized, because local side effects cannot
// be observed in the exit case (the trap). We could be smarter about
// this, but for now lets pattern match common cases that directly trap.
if (Unreachable == nullptr || InLoop == nullptr)
return Changed;
if (llvm::any_of(*Unreachable, [](Instruction &I) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() != Intrinsic::trap &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really want to introduce trap-specific optimizations in IndVars. The general idea here is useful also for libcxx hardening, rust bounds checks, etc.

The way I would generalize this is that we have an unreachable (or even a return) which is preceded by instructions that do not Ref the allocas. We could do something generic with AA here, or limit this to the cases where either a) the stored allocas do not escape or b) the instruction do not read anything but inaccessible memory. llvm.trap is already memory(inaccessiblemem: readwrite). llvm.ubsantrap isn't, but should probably be.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I can generalize. I was going to do it in some follow up, but can also do it in this change.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nikic - I think you slightly misread this. I don't see anything in the logic which is assuming an alloca, instead the approach seems to be to say that the side effect being removed can't be observed before termination of the program. In particular, I believe this currently allows arbitrary heap memory.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't currently allow heap memory stores in the loop. That is what the findAllocaForValue(...) == nullptr does.

But thinking about it again that maybe isn't necessary, because we disallow synchronization in the loop. In general, I wanted to play it safe in the first iteration of this (by restricting to alloacs, and only allowing straight traps) and then improve in follow ups.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The general idea here is useful also for libcxx hardening, rust bounds checks, etc.

AFAICT, libcxx hardening just calls llvm.trap() in the failure path for non-debug mode, so this patch should work as is for that.

II->getIntrinsicID() != Intrinsic::ubsantrap)
return true;
} else if (!isa<UnreachableInst>(I)) {
return true;
}
return false;
})) {
return Changed;
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add tests to check this does the right thing when there are multiple exits (some of which trap and some of which don't)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?
Expand Down
Loading
Loading