Skip to content
54 changes: 52 additions & 2 deletions llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
Expand Down Expand Up @@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool> LoopPredicationTraps(
"indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions that trap in loops with only local writes"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
Expand Down Expand Up @@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
return Changed;
}

static bool crashingBBWithoutEffect(const BasicBlock &BB) {
return llvm::all_of(BB, [](const Instruction &I) {
// TODO: for now this is overly restrictive, to make sure nothing in this
// BB can depend on the loop body.
// It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
// load does not have a side effect, but we could have
// %a = load ptr, ptr %ptr
// %b = load i32, ptr %a
// Now if the loop stored a non-nullptr to %a, we could cause a nullptr
// dereference by skipping over loop iterations.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably makes sense to skip doesNotAccessMemory() instructions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the comment below. We have to be very careful not to depend on anything in the loop. I would rather do more sophisticated trap-handlers in a follow up, given both libcxx hardening and ubsan just directly trap.

if (const auto *CB = dyn_cast<CallBase>(&I)) {
if (CB->onlyAccessesInaccessibleMemory())
return true;
}
return isa<UnreachableInst>(I);
});
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
Expand Down Expand Up @@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
bool HasThreadLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
if (I.mayHaveSideEffects())
return false;
if (I.mayHaveSideEffects()) {
if (!LoopPredicationTraps)
return false;
HasThreadLocalSideEffects = true;
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// Simple stores cannot be observed by other threads.
// If HasThreadLocalSideEffects is set, we check
// crashingBBWithoutEffect to make sure that the crashing BB cannot
// observe them either.
if (!SI->isSimple())
return false;
} else {
return false;
}
}

bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
Expand All @@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
if (HasThreadLocalSideEffects) {
const BasicBlock *Unreachable = nullptr;
for (const BasicBlock *Succ : BI->successors()) {
if (isa<UnreachableInst>(Succ->getTerminator()))
Unreachable = Succ;
}
// Exit BB which have one branch back into the loop and another one to
// a trap can still be optimized, because local side effects cannot
// be observed in the exit case (the trap). We could be smarter about
// this, but for now lets pattern match common cases that directly trap.
if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
return Changed;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add tests to check this does the right thing when there are multiple exits (some of which trap and some of which don't)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?
Expand Down
16 changes: 4 additions & 12 deletions llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ define void @f_sadd_overflow(ptr %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 2147483645, %[[ENTRY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 2147483647
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK-NEXT: br i1 true, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK: [[TRAP]]:
; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]]
; CHECK-NEXT: unreachable, !nosanitize [[META0]]
Expand Down Expand Up @@ -150,8 +149,7 @@ define void @f_uadd_overflow(ptr %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ -6, %[[ENTRY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK-NEXT: br i1 true, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK: [[TRAP]]:
; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]]
; CHECK-NEXT: unreachable, !nosanitize [[META0]]
Expand Down Expand Up @@ -243,10 +241,7 @@ define void @f_ssub_overflow(ptr nocapture %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ -2147483642, %[[ENTRY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[TMP0]], i32 1)
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
; CHECK-NEXT: br i1 [[TMP2]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK-NEXT: br i1 true, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK: [[TRAP]]:
; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]]
; CHECK-NEXT: unreachable, !nosanitize [[META0]]
Expand Down Expand Up @@ -339,10 +334,7 @@ define void @f_usub_overflow(ptr nocapture %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 15, %[[ENTRY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[TMP0]], i32 1)
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
; CHECK-NEXT: br i1 [[TMP2]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK-NEXT: br i1 true, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK: [[TRAP]]:
; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]]
; CHECK-NEXT: unreachable, !nosanitize [[META0]]
Expand Down
Loading