133 changes: 119 additions & 14 deletions llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -80,9 +80,11 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
@@ -132,6 +134,16 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
"with -Os/-Oz"),
cl::init(true), cl::Hidden);

static cl::opt<bool> EnableLoopVersioning(
"enable-" DEBUG_TYPE "-version",
cl::desc("Allow loop idiom recognize to version loop(s) when converting"),
cl::init(true), cl::ReallyHidden);

static cl::opt<int> LoopVersioningLengthLimit(
DEBUG_TYPE "-lv-lenght-limit",
cl::desc("Lower length limit for loop versioning"), cl::init(12),
cl::ReallyHidden);

namespace {

class LoopIdiomRecognize {
@@ -146,15 +158,18 @@ class LoopIdiomRecognize {
OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
std::unique_ptr<MemorySSAUpdater> MSSAU;
const LoopAccessInfo &LAI;

public:
explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, MemorySSA *MSSA,
const DataLayout *DL,
OptimizationRemarkEmitter &ORE)
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
OptimizationRemarkEmitter &ORE,
const LoopAccessInfo &LAI)
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE),
LAI(LAI) {
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
@@ -220,6 +235,9 @@ class LoopIdiomRecognize {
const SCEV *BECount);
bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
bool IsLoopMemset = false);
bool shouldVersionLoopForMemCpy(Instruction *TheStore,
Instruction *TheLoad) const;
void versionLoop(const SCEV *BECount, SCEVExpander &Expander);

/// @}
/// \name Noncountable Loop Idiom Handling
@@ -264,8 +282,9 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

LoopAccessInfoManager LAIs(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, &AR.TLI);
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
AR.MSSA, DL, ORE);
AR.MSSA, DL, ORE, LAIs.getInfo(L));
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();

@@ -1359,13 +1378,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
}

bool IsAtomic = TheStore->isAtomic() || TheLoad->isAtomic();
bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;

if (IsAtomic) {
// For now don't support unordered atomic memmove.
if (UseMemMove)
return Changed;

// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
assert((StoreAlign && LoadAlign) &&
@@ -1381,14 +1394,29 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
return Changed;
}

if (UseMemMove)
if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
IsMemCpy))
return Changed;

if (avoidLIRForMultiBlockLoop())
return Changed;

bool MayOverlap = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
bool UseMemMove = false;

// First, see if it is possible to use memmove. If not, determine whether we
// should version the loop so the instructions can be replaced with memcpy. If
// both options are rejected, bail out.
// TODO: It may be better to perform the versioning first, then use memcpy in
// the versioned loop and memmove in the original loop.
if (MayOverlap) {
// For now don't support unordered atomic memmove.
if (!IsAtomic && Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride,
*TheLoad, IsMemCpy)) {
UseMemMove = true;
} else if (shouldVersionLoopForMemCpy(TheStore, TheLoad)) {
versionLoop(BECount, Expander);
} else {
return Changed;
}
}

// Okay, everything is safe, we can transform this!

const SCEV *NumBytesS =
Expand Down Expand Up @@ -1486,6 +1514,83 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
return false;
}

// Returns true if we should version the loop, guarding the new version with
// runtime checks that the store and the load do not alias. This allows us to
// use `memcpy` instead of `memmove`. However, versioning increases the code
// size; in the worst case, with multiple load/store pairs, it grows
// exponentially. Therefore, versioning is supported only if the loop does
// nothing besides the transfer between this load and store. That is, we will
// version a loop of the form:
//
// ```
// for (i=0; i<len; i++)
// dst[i] = src[i];
// ```
//
// But we don't want to do this if the loop performs other work as well,
// e.g.,
//
// ```
// acc = 0;
// for (i=0; i<len; i++) {
// dst[i] = src[i];
// acc += ...;
// }
// ```
bool LoopIdiomRecognize::shouldVersionLoopForMemCpy(
Instruction *TheStore, Instruction *TheLoad) const {
if (ApplyCodeSizeHeuristics || !EnableLoopVersioning)
return false;

// There are cases where the load and store always overlap. Avoid versioning
// in these situations.
auto *Checking = LAI.getRuntimePointerChecking();
if (Checking->getNumberOfChecks() == 0)
return false;

BasicBlock *Cur = TheStore->getParent();
for (auto &I : *Cur) {
if (I.isDebugOrPseudoInst() || I.isTerminator())
continue;

// If there is a memory instruction other than `TheStore` and `TheLoad`,
// then bail out.
if (I.mayReadOrWriteMemory() && (&I) != TheStore && (&I) != TheLoad)
return false;

// We also abandon the versioning if any value defined in this block is used
// outside of it; such escaping values suggest the loop does more than the
// plain copy from `TheLoad` to `TheStore`.
for (const auto &U : I.uses()) {
const Instruction *UseI = cast<Instruction>(U.getUser());
if (UseI->getParent() != Cur)
return false;
}
}
Comment on lines +1561 to +1568
Contributor Author

I'm not sure if this process is achieving what I want to do. I'd like to target the loop that is "dedicated" to the copy from TheLoad to TheStore.

return true;
}

void LoopIdiomRecognize::versionLoop(const SCEV *BECount,
SCEVExpander &Expander) {
BasicBlock *Preheader = CurLoop->getLoopPreheader();
LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(),
CurLoop, LI, DT, SE);
LVer.versionLoop();

// If the loop trip count is small, the overhead of function calls may not be
// negligible. In this case, avoid function calls and run the loop as is.
BranchInst *Branch = dyn_cast<BranchInst>(Preheader->getTerminator());
if (!Branch)
return;
Type *IntTy = BECount->getType();
Value *Cond = Branch->getCondition();
Value *TripCount = Expander.expandCodeFor(BECount, IntTy, Branch);
IRBuilder<> Builder(Branch);
Value *BoundCond = Builder.CreateICmpSLT(
TripCount, ConstantInt::get(IntTy, LoopVersioningLengthLimit));
Value *NewCond = Builder.CreateOr(Cond, BoundCond);
Branch->setCondition(NewCond);
}

bool LoopIdiomRecognize::runOnNoncountableLoop() {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()
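Taken together, `processLoopStoreOfLoopLoad`, `shouldVersionLoopForMemCpy`, and `versionLoop` aim to produce control flow of roughly the shape sketched below. This is an illustrative C sketch, not part of the patch: `ranges_overlap` merely stands in for the runtime pointer checks that LoopVersioning emits, and the `len < 12` guard corresponds to the default `LoopVersioningLengthLimit`.

```c
#include <string.h> /* memcpy */

/* Stand-in for the runtime pointer checks emitted by LoopVersioning. */
static int ranges_overlap(const int *dst, const int *src, long len) {
  return dst < src + len && src < dst + len;
}

/* Intended shape of the transformed code for a plain copy loop whose
   no-alias property could not be proven statically. */
void copy_idiom(int *dst, const int *src, long len) {
  if (ranges_overlap(dst, src, len) || len < 12) {
    /* Fallback: the original scalar loop (overlap, or trip count too small
       for a libcall to pay off). */
    for (long i = 0; i < len; i++)
      dst[i] = src[i];
  } else {
    /* Versioned loop: the body collapses into a single memcpy call. */
    memcpy(dst, src, (size_t)len * sizeof *dst);
  }
}
```

The actual IR shape, with the `.lver.orig` fallback loop and the OR of the alias check and the trip-count guard, can be seen in the CHECK-VERSION lines of the expander test further down.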
30 changes: 15 additions & 15 deletions llvm/test/Transforms/LoopIdiom/basic.ll
@@ -624,27 +624,27 @@ for.end: ; preds = %for.body



; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy.
; This is a partial overlap case that needs alias checks to be safely
; transformed into a memcpy.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

define i32 @test14() nounwind {
; CHECK-LABEL: @test14(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[T5:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T5]], 4
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM]]
; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[T5]], 5
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM5]]
; CHECK-NEXT: store i32 [[T2]], ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[T5]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 2
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK-NEXT: [[T6:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_BODY1]] ], [ 0, [[FOR_BODY_PH:%.*]] ]
; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], 4
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[ADD1]] to i64
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM1]]
; CHECK-NEXT: [[T3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[T6]], 5
; CHECK-NEXT: [[IDXPROM6:%.*]] = sext i32 [[ADD5]] to i64
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM6]]
; CHECK-NEXT: store i32 [[T3]], ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT: [[INC1]] = add nsw i32 [[T6]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[INC1]], 2
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY1]], label [[FOR_END_LOOPEXIT1:%.*]]
; CHECK: for.end:
; CHECK-NEXT: [[T8:%.*]] = load i32, ptr getelementptr inbounds ([7 x i32], ptr @g_50, i32 0, i64 6), align 4
; CHECK-NEXT: ret i32 [[T8]]
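For context, `test14` corresponds roughly to the C below (reconstructed from the IR; this is not the original PR9815 source). The store writes the element that the next iteration reads, so replacing the loop with a single `memcpy` would change the result; any versioned fast path must therefore be guarded so that this case still executes the original loop.

```c
/* Rough C equivalent of test14, reconstructed from the IR above. */
int g_50[7] = {0, 0, 0, 0, 1, 0, 0};

int test14(void) {
  for (int i = 0; i < 2; i++)
    g_50[i + 5] = g_50[i + 4]; /* the stored value is read back next iteration */
  return g_50[6];
}
```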
112 changes: 86 additions & 26 deletions llvm/test/Transforms/LoopIdiom/expander-do-not-delete-reused-values.ll
@@ -1,34 +1,94 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-idiom -S %s | FileCheck %s
; RUN: opt -passes=loop-idiom -enable-loop-idiom-version=0 -S %s | FileCheck %s --check-prefix=CHECK-NO-VERSION
; RUN: opt -passes=loop-idiom -S %s | FileCheck %s --check-prefix=CHECK-VERSION

; Make sure we do not delete instructions not inserted during expansion, e.g.
; because the expande re-used existing instructions.
; because the expander re-used existing instructions.

define void @test(i64 %init, ptr %ptr) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[J_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[OUTER_LATCH:%.*]] ]
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[OUTER_LATCH]] ]
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i32 [[I_0]]
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[INNER_IV_NEXT:%.*]], [[INNER]] ], [ [[INIT:%.*]], [[OUTER_HEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 [[INNER_IV]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[INNER_IV]]
; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH]], label [[INNER]]
; CHECK: outer.latch:
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_0]], 1
; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_0]], [[INC]]
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i32 [[ADD]], 4000
; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK-NO-VERSION-LABEL: @test(
; CHECK-NO-VERSION-NEXT: entry:
; CHECK-NO-VERSION-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK-NO-VERSION: outer.header:
; CHECK-NO-VERSION-NEXT: [[J_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[OUTER_LATCH:%.*]] ]
; CHECK-NO-VERSION-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[OUTER_LATCH]] ]
; CHECK-NO-VERSION-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i32 [[I_0]]
; CHECK-NO-VERSION-NEXT: br label [[INNER:%.*]]
; CHECK-NO-VERSION: inner:
; CHECK-NO-VERSION-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[INNER_IV_NEXT:%.*]], [[INNER]] ], [ [[INIT:%.*]], [[OUTER_HEADER]] ]
; CHECK-NO-VERSION-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 [[INNER_IV]]
; CHECK-NO-VERSION-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NO-VERSION-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[INNER_IV]]
; CHECK-NO-VERSION-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX3]], align 4
; CHECK-NO-VERSION-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
; CHECK-NO-VERSION-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 0
; CHECK-NO-VERSION-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH]], label [[INNER]]
; CHECK-NO-VERSION: outer.latch:
; CHECK-NO-VERSION-NEXT: [[INC]] = add nuw nsw i32 [[J_0]], 1
; CHECK-NO-VERSION-NEXT: [[ADD]] = add nuw nsw i32 [[I_0]], [[INC]]
; CHECK-NO-VERSION-NEXT: [[EC_2:%.*]] = icmp eq i32 [[ADD]], 4000
; CHECK-NO-VERSION-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
; CHECK-NO-VERSION: exit:
; CHECK-NO-VERSION-NEXT: ret void
;
; CHECK-VERSION-LABEL: @test(
; CHECK-VERSION-NEXT: entry:
; CHECK-VERSION-NEXT: [[TMP0:%.*]] = shl i64 [[INIT:%.*]], 2
; CHECK-VERSION-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]]
; CHECK-VERSION-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]]
; CHECK-VERSION-NEXT: [[TMP9:%.*]] = sub i64 -1, [[INIT]]
; CHECK-VERSION-NEXT: [[TMP1:%.*]] = mul i64 [[INIT]], -4
; CHECK-VERSION-NEXT: br label [[INNER_LVER_CHECK:%.*]]
; CHECK-VERSION: inner.lver.check:
; CHECK-VERSION-NEXT: [[J_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[OUTER_LATCH:%.*]] ]
; CHECK-VERSION-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[OUTER_LATCH]] ]
; CHECK-VERSION-NEXT: [[TMP2:%.*]] = sext i32 [[I_0]] to i64
; CHECK-VERSION-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2
; CHECK-VERSION-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 [[TMP3]]
; CHECK-VERSION-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP3]]
; CHECK-VERSION-NEXT: [[TMP4:%.*]] = sext i32 [[I_0]] to i64
; CHECK-VERSION-NEXT: [[TMP5:%.*]] = shl nsw i64 [[TMP4]], 2
; CHECK-VERSION-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP5]]
; CHECK-VERSION-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR]], i32 [[I_0]]
; CHECK-VERSION-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP3]], [[PTR]]
; CHECK-VERSION-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP4]]
; CHECK-VERSION-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-VERSION-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP9]], 12
; CHECK-VERSION-NEXT: [[TMP8:%.*]] = or i1 [[FOUND_CONFLICT]], [[TMP10]]
; CHECK-VERSION-NEXT: br i1 [[TMP8]], label [[INNER_PH_LVER_ORIG:%.*]], label [[INNER_PH:%.*]]
; CHECK-VERSION: inner.ph.lver.orig:
; CHECK-VERSION-NEXT: br label [[INNER_LVER_ORIG:%.*]]
; CHECK-VERSION: inner.lver.orig:
; CHECK-VERSION-NEXT: [[INNER_IV_LVER_ORIG:%.*]] = phi i64 [ [[INNER_IV_NEXT_LVER_ORIG:%.*]], [[INNER_LVER_ORIG]] ], [ [[INIT]], [[INNER_PH_LVER_ORIG]] ]
; CHECK-VERSION-NEXT: [[ARRAYIDX_LVER_ORIG:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 [[INNER_IV_LVER_ORIG]]
; CHECK-VERSION-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_LVER_ORIG]], align 4
; CHECK-VERSION-NEXT: [[ARRAYIDX3_LVER_ORIG:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[INNER_IV_LVER_ORIG]]
; CHECK-VERSION-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX3_LVER_ORIG]], align 4
; CHECK-VERSION-NEXT: [[INNER_IV_NEXT_LVER_ORIG]] = add nsw i64 [[INNER_IV_LVER_ORIG]], 1
; CHECK-VERSION-NEXT: [[EC_1_LVER_ORIG:%.*]] = icmp eq i64 [[INNER_IV_NEXT_LVER_ORIG]], 0
; CHECK-VERSION-NEXT: br i1 [[EC_1_LVER_ORIG]], label [[OUTER_LATCH_LOOPEXIT:%.*]], label [[INNER_LVER_ORIG]]
; CHECK-VERSION: inner.ph:
; CHECK-VERSION-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SCEVGEP1]], ptr align 4 [[SCEVGEP]], i64 [[TMP1]], i1 false)
; CHECK-VERSION-NEXT: br label [[INNER:%.*]]
; CHECK-VERSION: inner:
; CHECK-VERSION-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[INNER_IV_NEXT:%.*]], [[INNER]] ], [ [[INIT]], [[INNER_PH]] ]
; CHECK-VERSION-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 [[INNER_IV]]
; CHECK-VERSION-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-VERSION-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[INNER_IV]]
; CHECK-VERSION-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
; CHECK-VERSION-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 0
; CHECK-VERSION-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH_LOOPEXIT5:%.*]], label [[INNER]]
; CHECK-VERSION: outer.latch.loopexit:
; CHECK-VERSION-NEXT: br label [[OUTER_LATCH]]
; CHECK-VERSION: outer.latch.loopexit5:
; CHECK-VERSION-NEXT: br label [[OUTER_LATCH]]
; CHECK-VERSION: outer.latch:
; CHECK-VERSION-NEXT: [[INC]] = add nuw nsw i32 [[J_0]], 1
; CHECK-VERSION-NEXT: [[ADD]] = add nuw nsw i32 [[I_0]], [[INC]]
; CHECK-VERSION-NEXT: [[EC_2:%.*]] = icmp eq i32 [[ADD]], 4000
; CHECK-VERSION-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[INNER_LVER_CHECK]]
; CHECK-VERSION: exit:
; CHECK-VERSION-NEXT: ret void
;
entry:
br label %outer.header