Skip to content

Commit 549b816

Browse files
committed
[LoopIdiom] Perform loop versioning to use memcpy
Until now, LoopIdiomRecognize has given up on processing a loop when the target load and store may alias. This patch introduces loop versioning within LoopIdiomRecognize so that the memcpy replacement can still be performed in such cases. Currently, loop versioning is only supported for loops that are dedicated to transferring content from a load to a store. This limitation prevents the code size from growing too much. Related to #50892
1 parent 956cfa6 commit 549b816

File tree

5 files changed

+431
-56
lines changed

5 files changed

+431
-56
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 119 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,11 @@
8080
#include "llvm/Support/Debug.h"
8181
#include "llvm/Support/InstructionCost.h"
8282
#include "llvm/Support/raw_ostream.h"
83+
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
8384
#include "llvm/Transforms/Utils/BuildLibCalls.h"
8485
#include "llvm/Transforms/Utils/Local.h"
8586
#include "llvm/Transforms/Utils/LoopUtils.h"
87+
#include "llvm/Transforms/Utils/LoopVersioning.h"
8688
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
8789
#include <algorithm>
8890
#include <cassert>
@@ -132,6 +134,16 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
132134
"with -Os/-Oz"),
133135
cl::init(true), cl::Hidden);
134136

137+
// Command-line switch (default: on) consulted by shouldVersionLoopForMemCpy:
// when it is off, the pass never versions a loop in order to emit memcpy.
static cl::opt<bool> EnableLoopVersioning(
138+
"enable-" DEBUG_TYPE "-version",
139+
cl::desc("Allow loop idiom recognize to version loop(s) when converting"),
140+
cl::init(true), cl::ReallyHidden);
141+
142+
// Threshold (default 12) used by versionLoop: the expanded backedge-taken
// count is compared against it with a signed less-than, and the result is
// OR'ed into the versioning branch condition.
// NOTE(review): the flag name spells "lenght"; presumably a typo for "length".
// Renaming it would change the command-line interface, so confirm before
// fixing.
static cl::opt<int> LoopVersioningLengthLimit(
143+
DEBUG_TYPE "-lv-lenght-limit",
144+
cl::desc("Lower length limit for loop versioning"), cl::init(12),
145+
cl::ReallyHidden);
146+
135147
namespace {
136148

137149
class LoopIdiomRecognize {
@@ -146,15 +158,18 @@ class LoopIdiomRecognize {
146158
OptimizationRemarkEmitter &ORE;
147159
bool ApplyCodeSizeHeuristics;
148160
std::unique_ptr<MemorySSAUpdater> MSSAU;
161+
const LoopAccessInfo &LAI;
149162

150163
public:
151164
explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
152165
LoopInfo *LI, ScalarEvolution *SE,
153166
TargetLibraryInfo *TLI,
154167
const TargetTransformInfo *TTI, MemorySSA *MSSA,
155168
const DataLayout *DL,
156-
OptimizationRemarkEmitter &ORE)
157-
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
169+
OptimizationRemarkEmitter &ORE,
170+
const LoopAccessInfo &LAI)
171+
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE),
172+
LAI(LAI) {
158173
if (MSSA)
159174
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
160175
}
@@ -220,6 +235,9 @@ class LoopIdiomRecognize {
220235
const SCEV *BECount);
221236
bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
222237
bool IsLoopMemset = false);
238+
bool shouldVersionLoopForMemCpy(Instruction *TheStore,
239+
Instruction *TheLoad) const;
240+
void versionLoop(const SCEV *BECount, SCEVExpander &Expander);
223241

224242
/// @}
225243
/// \name Noncountable Loop Idiom Handling
@@ -264,8 +282,9 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
264282
// but ORE cannot be preserved (see comment before the pass definition).
265283
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
266284

285+
LoopAccessInfoManager LAIs(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, &AR.TLI);
267286
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
268-
AR.MSSA, DL, ORE);
287+
AR.MSSA, DL, ORE, LAIs.getInfo(L));
269288
if (!LIR.runOnLoop(&L))
270289
return PreservedAnalyses::all();
271290

@@ -1359,13 +1378,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
13591378
}
13601379

13611380
bool IsAtomic = TheStore->isAtomic() || TheLoad->isAtomic();
1362-
bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
1363-
13641381
if (IsAtomic) {
1365-
// For now don't support unordered atomic memmove.
1366-
if (UseMemMove)
1367-
return Changed;
1368-
13691382
// We cannot allow unaligned ops for unordered load/store, so reject
13701383
// anything where the alignment isn't at least the element size.
13711384
assert((StoreAlign && LoadAlign) &&
@@ -1381,14 +1394,29 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
13811394
return Changed;
13821395
}
13831396

1384-
if (UseMemMove)
1385-
if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
1386-
IsMemCpy))
1387-
return Changed;
1388-
13891397
if (avoidLIRForMultiBlockLoop())
13901398
return Changed;
13911399

1400+
bool MayOverlap = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
1401+
bool UseMemMove = false;
1402+
1403+
// First, see if it is possible to use memmove. If not, determine whether we
1404+
// should version the loops to replace the instructions with memcpy. If both
1405+
// are rejected, then bail out.
1406+
// TODO: It may be better to perform the versioning at first, then use memcpy
1407+
// in the versioned loop and memmove in the original loop.
1408+
if (MayOverlap) {
1409+
// For now don't support unordered atomic memmove.
1410+
if (!IsAtomic && Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride,
1411+
*TheLoad, IsMemCpy)) {
1412+
UseMemMove = true;
1413+
} else if (shouldVersionLoopForMemCpy(TheStore, TheLoad)) {
1414+
versionLoop(BECount, Expander);
1415+
} else {
1416+
return Changed;
1417+
}
1418+
}
1419+
13921420
// Okay, everything is safe, we can transform this!
13931421

13941422
const SCEV *NumBytesS =
@@ -1486,6 +1514,83 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
14861514
return false;
14871515
}
14881516

1517+
// Returns true if we should version the loop and make sure that there is no
1518+
// alias between the store and the load. This allows us to use `memcpy` instead
1519+
// of `memmove`. However, versioning increases the code size. In the worst case,
1520+
// if there are multiple load/store pairs, the code size increases
1521+
// exponentially. Therefore, versioning is supported only if the loop only does
1522+
// transfers related to this store and load. That is, we will version the loop
1523+
// as follows:
1524+
//
1525+
// ```
1526+
// for (i=0; i<len; i++)
1527+
// dst[i] = src[i];
1528+
// ```
1529+
//
1530+
// But we don't want to do this if there are other processes inside the loop,
1531+
// e.g.,
1532+
//
1533+
// ```
1534+
// acc = 0;
1535+
// for (i=0; i<len; i++) {
1536+
// dst[i] = src[i];
1537+
// acc += ...;
1538+
// }
1539+
// ```
1540+
bool LoopIdiomRecognize::shouldVersionLoopForMemCpy(
    Instruction *TheStore, Instruction *TheLoad) const {
  // Versioning duplicates the loop, so it is skipped when the option is
  // disabled or when code-size heuristics are in effect.
  if (!EnableLoopVersioning || ApplyCodeSizeHeuristics)
    return false;

  // Without any runtime pointer check there is nothing to version on; this
  // also covers the case where the load and the store always overlap.
  const auto *RtChecks = LAI.getRuntimePointerChecking();
  if (RtChecks->getNumberOfChecks() == 0)
    return false;

  BasicBlock *BodyBB = TheStore->getParent();
  for (Instruction &Inst : *BodyBB) {
    if (Inst.isTerminator() || Inst.isDebugOrPseudoInst())
      continue;

    // Any memory access other than the copy's own load/store disqualifies
    // the loop.
    const bool IsCopyInst = &Inst == TheStore || &Inst == TheLoad;
    if (!IsCopyInst && Inst.mayReadOrWriteMemory())
      return false;

    // Every value defined in this block must be consumed inside this block;
    // a user in another block means the loop does more than the plain copy.
    for (const User *Usr : Inst.users())
      if (cast<Instruction>(Usr)->getParent() != BodyBB)
        return false;
  }
  return true;
}
1571+
1572+
void LoopIdiomRecognize::versionLoop(const SCEV *BECount,
1573+
SCEVExpander &Expander) {
1574+
BasicBlock *Preheader = CurLoop->getLoopPreheader();
1575+
LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(),
1576+
CurLoop, LI, DT, SE);
1577+
LVer.versionLoop();
1578+
1579+
// If the loop trip count is small, the overhead of function calls may not be
1580+
// negligible. In this case, avoid function calls and run the loop as is.
1581+
BranchInst *Branch = cast<BranchInst>(Preheader->getTerminator());
1582+
if (!Branch)
1583+
return;
1584+
Type *IntTy = BECount->getType();
1585+
Value *Cond = Branch->getCondition();
1586+
Value *TripCount = Expander.expandCodeFor(BECount, IntTy, Branch);
1587+
IRBuilder<> Builder(Branch);
1588+
Value *BoundCond = Builder.CreateICmpSLT(
1589+
TripCount, ConstantInt::get(IntTy, LoopVersioningLengthLimit));
1590+
Value *NewCond = Builder.CreateOr(Cond, BoundCond);
1591+
Branch->setCondition(NewCond);
1592+
}
1593+
14891594
bool LoopIdiomRecognize::runOnNoncountableLoop() {
14901595
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
14911596
<< CurLoop->getHeader()->getParent()->getName()

llvm/test/Transforms/LoopIdiom/basic.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -624,27 +624,27 @@ for.end: ; preds = %for.body
624624

625625

626626

627-
; PR9815 - This is a partial overlap case that cannot be safely transformed
628-
; into a memcpy.
627+
; This is a partial overlap case that needs alias checks to be safely
628+
; transformed into a memcpy.
629629
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
630630

631631
define i32 @test14() nounwind {
632632
; CHECK-LABEL: @test14(
633633
; CHECK-NEXT: entry:
634-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
634+
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
635635
; CHECK: for.body:
636-
; CHECK-NEXT: [[T5:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
637-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T5]], 4
638-
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
639-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM]]
640-
; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
641-
; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[T5]], 5
642-
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
643-
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM5]]
644-
; CHECK-NEXT: store i32 [[T2]], ptr [[ARRAYIDX6]], align 4
645-
; CHECK-NEXT: [[INC]] = add nsw i32 [[T5]], 1
646-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 2
647-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
636+
; CHECK-NEXT: [[T6:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_BODY1]] ], [ 0, [[FOR_BODY_PH:%.*]] ]
637+
; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], 4
638+
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[ADD1]] to i64
639+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM1]]
640+
; CHECK-NEXT: [[T3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
641+
; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[T6]], 5
642+
; CHECK-NEXT: [[IDXPROM6:%.*]] = sext i32 [[ADD5]] to i64
643+
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [7 x i32], ptr @g_50, i32 0, i64 [[IDXPROM6]]
644+
; CHECK-NEXT: store i32 [[T3]], ptr [[ARRAYIDX7]], align 4
645+
; CHECK-NEXT: [[INC1]] = add nsw i32 [[T6]], 1
646+
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[INC1]], 2
647+
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY1]], label [[FOR_END_LOOPEXIT1:%.*]]
648648
; CHECK: for.end:
649649
; CHECK-NEXT: [[T8:%.*]] = load i32, ptr getelementptr inbounds ([7 x i32], ptr @g_50, i32 0, i64 6), align 4
650650
; CHECK-NEXT: ret i32 [[T8]]

llvm/test/Transforms/LoopIdiom/expander-do-not-delete-reused-values.ll

Lines changed: 86 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,94 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -passes=loop-idiom -S %s | FileCheck %s
2+
; RUN: opt -passes=loop-idiom -enable-loop-idiom-version=0 -S %s | FileCheck %s --check-prefix=CHECK-NO-VERSION
3+
; RUN: opt -passes=loop-idiom -S %s | FileCheck %s --check-prefix=CHECK-VERSION
34

45
; Make sure we do not delete instructions not inserted during expansion, e.g.
5-
; because the expande re-used existing instructions.
6+
; because the expander re-used existing instructions.
67

78
define void @test(i64 %init, ptr %ptr) {
8-
; CHECK-LABEL: @test(
9-
; CHECK-NEXT: entry:
10-
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
11-
; CHECK: outer.header:
12-
; CHECK-NEXT: [[J_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[OUTER_LATCH:%.*]] ]
13-
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[OUTER_LATCH]] ]
14-
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i32 [[I_0]]
15-
; CHECK-NEXT: br label [[INNER:%.*]]
16-
; CHECK: inner:
17-
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[INNER_IV_NEXT:%.*]], [[INNER]] ], [ [[INIT:%.*]], [[OUTER_HEADER]] ]
18-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 [[INNER_IV]]
19-
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
20-
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[INNER_IV]]
21-
; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX3]], align 4
22-
; CHECK-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
23-
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 0
24-
; CHECK-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH]], label [[INNER]]
25-
; CHECK: outer.latch:
26-
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_0]], 1
27-
; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_0]], [[INC]]
28-
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i32 [[ADD]], 4000
29-
; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
30-
; CHECK: exit:
31-
; CHECK-NEXT: ret void
9+
; CHECK-NO-VERSION-LABEL: @test(
10+
; CHECK-NO-VERSION-NEXT: entry:
11+
; CHECK-NO-VERSION-NEXT: br label [[OUTER_HEADER:%.*]]
12+
; CHECK-NO-VERSION: outer.header:
13+
; CHECK-NO-VERSION-NEXT: [[J_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[OUTER_LATCH:%.*]] ]
14+
; CHECK-NO-VERSION-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[OUTER_LATCH]] ]
15+
; CHECK-NO-VERSION-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i32 [[I_0]]
16+
; CHECK-NO-VERSION-NEXT: br label [[INNER:%.*]]
17+
; CHECK-NO-VERSION: inner:
18+
; CHECK-NO-VERSION-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[INNER_IV_NEXT:%.*]], [[INNER]] ], [ [[INIT:%.*]], [[OUTER_HEADER]] ]
19+
; CHECK-NO-VERSION-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 [[INNER_IV]]
20+
; CHECK-NO-VERSION-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
21+
; CHECK-NO-VERSION-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[INNER_IV]]
22+
; CHECK-NO-VERSION-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX3]], align 4
23+
; CHECK-NO-VERSION-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
24+
; CHECK-NO-VERSION-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 0
25+
; CHECK-NO-VERSION-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH]], label [[INNER]]
26+
; CHECK-NO-VERSION: outer.latch:
27+
; CHECK-NO-VERSION-NEXT: [[INC]] = add nuw nsw i32 [[J_0]], 1
28+
; CHECK-NO-VERSION-NEXT: [[ADD]] = add nuw nsw i32 [[I_0]], [[INC]]
29+
; CHECK-NO-VERSION-NEXT: [[EC_2:%.*]] = icmp eq i32 [[ADD]], 4000
30+
; CHECK-NO-VERSION-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
31+
; CHECK-NO-VERSION: exit:
32+
; CHECK-NO-VERSION-NEXT: ret void
33+
;
34+
; CHECK-VERSION-LABEL: @test(
35+
; CHECK-VERSION-NEXT: entry:
36+
; CHECK-VERSION-NEXT: [[TMP0:%.*]] = shl i64 [[INIT:%.*]], 2
37+
; CHECK-VERSION-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]]
38+
; CHECK-VERSION-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]]
39+
; CHECK-VERSION-NEXT: [[TMP9:%.*]] = sub i64 -1, [[INIT]]
40+
; CHECK-VERSION-NEXT: [[TMP1:%.*]] = mul i64 [[INIT]], -4
41+
; CHECK-VERSION-NEXT: br label [[INNER_LVER_CHECK:%.*]]
42+
; CHECK-VERSION: inner.lver.check:
43+
; CHECK-VERSION-NEXT: [[J_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[OUTER_LATCH:%.*]] ]
44+
; CHECK-VERSION-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[OUTER_LATCH]] ]
45+
; CHECK-VERSION-NEXT: [[TMP2:%.*]] = sext i32 [[I_0]] to i64
46+
; CHECK-VERSION-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2
47+
; CHECK-VERSION-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 [[TMP3]]
48+
; CHECK-VERSION-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP3]]
49+
; CHECK-VERSION-NEXT: [[TMP4:%.*]] = sext i32 [[I_0]] to i64
50+
; CHECK-VERSION-NEXT: [[TMP5:%.*]] = shl nsw i64 [[TMP4]], 2
51+
; CHECK-VERSION-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP5]]
52+
; CHECK-VERSION-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR]], i32 [[I_0]]
53+
; CHECK-VERSION-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP3]], [[PTR]]
54+
; CHECK-VERSION-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP4]]
55+
; CHECK-VERSION-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
56+
; CHECK-VERSION-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP9]], 12
57+
; CHECK-VERSION-NEXT: [[TMP8:%.*]] = or i1 [[FOUND_CONFLICT]], [[TMP10]]
58+
; CHECK-VERSION-NEXT: br i1 [[TMP8]], label [[INNER_PH_LVER_ORIG:%.*]], label [[INNER_PH:%.*]]
59+
; CHECK-VERSION: inner.ph.lver.orig:
60+
; CHECK-VERSION-NEXT: br label [[INNER_LVER_ORIG:%.*]]
61+
; CHECK-VERSION: inner.lver.orig:
62+
; CHECK-VERSION-NEXT: [[INNER_IV_LVER_ORIG:%.*]] = phi i64 [ [[INNER_IV_NEXT_LVER_ORIG:%.*]], [[INNER_LVER_ORIG]] ], [ [[INIT]], [[INNER_PH_LVER_ORIG]] ]
63+
; CHECK-VERSION-NEXT: [[ARRAYIDX_LVER_ORIG:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 [[INNER_IV_LVER_ORIG]]
64+
; CHECK-VERSION-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_LVER_ORIG]], align 4
65+
; CHECK-VERSION-NEXT: [[ARRAYIDX3_LVER_ORIG:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[INNER_IV_LVER_ORIG]]
66+
; CHECK-VERSION-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX3_LVER_ORIG]], align 4
67+
; CHECK-VERSION-NEXT: [[INNER_IV_NEXT_LVER_ORIG]] = add nsw i64 [[INNER_IV_LVER_ORIG]], 1
68+
; CHECK-VERSION-NEXT: [[EC_1_LVER_ORIG:%.*]] = icmp eq i64 [[INNER_IV_NEXT_LVER_ORIG]], 0
69+
; CHECK-VERSION-NEXT: br i1 [[EC_1_LVER_ORIG]], label [[OUTER_LATCH_LOOPEXIT:%.*]], label [[INNER_LVER_ORIG]]
70+
; CHECK-VERSION: inner.ph:
71+
; CHECK-VERSION-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SCEVGEP1]], ptr align 4 [[SCEVGEP]], i64 [[TMP1]], i1 false)
72+
; CHECK-VERSION-NEXT: br label [[INNER:%.*]]
73+
; CHECK-VERSION: inner:
74+
; CHECK-VERSION-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[INNER_IV_NEXT:%.*]], [[INNER]] ], [ [[INIT]], [[INNER_PH]] ]
75+
; CHECK-VERSION-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 [[INNER_IV]]
76+
; CHECK-VERSION-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
77+
; CHECK-VERSION-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[INNER_IV]]
78+
; CHECK-VERSION-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
79+
; CHECK-VERSION-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 0
80+
; CHECK-VERSION-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH_LOOPEXIT5:%.*]], label [[INNER]]
81+
; CHECK-VERSION: outer.latch.loopexit:
82+
; CHECK-VERSION-NEXT: br label [[OUTER_LATCH]]
83+
; CHECK-VERSION: outer.latch.loopexit5:
84+
; CHECK-VERSION-NEXT: br label [[OUTER_LATCH]]
85+
; CHECK-VERSION: outer.latch:
86+
; CHECK-VERSION-NEXT: [[INC]] = add nuw nsw i32 [[J_0]], 1
87+
; CHECK-VERSION-NEXT: [[ADD]] = add nuw nsw i32 [[I_0]], [[INC]]
88+
; CHECK-VERSION-NEXT: [[EC_2:%.*]] = icmp eq i32 [[ADD]], 4000
89+
; CHECK-VERSION-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[INNER_LVER_CHECK]]
90+
; CHECK-VERSION: exit:
91+
; CHECK-VERSION-NEXT: ret void
3292
;
3393
entry:
3494
br label %outer.header

0 commit comments

Comments
 (0)