Skip to content

Commit c651b2c

Browse files
committed
[LoopPeel] Remove known trip count restriction when peeling last.
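Remove the restriction that the loop must be known to execute at least 2 iterations when peeling the last iteration. If we cannot prove that at least 2 iterations are executed, a trip-count check and a conditional branch are inserted in the preheader; when the loop would execute only a single iteration, the branch skips the original loop and only the peeled iteration runs.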
1 parent 1cf5466 commit c651b2c

File tree

4 files changed

+114
-30
lines changed

4 files changed

+114
-30
lines changed

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "llvm/Transforms/Utils/Cloning.h"
3939
#include "llvm/Transforms/Utils/LoopSimplify.h"
4040
#include "llvm/Transforms/Utils/LoopUtils.h"
41+
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
4142
#include "llvm/Transforms/Utils/ValueMapper.h"
4243
#include <algorithm>
4344
#include <cassert>
@@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
330331

331332
bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
332333
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
333-
// The loop must execute at least 2 iterations to guarantee that peeled
334-
// iteration executes.
335-
// TODO: Add checks during codegen.
336-
if (isa<SCEVCouldNotCompute>(BTC) ||
337-
!SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType())))
334+
if (isa<SCEVCouldNotCompute>(BTC))
338335
return false;
339336

340337
// Check if the exit condition of the loop can be adjusted by the peeling
@@ -822,7 +819,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
822819
/// instructions in the last peeled-off iteration.
823820
static void cloneLoopBlocks(
824821
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
825-
BasicBlock *InsertBot,
822+
BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
826823
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
827824
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
828825
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -916,10 +913,19 @@ static void cloneLoopBlocks(
916913
if (PeelLast) {
917914
// For the last iteration, we use the value from the latch of the original
918915
// loop directly.
916+
//
917+
IRBuilder<> B(InsertTop->getTerminator());
919918
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
920919
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
921-
VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch);
920+
PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
922921
NewPHI->eraseFromParent();
922+
if (OrigPreHeader)
923+
PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
924+
OrigPreHeader);
925+
926+
PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
927+
Latch);
928+
VMap[&*I] = PN;
923929
}
924930
} else {
925931
// For the first iteration, we use the value from the preheader directly.
@@ -1053,7 +1059,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10531059
// Set up all the necessary basic blocks.
10541060
BasicBlock *InsertTop;
10551061
BasicBlock *InsertBot;
1056-
BasicBlock *NewPreHeader;
1062+
BasicBlock *NewPreHeader = nullptr;
10571063
DenseMap<Instruction *, Value *> ExitValues;
10581064
if (PeelLast) {
10591065
// It is convenient to split the single exit block from the latch the
@@ -1084,11 +1090,40 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10841090
for (PHINode &P : Exit->phis())
10851091
ExitValues[&P] = P.getIncomingValueForBlock(Latch);
10861092

1093+
const SCEV *BTC = SE->getBackedgeTakenCount(L);
1094+
10871095
InsertTop = SplitEdge(Latch, Exit, &DT, LI);
10881096
InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
10891097

10901098
InsertTop->setName(Exit->getName() + ".peel.begin");
10911099
InsertBot->setName(Exit->getName() + ".peel.next");
1100+
NewPreHeader = nullptr;
1101+
1102+
// If the original loop may only execute a single iteration we need to
1103+
// insert a trip count check and skip the original loop if necessary.
1104+
if (!SE->isKnownPredicate(CmpInst::ICMP_UGT, BTC,
1105+
SE->getZero(BTC->getType()))) {
1106+
NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
1107+
SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
1108+
1109+
BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
1110+
Value *BTCValue =
1111+
Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
1112+
IRBuilder<> B(PreHeaderBR);
1113+
Value *Cond =
1114+
B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
1115+
B.CreateCondBr(Cond, NewPreHeader, InsertTop);
1116+
PreHeaderBR->eraseFromParent();
1117+
1118+
// PreHeader now dominates InsertTop.
1119+
DT.changeImmediateDominator(InsertTop, PreHeader);
1120+
1121+
// If we branch from PreHeader to InsertTop, we are guaranteed to execute
1122+
// the peeled iteration, so the exit values from the original loop are
1123+
// dead. Use poison for them.
1124+
for (auto &PN : InsertTop->phis())
1125+
PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader);
1126+
}
10921127
} else {
10931128
// It is convenient to split the preheader into 3 parts - two blocks to
10941129
// anchor the peeled copy of the loop body, and a new preheader for the
@@ -1162,8 +1197,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11621197
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
11631198
SmallVector<BasicBlock *, 8> NewBlocks;
11641199

1165-
cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166-
NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
1200+
cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
1201+
NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
1202+
LoopBlocks, VMap, LVMap, &DT, LI,
11671203
LoopLocalNoAliasDeclScopes, *SE);
11681204

11691205
// Remap to use values from the current iteration instead of the

llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ define i64 @peel_single_block_loop_iv_step_1() {
1212
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i64 [[IV_NEXT1]], 63
1313
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
1414
; CHECK: [[EXIT_PEEL_BEGIN]]:
15-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
1615
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
16+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
1717
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
1818
; CHECK: [[LOOP_PEEL]]:
1919
; CHECK-NEXT: [[CMP18_NOT:%.*]] = icmp eq i64 [[IV]], 63
@@ -91,8 +91,8 @@ define i64 @peel_single_block_loop_iv_step_1_eq_pred() {
9191
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_LCSSA]], 63
9292
; CHECK-NEXT: br i1 [[CMP_PEEL]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
9393
; CHECK: [[EXIT_PEEL_BEGIN]]:
94-
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
9594
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
95+
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
9696
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
9797
; CHECK: [[LOOP_PEEL]]:
9898
; CHECK-NEXT: [[CMP_PEEL1:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -170,8 +170,8 @@ define i64 @peel_single_block_loop_iv_step_1_nested_loop() {
170170
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
171171
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[OUTER_LATCH_PEEL_BEGIN:.*]], !llvm.loop [[LOOP3:![0-9]+]]
172172
; CHECK: [[OUTER_LATCH_PEEL_BEGIN]]:
173-
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
174173
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
174+
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
175175
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
176176
; CHECK: [[LOOP_PEEL]]:
177177
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -236,8 +236,8 @@ define i64 @peel_multi_block_loop_iv_step_1() {
236236
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
237237
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP4:![0-9]+]]
238238
; CHECK: [[EXIT_PEEL_BEGIN]]:
239-
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
240239
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LATCH]] ]
240+
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
241241
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
242242
; CHECK: [[LOOP_PEEL]]:
243243
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -364,8 +364,8 @@ define i64 @peel_single_block_loop_iv_step_1_btc_1() {
364364
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
365365
; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP5:![0-9]+]]
366366
; CHECK: [[EXIT_PEEL_BEGIN]]:
367-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
368367
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
368+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
369369
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
370370
; CHECK: [[LOOP_PEEL]]:
371371
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 1
@@ -483,9 +483,9 @@ define i32 @peel_loop_with_branch_and_phi_uses(ptr %x, i1 %c) {
483483
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], 99
484484
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP_HEADER]], label %[[LOOPEXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP6:![0-9]+]]
485485
; CHECK: [[LOOPEXIT_PEEL_BEGIN]]:
486-
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
487-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
488486
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
487+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
488+
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
489489
; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
490490
; CHECK: [[LOOP_HEADER_PEEL]]:
491491
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[IV]], 99

llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,38 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi(i32 %n) {
99
; CHECK-SAME: i32 [[N:%.*]]) {
1010
; CHECK-NEXT: [[ENTRY:.*]]:
1111
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1
12+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[SUB]], 0
13+
; CHECK-NEXT: br i1 [[TMP0]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
14+
; CHECK: [[ENTRY_SPLIT]]:
1215
; CHECK-NEXT: br label %[[LOOP:.*]]
1316
; CHECK: [[LOOP]]:
14-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
17+
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
18+
; CHECK-NEXT: call void @foo(i32 2)
19+
; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1
20+
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], 1
21+
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP1]]
22+
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
23+
; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
24+
; CHECK-NEXT: [[SEL_LCSSA_PH:%.*]] = phi i32 [ 2, %[[LOOP]] ]
25+
; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
26+
; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]]
27+
; CHECK: [[EXIT_PEEL_BEGIN]]:
28+
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[SEL_LCSSA_PH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
29+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
30+
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
31+
; CHECK: [[LOOP_PEEL]]:
1532
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
1633
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 1, i32 2
1734
; CHECK-NEXT: call void @foo(i32 [[SEL]])
18-
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
35+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1
1936
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]]
20-
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
37+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
38+
; CHECK: [[EXIT_PEEL_NEXT]]:
39+
; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
40+
; CHECK: [[LOOP_PEEL_NEXT]]:
41+
; CHECK-NEXT: br label %[[EXIT:.*]]
2142
; CHECK: [[EXIT]]:
22-
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ]
23-
; CHECK-NEXT: ret i32 [[SEL_LCSSA]]
43+
; CHECK-NEXT: ret i32 [[SEL]]
2444
;
2545
entry:
2646
%sub = add i32 %n, -1
@@ -160,21 +180,44 @@ define void @peel_last_with_trip_count_check_nested_loop(i32 %n) {
160180
; CHECK-NEXT: [[ENTRY:.*:]]
161181
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1
162182
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
163-
; CHECK: [[OUTER_HEADER_LOOPEXIT:.*]]:
183+
; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]]:
184+
; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1:%.*]], %[[INNER_LATCH:.*]] ]
185+
; CHECK-NEXT: br label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN:.*]]
186+
; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN]]:
187+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[DOTPH]], %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]] ]
188+
; CHECK-NEXT: br label %[[INNER_HEADER_PEEL:.*]]
189+
; CHECK: [[INNER_HEADER_PEEL]]:
190+
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
191+
; CHECK-NEXT: br i1 [[C]], label %[[INNER_LATCH_PEEL:.*]], label %[[THEN_PEEL:.*]]
192+
; CHECK: [[THEN_PEEL]]:
193+
; CHECK-NEXT: call void @foo(i32 1)
194+
; CHECK-NEXT: br label %[[INNER_LATCH_PEEL]]
195+
; CHECK: [[INNER_LATCH_PEEL]]:
196+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1
197+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
198+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_NEXT:.*]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_NEXT]]
199+
; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_NEXT]]:
200+
; CHECK-NEXT: br label %[[INNER_HEADER_PEEL_NEXT:.*]]
201+
; CHECK: [[INNER_HEADER_PEEL_NEXT]]:
202+
; CHECK-NEXT: br label %[[OUTER_HEADER_LOOPEXIT:.*]]
203+
; CHECK: [[OUTER_HEADER_LOOPEXIT]]:
164204
; CHECK-NEXT: br label %[[OUTER_HEADER]]
165205
; CHECK: [[OUTER_HEADER]]:
206+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[SUB]], 0
207+
; CHECK-NEXT: br i1 [[TMP1]], label %[[OUTER_HEADER_SPLIT:.*]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN]]
208+
; CHECK: [[OUTER_HEADER_SPLIT]]:
166209
; CHECK-NEXT: br label %[[INNER_HEADER:.*]]
167210
; CHECK: [[INNER_HEADER]]:
168-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[IV_NEXT:%.*]], %[[INNER_LATCH:.*]] ]
169-
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
170-
; CHECK-NEXT: br i1 [[C]], label %[[INNER_LATCH]], label %[[THEN:.*]]
211+
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[OUTER_HEADER_SPLIT]] ], [ [[IV_NEXT1]], %[[INNER_LATCH]] ]
212+
; CHECK-NEXT: br i1 false, label %[[INNER_LATCH]], label %[[THEN:.*]]
171213
; CHECK: [[THEN]]:
172214
; CHECK-NEXT: call void @foo(i32 1)
173215
; CHECK-NEXT: br label %[[INNER_LATCH]]
174216
; CHECK: [[INNER_LATCH]]:
175-
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
176-
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
177-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[OUTER_HEADER_LOOPEXIT]], label %[[INNER_HEADER]]
217+
; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1
218+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1
219+
; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[IV_NEXT1]], [[TMP2]]
220+
; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
178221
;
179222
entry:
180223
%sub = add i32 %n, -1
@@ -197,3 +240,8 @@ inner.latch:
197240
%exitcond.not = icmp eq i32 %iv.next, %n
198241
br i1 %exitcond.not, label %outer.header, label %inner.header
199242
}
243+
;.
244+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
245+
; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
246+
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
247+
;.

llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ define i32 @peel_last_iter_of_outer_lcssa_phi_with_constant_after_unrolling_inne
1919
; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 999
2020
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[OUTER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
2121
; CHECK: [[EXIT_PEEL_BEGIN]]:
22-
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ]
2322
; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ 1, %[[INNER_LATCH]] ]
23+
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ]
2424
; CHECK-NEXT: br label %[[OUTER_HEADER_PEEL:.*]]
2525
; CHECK: [[OUTER_HEADER_PEEL]]:
2626
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i16 [[IV_NEXT_LCSSA]], 999

0 commit comments

Comments
 (0)