Skip to content

Commit 5ccea79

Browse files
committed
!fixup address comments, thanks
1 parent c651b2c commit 5ccea79

File tree

7 files changed

+60
-37
lines changed

7 files changed

+60
-37
lines changed

llvm/include/llvm/Transforms/Utils/LoopPeel.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
4444
void computePeelCount(Loop *L, unsigned LoopSize,
4545
TargetTransformInfo::PeelingPreferences &PP,
4646
unsigned TripCount, DominatorTree &DT,
47-
ScalarEvolution &SE, AssumptionCache *AC = nullptr,
47+
ScalarEvolution &SE, const TargetTransformInfo &TTI,
48+
AssumptionCache *AC = nullptr,
4849
unsigned Threshold = UINT_MAX);
4950

5051
} // end namespace llvm

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,7 @@ bool llvm::computeUnrollCount(
10141014
}
10151015

10161016
// 5th priority is loop peeling.
1017-
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
1017+
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, TTI, AC, UP.Threshold);
10181018
if (PP.PeelCount) {
10191019
UP.Runtime = false;
10201020
UP.Count = 1;

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,17 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
361361
/// is known at the second-to-last.
362362
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
363363
const SCEVAddRecExpr *LeftAR,
364-
const SCEV *RightSCEV,
365-
ScalarEvolution &SE) {
364+
const SCEV *RightSCEV, ScalarEvolution &SE,
365+
const TargetTransformInfo &TTI) {
366366
if (!canPeelLastIteration(L, SE))
367367
return false;
368368

369369
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
370+
SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
371+
if (!SE.isKnownNonZero(BTC) && Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
372+
L.getLoopPredecessor()->getTerminator()))
373+
return false;
374+
370375
const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
371376
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
372377
SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
@@ -388,7 +393,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
388393
// ..
389394
// }
390395
static std::pair<unsigned, unsigned>
391-
countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
396+
countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
397+
const TargetTransformInfo &TTI) {
392398
assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
393399
unsigned DesiredPeelCount = 0;
394400
unsigned DesiredPeelCountLast = 0;
@@ -476,7 +482,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
476482
const SCEV *Step = LeftAR->getStepRecurrence(SE);
477483
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
478484
Pred)) {
479-
if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE))
485+
if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
480486
DesiredPeelCountLast = 1;
481487
return;
482488
}
@@ -590,8 +596,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
590596
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
591597
TargetTransformInfo::PeelingPreferences &PP,
592598
unsigned TripCount, DominatorTree &DT,
593-
ScalarEvolution &SE, AssumptionCache *AC,
594-
unsigned Threshold) {
599+
ScalarEvolution &SE, const TargetTransformInfo &TTI,
600+
AssumptionCache *AC, unsigned Threshold) {
595601
assert(LoopSize > 0 && "Zero loop size is not allowed!");
596602
// Save the PP.PeelCount value set by the target in
597603
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -653,7 +659,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
653659
}
654660

655661
const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
656-
countToEliminateCompares(*L, MaxPeelCount, SE);
662+
countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
657663
DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
658664

659665
if (DesiredPeelCount == 0)
@@ -911,9 +917,10 @@ static void cloneLoopBlocks(
911917
// loop iteration. Since this copy is no longer part of the loop, we
912918
// resolve this statically:
913919
if (PeelLast) {
914-
// For the last iteration, we use the value from the latch of the original
915-
// loop directly.
916-
//
920+
// For the last iteration, we create a new phi in InsertTop for each header
921+
// phi. Its incoming value from the original preheader (taken when the main
922+
// loop is skipped) is the header phi's preheader value; its incoming value
923+
// from the latch (taken after the main loop) is the header phi's latch value.
917924
IRBuilder<> B(InsertTop->getTerminator());
918925
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
919926
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
@@ -1100,9 +1107,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11001107
NewPreHeader = nullptr;
11011108

11021109
// If the original loop may only execute a single iteration we need to
1103-
// insert a trip count check and skip the peeled loop if necessary.
1104-
if (!SE->isKnownPredicate(CmpInst::ICMP_UGT, BTC,
1105-
SE->getZero(BTC->getType()))) {
1110+
// insert a trip count check and, if necessary, skip the original loop (from
1111+
// which the last iteration has been peeled off).
1112+
if (!SE->isKnownNonZero(BTC)) {
11061113
NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
11071114
SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
11081115

@@ -1117,12 +1124,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11171124

11181125
// PreHeader now dominates InsertTop.
11191126
DT.changeImmediateDominator(InsertTop, PreHeader);
1120-
1121-
// If we branch from PreHeader to InsertTop, we are guaranteed to execute
1122-
// the peeled iteration, so the exit values from the original loop are
1123-
// dead. Use poison for them.
1124-
for (auto &PN : InsertTop->phis())
1125-
PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader);
11261127
}
11271128
} else {
11281129
// It is convenient to split the preheader into 3 parts - two blocks to
@@ -1252,9 +1253,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
12521253

12531254
if (PeelLast) {
12541255
// Now adjust users of the original exit values by replacing them with the
1255-
// exit value from the peeled iteration.
1256-
for (const auto &[P, E] : ExitValues)
1256+
// exit value from the peeled iteration, then erase the original exit values.
1257+
for (const auto &[P, E] : ExitValues) {
12571258
P->replaceAllUsesWith(isa<Constant>(E) ? E : &*VMap.lookup(E));
1259+
P->eraseFromParent();
1260+
}
12581261
formLCSSA(*L, DT, LI, SE);
12591262
} else {
12601263
// Now adjust the phi nodes in the loop header to get their initial values

llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,41 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
2525
; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
2626
; BUDGET3-NEXT: [[ENTRY:.*]]:
2727
; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
28+
; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]]
29+
; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
30+
; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
31+
; BUDGET3: [[ENTRY_SPLIT]]:
2832
; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]]
2933
; BUDGET3: [[LOOP_HEADER]]:
30-
; BUDGET3-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT_PEEL:%.*]], %[[LOOP_LATCH:.*]] ]
31-
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
32-
; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
34+
; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
35+
; BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
36+
; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
3337
; BUDGET3: [[THEN]]:
3438
; BUDGET3-NEXT: br label %[[LOOP_LATCH]]
3539
; BUDGET3: [[LOOP_LATCH]]:
36-
; BUDGET3-NEXT: [[IV_NEXT_PEEL]] = add nsw i32 [[TMP3]], 1
40+
; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
41+
; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1
42+
; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
43+
; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
44+
; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
45+
; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
46+
; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]]
47+
; BUDGET3: [[EXIT_PEEL_BEGIN]]:
48+
; BUDGET3-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
49+
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
50+
; BUDGET3: [[LOOP_HEADER_PEEL]]:
51+
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
52+
; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]]
53+
; BUDGET3: [[THEN_PEEL]]:
54+
; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]]
55+
; BUDGET3: [[LOOP_LATCH_PEEL]]:
56+
; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1
3757
; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]]
38-
; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
58+
; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
59+
; BUDGET3: [[EXIT_PEEL_NEXT]]:
60+
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
61+
; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]:
62+
; BUDGET3-NEXT: br label %[[EXIT:.*]]
3963
; BUDGET3: [[EXIT]]:
4064
; BUDGET3-NEXT: ret i32 0
4165
;
@@ -59,3 +83,7 @@ loop.latch:
5983
exit:
6084
ret i32 0
6185
}
86+
;.
87+
; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
88+
; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
89+
;.

llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ define i64 @peel_single_block_loop_iv_step_1() {
1212
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i64 [[IV_NEXT1]], 63
1313
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
1414
; CHECK: [[EXIT_PEEL_BEGIN]]:
15-
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
1615
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
1716
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
1817
; CHECK: [[LOOP_PEEL]]:
@@ -91,7 +90,6 @@ define i64 @peel_single_block_loop_iv_step_1_eq_pred() {
9190
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_LCSSA]], 63
9291
; CHECK-NEXT: br i1 [[CMP_PEEL]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
9392
; CHECK: [[EXIT_PEEL_BEGIN]]:
94-
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
9593
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
9694
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
9795
; CHECK: [[LOOP_PEEL]]:
@@ -170,7 +168,6 @@ define i64 @peel_single_block_loop_iv_step_1_nested_loop() {
170168
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
171169
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[OUTER_LATCH_PEEL_BEGIN:.*]], !llvm.loop [[LOOP3:![0-9]+]]
172170
; CHECK: [[OUTER_LATCH_PEEL_BEGIN]]:
173-
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
174171
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
175172
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
176173
; CHECK: [[LOOP_PEEL]]:
@@ -236,7 +233,6 @@ define i64 @peel_multi_block_loop_iv_step_1() {
236233
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
237234
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP4:![0-9]+]]
238235
; CHECK: [[EXIT_PEEL_BEGIN]]:
239-
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LATCH]] ]
240236
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
241237
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
242238
; CHECK: [[LOOP_PEEL]]:
@@ -364,7 +360,6 @@ define i64 @peel_single_block_loop_iv_step_1_btc_1() {
364360
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
365361
; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP5:![0-9]+]]
366362
; CHECK: [[EXIT_PEEL_BEGIN]]:
367-
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
368363
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
369364
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
370365
; CHECK: [[LOOP_PEEL]]:
@@ -483,7 +478,6 @@ define i32 @peel_loop_with_branch_and_phi_uses(ptr %x, i1 %c) {
483478
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], 99
484479
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP_HEADER]], label %[[LOOPEXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP6:![0-9]+]]
485480
; CHECK: [[LOOPEXIT_PEEL_BEGIN]]:
486-
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
487481
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
488482
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
489483
; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]

llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,9 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi(i32 %n) {
2121
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP1]]
2222
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
2323
; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
24-
; CHECK-NEXT: [[SEL_LCSSA_PH:%.*]] = phi i32 [ 2, %[[LOOP]] ]
2524
; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
2625
; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]]
2726
; CHECK: [[EXIT_PEEL_BEGIN]]:
28-
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[SEL_LCSSA_PH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
2927
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
3028
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
3129
; CHECK: [[LOOP_PEEL]]:

llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ define i32 @peel_last_iter_of_outer_lcssa_phi_with_constant_after_unrolling_inne
1919
; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 999
2020
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[OUTER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
2121
; CHECK: [[EXIT_PEEL_BEGIN]]:
22-
; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ 1, %[[INNER_LATCH]] ]
2322
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ]
2423
; CHECK-NEXT: br label %[[OUTER_HEADER_PEEL:.*]]
2524
; CHECK: [[OUTER_HEADER_PEEL]]:

0 commit comments

Comments
 (0)