Skip to content

Commit 5804545

Browse files
committed
Revert "[LoopPeel] Remove known trip count restriction when peeling last. (#140792)"
This reverts commit 24b9775 and also reverts ac9a466. Building CMake triggers a crash with the patch, so revert it while I investigate.
1 parent ad58ea3 commit 5804545

File tree

8 files changed

+48
-232
lines changed

8 files changed

+48
-232
lines changed

llvm/include/llvm/Transforms/Utils/LoopPeel.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
4444
void computePeelCount(Loop *L, unsigned LoopSize,
4545
TargetTransformInfo::PeelingPreferences &PP,
4646
unsigned TripCount, DominatorTree &DT,
47-
ScalarEvolution &SE, const TargetTransformInfo &TTI,
48-
AssumptionCache *AC = nullptr,
47+
ScalarEvolution &SE, AssumptionCache *AC = nullptr,
4948
unsigned Threshold = UINT_MAX);
5049

5150
} // end namespace llvm

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,7 @@ bool llvm::computeUnrollCount(
10141014
}
10151015

10161016
// 5th priority is loop peeling.
1017-
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, TTI, AC, UP.Threshold);
1017+
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
10181018
if (PP.PeelCount) {
10191019
UP.Runtime = false;
10201020
UP.Count = 1;

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 21 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
#include "llvm/Transforms/Utils/Cloning.h"
3939
#include "llvm/Transforms/Utils/LoopSimplify.h"
4040
#include "llvm/Transforms/Utils/LoopUtils.h"
41-
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
4241
#include "llvm/Transforms/Utils/ValueMapper.h"
4342
#include <algorithm>
4443
#include <cassert>
@@ -331,7 +330,11 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
331330

332331
bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
333332
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
334-
if (isa<SCEVCouldNotCompute>(BTC))
333+
// The loop must execute at least 2 iterations to guarantee that the peeled
334+
// iteration executes.
335+
// TODO: Add checks during codegen.
336+
if (isa<SCEVCouldNotCompute>(BTC) ||
337+
!SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType())))
335338
return false;
336339

337340
// Check if the exit condition of the loop can be adjusted by the peeling
@@ -361,18 +364,12 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
361364
/// is known at the second-to-last.
362365
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
363366
const SCEVAddRecExpr *LeftAR,
364-
const SCEV *RightSCEV, ScalarEvolution &SE,
365-
const TargetTransformInfo &TTI) {
367+
const SCEV *RightSCEV,
368+
ScalarEvolution &SE) {
366369
if (!canPeelLastIteration(L, SE))
367370
return false;
368371

369372
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
370-
SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
371-
if (!SE.isKnownNonZero(BTC) &&
372-
Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
373-
L.getLoopPredecessor()->getTerminator()))
374-
return false;
375-
376373
const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
377374
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
378375
SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
@@ -394,8 +391,7 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
394391
// ..
395392
// }
396393
static std::pair<unsigned, unsigned>
397-
countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
398-
const TargetTransformInfo &TTI) {
394+
countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
399395
assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
400396
unsigned DesiredPeelCount = 0;
401397
unsigned DesiredPeelCountLast = 0;
@@ -483,7 +479,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
483479
const SCEV *Step = LeftAR->getStepRecurrence(SE);
484480
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
485481
Pred)) {
486-
if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
482+
if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE))
487483
DesiredPeelCountLast = 1;
488484
return;
489485
}
@@ -597,8 +593,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
597593
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
598594
TargetTransformInfo::PeelingPreferences &PP,
599595
unsigned TripCount, DominatorTree &DT,
600-
ScalarEvolution &SE, const TargetTransformInfo &TTI,
601-
AssumptionCache *AC, unsigned Threshold) {
596+
ScalarEvolution &SE, AssumptionCache *AC,
597+
unsigned Threshold) {
602598
assert(LoopSize > 0 && "Zero loop size is not allowed!");
603599
// Save the PP.PeelCount value set by the target in
604600
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -660,7 +656,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
660656
}
661657

662658
const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
663-
countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
659+
countToEliminateCompares(*L, MaxPeelCount, SE);
664660
DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
665661

666662
if (DesiredPeelCount == 0)
@@ -826,7 +822,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
826822
/// instructions in the last peeled-off iteration.
827823
static void cloneLoopBlocks(
828824
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
829-
BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
825+
BasicBlock *InsertBot,
830826
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
831827
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
832828
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -918,22 +914,12 @@ static void cloneLoopBlocks(
918914
// loop iteration. Since this copy is no longer part of the loop, we
919915
// resolve this statically:
920916
if (PeelLast) {
921-
// For the last iteration, we introduce new phis for each header phi in
922-
// InsertTop, using the incoming value from the preheader for the original
923-
// preheader (when skipping the main loop) and the incoming value from the
924-
// latch for the latch (when continuing from the main loop).
925-
IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt());
917+
// For the last iteration, we use the value from the latch of the original
918+
// loop directly.
926919
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
927920
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
928-
PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
921+
VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch);
929922
NewPHI->eraseFromParent();
930-
if (OrigPreHeader)
931-
PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
932-
OrigPreHeader);
933-
934-
PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
935-
Latch);
936-
VMap[&*I] = PN;
937923
}
938924
} else {
939925
// For the first iteration, we use the value from the preheader directly.
@@ -1067,7 +1053,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10671053
// Set up all the necessary basic blocks.
10681054
BasicBlock *InsertTop;
10691055
BasicBlock *InsertBot;
1070-
BasicBlock *NewPreHeader = nullptr;
1056+
BasicBlock *NewPreHeader;
10711057
DenseMap<Instruction *, Value *> ExitValues;
10721058
if (PeelLast) {
10731059
// It is convenient to split the single exit block from the latch the
@@ -1098,34 +1084,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10981084
for (PHINode &P : Exit->phis())
10991085
ExitValues[&P] = P.getIncomingValueForBlock(Latch);
11001086

1101-
const SCEV *BTC = SE->getBackedgeTakenCount(L);
1102-
11031087
InsertTop = SplitEdge(Latch, Exit, &DT, LI);
11041088
InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
11051089

11061090
InsertTop->setName(Exit->getName() + ".peel.begin");
11071091
InsertBot->setName(Exit->getName() + ".peel.next");
1108-
NewPreHeader = nullptr;
1109-
1110-
// If the original loop may only execute a single iteration we need to
1111-
// insert a trip count check and skip the original loop with the last
1112-
// iteration peeled off if necessary.
1113-
if (!SE->isKnownNonZero(BTC)) {
1114-
NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
1115-
SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
1116-
1117-
BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
1118-
Value *BTCValue =
1119-
Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
1120-
IRBuilder<> B(PreHeaderBR);
1121-
Value *Cond =
1122-
B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
1123-
B.CreateCondBr(Cond, NewPreHeader, InsertTop);
1124-
PreHeaderBR->eraseFromParent();
1125-
1126-
// PreHeader now dominates InsertTop.
1127-
DT.changeImmediateDominator(InsertTop, PreHeader);
1128-
}
11291092
} else {
11301093
// It is convenient to split the preheader into 3 parts - two blocks to
11311094
// anchor the peeled copy of the loop body, and a new preheader for the
@@ -1199,9 +1162,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11991162
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
12001163
SmallVector<BasicBlock *, 8> NewBlocks;
12011164

1202-
cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
1203-
NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
1204-
LoopBlocks, VMap, LVMap, &DT, LI,
1165+
cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166+
NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
12051167
LoopLocalNoAliasDeclScopes, *SE);
12061168

12071169
// Remap to use values from the current iteration instead of the
@@ -1254,11 +1216,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
12541216

12551217
if (PeelLast) {
12561218
// Now adjust users of the original exit values by replacing them with the
1257-
// exit value from the peeled iteration and remove them.
1258-
for (const auto &[P, E] : ExitValues) {
1219+
// exit value from the peeled iteration.
1220+
for (const auto &[P, E] : ExitValues)
12591221
P->replaceAllUsesWith(isa<Constant>(E) ? E : &*VMap.lookup(E));
1260-
P->eraseFromParent();
1261-
}
12621222
formLCSSA(*L, DT, LI, SE);
12631223
} else {
12641224
// Now adjust the phi nodes in the loop header to get their initial values

llvm/test/Transforms/LoopUnroll/peel-last-iteration-debug.ll

Lines changed: 0 additions & 76 deletions
This file was deleted.

llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -25,41 +25,17 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
2525
; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
2626
; BUDGET3-NEXT: [[ENTRY:.*]]:
2727
; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
28-
; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]]
29-
; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
30-
; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
31-
; BUDGET3: [[ENTRY_SPLIT]]:
3228
; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]]
3329
; BUDGET3: [[LOOP_HEADER]]:
34-
; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
35-
; BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
36-
; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
30+
; BUDGET3-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT_PEEL:%.*]], %[[LOOP_LATCH:.*]] ]
31+
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
32+
; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
3733
; BUDGET3: [[THEN]]:
3834
; BUDGET3-NEXT: br label %[[LOOP_LATCH]]
3935
; BUDGET3: [[LOOP_LATCH]]:
40-
; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
41-
; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1
42-
; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
43-
; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
44-
; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
45-
; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
46-
; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]]
47-
; BUDGET3: [[EXIT_PEEL_BEGIN]]:
48-
; BUDGET3-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
49-
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
50-
; BUDGET3: [[LOOP_HEADER_PEEL]]:
51-
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
52-
; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]]
53-
; BUDGET3: [[THEN_PEEL]]:
54-
; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]]
55-
; BUDGET3: [[LOOP_LATCH_PEEL]]:
56-
; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1
36+
; BUDGET3-NEXT: [[IV_NEXT_PEEL]] = add nsw i32 [[TMP3]], 1
5737
; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]]
58-
; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
59-
; BUDGET3: [[EXIT_PEEL_NEXT]]:
60-
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
61-
; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]:
62-
; BUDGET3-NEXT: br label %[[EXIT:.*]]
38+
; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
6339
; BUDGET3: [[EXIT]]:
6440
; BUDGET3-NEXT: ret i32 0
6541
;
@@ -83,7 +59,3 @@ loop.latch:
8359
exit:
8460
ret i32 0
8561
}
86-
;.
87-
; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
88-
; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
89-
;.

0 commit comments

Comments
 (0)