38
38
#include " llvm/Transforms/Utils/Cloning.h"
39
39
#include " llvm/Transforms/Utils/LoopSimplify.h"
40
40
#include " llvm/Transforms/Utils/LoopUtils.h"
41
- #include " llvm/Transforms/Utils/ScalarEvolutionExpander.h"
42
41
#include " llvm/Transforms/Utils/ValueMapper.h"
43
42
#include < algorithm>
44
43
#include < cassert>
@@ -331,7 +330,11 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
331
330
332
331
bool llvm::canPeelLastIteration (const Loop &L, ScalarEvolution &SE) {
333
332
const SCEV *BTC = SE.getBackedgeTakenCount (&L);
334
- if (isa<SCEVCouldNotCompute>(BTC))
333
+ // The loop must execute at least 2 iterations to guarantee that the peeled
334
+ // iteration executes.
335
+ // TODO: Add checks during codegen.
336
+ if (isa<SCEVCouldNotCompute>(BTC) ||
337
+ !SE.isKnownPredicate (CmpInst::ICMP_UGT, BTC, SE.getZero (BTC->getType ())))
335
338
return false ;
336
339
337
340
// Check if the exit condition of the loop can be adjusted by the peeling
@@ -361,18 +364,12 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
361
364
/// is known at the second-to-last.
362
365
static bool shouldPeelLastIteration (Loop &L, CmpPredicate Pred,
363
366
const SCEVAddRecExpr *LeftAR,
364
- const SCEV *RightSCEV, ScalarEvolution &SE,
365
- const TargetTransformInfo &TTI ) {
367
+ const SCEV *RightSCEV,
368
+ ScalarEvolution &SE ) {
366
369
if (!canPeelLastIteration (L, SE))
367
370
return false ;
368
371
369
372
const SCEV *BTC = SE.getBackedgeTakenCount (&L);
370
- SCEVExpander Expander (SE, L.getHeader ()->getDataLayout (), " loop-peel" );
371
- if (!SE.isKnownNonZero (BTC) &&
372
- Expander.isHighCostExpansion (BTC, &L, SCEVCheapExpansionBudget, &TTI,
373
- L.getLoopPredecessor ()->getTerminator ()))
374
- return false ;
375
-
376
373
const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration (BTC, SE);
377
374
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration (
378
375
SE.getMinusSCEV (BTC, SE.getOne (BTC->getType ())), SE);
@@ -394,8 +391,7 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
394
391
// ..
395
392
// }
396
393
static std::pair<unsigned , unsigned >
397
- countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
398
- const TargetTransformInfo &TTI) {
394
+ countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
399
395
assert (L.isLoopSimplifyForm () && " Loop needs to be in loop simplify form" );
400
396
unsigned DesiredPeelCount = 0 ;
401
397
unsigned DesiredPeelCountLast = 0 ;
@@ -483,7 +479,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
483
479
const SCEV *Step = LeftAR->getStepRecurrence (SE);
484
480
if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
485
481
Pred)) {
486
- if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE, TTI ))
482
+ if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE))
487
483
DesiredPeelCountLast = 1 ;
488
484
return ;
489
485
}
@@ -597,8 +593,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
597
593
void llvm::computePeelCount (Loop *L, unsigned LoopSize,
598
594
TargetTransformInfo::PeelingPreferences &PP,
599
595
unsigned TripCount, DominatorTree &DT,
600
- ScalarEvolution &SE, const TargetTransformInfo &TTI ,
601
- AssumptionCache *AC, unsigned Threshold) {
596
+ ScalarEvolution &SE, AssumptionCache *AC ,
597
+ unsigned Threshold) {
602
598
assert (LoopSize > 0 && " Zero loop size is not allowed!" );
603
599
// Save the PP.PeelCount value set by the target in
604
600
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -660,7 +656,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
660
656
}
661
657
662
658
const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
663
- countToEliminateCompares (*L, MaxPeelCount, SE, TTI );
659
+ countToEliminateCompares (*L, MaxPeelCount, SE);
664
660
DesiredPeelCount = std::max (DesiredPeelCount, CountToEliminateCmps);
665
661
666
662
if (DesiredPeelCount == 0 )
@@ -826,7 +822,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
826
822
/// instructions in the last peeled-off iteration.
827
823
static void cloneLoopBlocks (
828
824
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
829
- BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
825
+ BasicBlock *InsertBot,
830
826
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
831
827
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
832
828
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -918,22 +914,12 @@ static void cloneLoopBlocks(
918
914
// loop iteration. Since this copy is no longer part of the loop, we
919
915
// resolve this statically:
920
916
if (PeelLast) {
921
- // For the last iteration, we introduce new phis for each header phi in
922
- // InsertTop, using the incoming value from the preheader for the original
923
- // preheader (when skipping the main loop) and the incoming value from the
924
- // latch for the latch (when continuing from the main loop).
925
- IRBuilder<> B (InsertTop, InsertTop->getFirstNonPHIIt ());
917
+ // For the last iteration, we use the value from the latch of the original
918
+ // loop directly.
926
919
for (BasicBlock::iterator I = Header->begin (); isa<PHINode>(I); ++I) {
927
920
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
928
- PHINode *PN = B. CreatePHI ( NewPHI->getType (), 2 );
921
+ VMap[&*I] = NewPHI->getIncomingValueForBlock (Latch );
929
922
NewPHI->eraseFromParent ();
930
- if (OrigPreHeader)
931
- PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (PreHeader),
932
- OrigPreHeader);
933
-
934
- PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (Latch),
935
- Latch);
936
- VMap[&*I] = PN;
937
923
}
938
924
} else {
939
925
// For the first iteration, we use the value from the preheader directly.
@@ -1067,7 +1053,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1067
1053
// Set up all the necessary basic blocks.
1068
1054
BasicBlock *InsertTop;
1069
1055
BasicBlock *InsertBot;
1070
- BasicBlock *NewPreHeader = nullptr ;
1056
+ BasicBlock *NewPreHeader;
1071
1057
DenseMap<Instruction *, Value *> ExitValues;
1072
1058
if (PeelLast) {
1073
1059
// It is convenient to split the single exit block from the latch the
@@ -1098,34 +1084,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1098
1084
for (PHINode &P : Exit->phis ())
1099
1085
ExitValues[&P] = P.getIncomingValueForBlock (Latch);
1100
1086
1101
- const SCEV *BTC = SE->getBackedgeTakenCount (L);
1102
-
1103
1087
InsertTop = SplitEdge (Latch, Exit, &DT, LI);
1104
1088
InsertBot = SplitBlock (InsertTop, InsertTop->getTerminator (), &DT, LI);
1105
1089
1106
1090
InsertTop->setName (Exit->getName () + " .peel.begin" );
1107
1091
InsertBot->setName (Exit->getName () + " .peel.next" );
1108
- NewPreHeader = nullptr ;
1109
-
1110
- // If the original loop may only execute a single iteration we need to
1111
- // insert a trip count check and skip the original loop with the last
1112
- // iteration peeled off if necessary.
1113
- if (!SE->isKnownNonZero (BTC)) {
1114
- NewPreHeader = SplitEdge (PreHeader, Header, &DT, LI);
1115
- SCEVExpander Expander (*SE, Latch->getDataLayout (), " loop-peel" );
1116
-
1117
- BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator ());
1118
- Value *BTCValue =
1119
- Expander.expandCodeFor (BTC, BTC->getType (), PreHeaderBR);
1120
- IRBuilder<> B (PreHeaderBR);
1121
- Value *Cond =
1122
- B.CreateICmpNE (BTCValue, ConstantInt::get (BTCValue->getType (), 0 ));
1123
- B.CreateCondBr (Cond, NewPreHeader, InsertTop);
1124
- PreHeaderBR->eraseFromParent ();
1125
-
1126
- // PreHeader now dominates InsertTop.
1127
- DT.changeImmediateDominator (InsertTop, PreHeader);
1128
- }
1129
1092
} else {
1130
1093
// It is convenient to split the preheader into 3 parts - two blocks to
1131
1094
// anchor the peeled copy of the loop body, and a new preheader for the
@@ -1199,9 +1162,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1199
1162
for (unsigned Iter = 0 ; Iter < PeelCount; ++Iter) {
1200
1163
SmallVector<BasicBlock *, 8 > NewBlocks;
1201
1164
1202
- cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot,
1203
- NewPreHeader ? PreHeader : nullptr , ExitEdges, NewBlocks,
1204
- LoopBlocks, VMap, LVMap, &DT, LI,
1165
+ cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166
+ NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
1205
1167
LoopLocalNoAliasDeclScopes, *SE);
1206
1168
1207
1169
// Remap to use values from the current iteration instead of the
@@ -1254,11 +1216,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1254
1216
1255
1217
if (PeelLast) {
1256
1218
// Now adjust users of the original exit values by replacing them with the
1257
- // exit value from the peeled iteration and remove them .
1258
- for (const auto &[P, E] : ExitValues) {
1219
+ // exit value from the peeled iteration.
1220
+ for (const auto &[P, E] : ExitValues)
1259
1221
P->replaceAllUsesWith (isa<Constant>(E) ? E : &*VMap.lookup (E));
1260
- P->eraseFromParent ();
1261
- }
1262
1222
formLCSSA (*L, DT, LI, SE);
1263
1223
} else {
1264
1224
// Now adjust the phi nodes in the loop header to get their initial values
0 commit comments