3838#include " llvm/Transforms/Utils/Cloning.h"
3939#include " llvm/Transforms/Utils/LoopSimplify.h"
4040#include " llvm/Transforms/Utils/LoopUtils.h"
41- #include " llvm/Transforms/Utils/ScalarEvolutionExpander.h"
4241#include " llvm/Transforms/Utils/ValueMapper.h"
4342#include < algorithm>
4443#include < cassert>
@@ -331,7 +330,11 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
331330
332331bool llvm::canPeelLastIteration (const Loop &L, ScalarEvolution &SE) {
333332 const SCEV *BTC = SE.getBackedgeTakenCount (&L);
334- if (isa<SCEVCouldNotCompute>(BTC))
333+ // The loop must execute at least 2 iterations to guarantee that the peeled
334+ // iteration executes.
335+ // TODO: Add checks during codegen.
336+ if (isa<SCEVCouldNotCompute>(BTC) ||
337+ !SE.isKnownPredicate (CmpInst::ICMP_UGT, BTC, SE.getZero (BTC->getType ())))
335338 return false ;
336339
337340 // Check if the exit condition of the loop can be adjusted by the peeling
@@ -361,18 +364,12 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
361364// / is known at the second-to-last.
362365static bool shouldPeelLastIteration (Loop &L, CmpPredicate Pred,
363366 const SCEVAddRecExpr *LeftAR,
364- const SCEV *RightSCEV, ScalarEvolution &SE,
365- const TargetTransformInfo &TTI ) {
367+ const SCEV *RightSCEV,
368+ ScalarEvolution &SE ) {
366369 if (!canPeelLastIteration (L, SE))
367370 return false ;
368371
369372 const SCEV *BTC = SE.getBackedgeTakenCount (&L);
370- SCEVExpander Expander (SE, L.getHeader ()->getDataLayout (), " loop-peel" );
371- if (!SE.isKnownNonZero (BTC) &&
372- Expander.isHighCostExpansion (BTC, &L, SCEVCheapExpansionBudget, &TTI,
373- L.getLoopPredecessor ()->getTerminator ()))
374- return false ;
375-
376373 const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration (BTC, SE);
377374 const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration (
378375 SE.getMinusSCEV (BTC, SE.getOne (BTC->getType ())), SE);
@@ -394,8 +391,7 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
394391// ..
395392// }
396393static std::pair<unsigned , unsigned >
397- countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
398- const TargetTransformInfo &TTI) {
394+ countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
399395 assert (L.isLoopSimplifyForm () && " Loop needs to be in loop simplify form" );
400396 unsigned DesiredPeelCount = 0 ;
401397 unsigned DesiredPeelCountLast = 0 ;
@@ -483,7 +479,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
483479 const SCEV *Step = LeftAR->getStepRecurrence (SE);
484480 if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
485481 Pred)) {
486- if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE, TTI ))
482+ if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE))
487483 DesiredPeelCountLast = 1 ;
488484 return ;
489485 }
@@ -597,8 +593,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
597593void llvm::computePeelCount (Loop *L, unsigned LoopSize,
598594 TargetTransformInfo::PeelingPreferences &PP,
599595 unsigned TripCount, DominatorTree &DT,
600- ScalarEvolution &SE, const TargetTransformInfo &TTI ,
601- AssumptionCache *AC, unsigned Threshold) {
596+ ScalarEvolution &SE, AssumptionCache *AC ,
597+ unsigned Threshold) {
602598 assert (LoopSize > 0 && " Zero loop size is not allowed!" );
603599 // Save the PP.PeelCount value set by the target in
604600 // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -660,7 +656,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
660656 }
661657
662658 const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
663- countToEliminateCompares (*L, MaxPeelCount, SE, TTI );
659+ countToEliminateCompares (*L, MaxPeelCount, SE);
664660 DesiredPeelCount = std::max (DesiredPeelCount, CountToEliminateCmps);
665661
666662 if (DesiredPeelCount == 0 )
@@ -826,7 +822,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
826822// / instructions in the last peeled-off iteration.
827823static void cloneLoopBlocks (
828824 Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
829- BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
825+ BasicBlock *InsertBot,
830826 SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
831827 SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
832828 ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -918,22 +914,12 @@ static void cloneLoopBlocks(
918914 // loop iteration. Since this copy is no longer part of the loop, we
919915 // resolve this statically:
920916 if (PeelLast) {
921- // For the last iteration, we introduce new phis for each header phi in
922- // InsertTop, using the incoming value from the preheader for the original
923- // preheader (when skipping the main loop) and the incoming value from the
924- // latch for the latch (when continuing from the main loop).
925- IRBuilder<> B (InsertTop, InsertTop->getFirstNonPHIIt ());
917+ // For the last iteration, we use the value from the latch of the original
918+ // loop directly.
926919 for (BasicBlock::iterator I = Header->begin (); isa<PHINode>(I); ++I) {
927920 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
928- PHINode *PN = B. CreatePHI ( NewPHI->getType (), 2 );
921+ VMap[&*I] = NewPHI->getIncomingValueForBlock (Latch );
929922 NewPHI->eraseFromParent ();
930- if (OrigPreHeader)
931- PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (PreHeader),
932- OrigPreHeader);
933-
934- PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (Latch),
935- Latch);
936- VMap[&*I] = PN;
937923 }
938924 } else {
939925 // For the first iteration, we use the value from the preheader directly.
@@ -1067,7 +1053,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10671053 // Set up all the necessary basic blocks.
10681054 BasicBlock *InsertTop;
10691055 BasicBlock *InsertBot;
1070- BasicBlock *NewPreHeader = nullptr ;
1056+ BasicBlock *NewPreHeader;
10711057 DenseMap<Instruction *, Value *> ExitValues;
10721058 if (PeelLast) {
10731059 // It is convenient to split the single exit block from the latch the
@@ -1098,34 +1084,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10981084 for (PHINode &P : Exit->phis ())
10991085 ExitValues[&P] = P.getIncomingValueForBlock (Latch);
11001086
1101- const SCEV *BTC = SE->getBackedgeTakenCount (L);
1102-
11031087 InsertTop = SplitEdge (Latch, Exit, &DT, LI);
11041088 InsertBot = SplitBlock (InsertTop, InsertTop->getTerminator (), &DT, LI);
11051089
11061090 InsertTop->setName (Exit->getName () + " .peel.begin" );
11071091 InsertBot->setName (Exit->getName () + " .peel.next" );
1108- NewPreHeader = nullptr ;
1109-
1110- // If the original loop may only execute a single iteration we need to
1111- // insert a trip count check and skip the original loop with the last
1112- // iteration peeled off if necessary.
1113- if (!SE->isKnownNonZero (BTC)) {
1114- NewPreHeader = SplitEdge (PreHeader, Header, &DT, LI);
1115- SCEVExpander Expander (*SE, Latch->getDataLayout (), " loop-peel" );
1116-
1117- BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator ());
1118- Value *BTCValue =
1119- Expander.expandCodeFor (BTC, BTC->getType (), PreHeaderBR);
1120- IRBuilder<> B (PreHeaderBR);
1121- Value *Cond =
1122- B.CreateICmpNE (BTCValue, ConstantInt::get (BTCValue->getType (), 0 ));
1123- B.CreateCondBr (Cond, NewPreHeader, InsertTop);
1124- PreHeaderBR->eraseFromParent ();
1125-
1126- // PreHeader now dominates InsertTop.
1127- DT.changeImmediateDominator (InsertTop, PreHeader);
1128- }
11291092 } else {
11301093 // It is convenient to split the preheader into 3 parts - two blocks to
11311094 // anchor the peeled copy of the loop body, and a new preheader for the
@@ -1199,9 +1162,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11991162 for (unsigned Iter = 0 ; Iter < PeelCount; ++Iter) {
12001163 SmallVector<BasicBlock *, 8 > NewBlocks;
12011164
1202- cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot,
1203- NewPreHeader ? PreHeader : nullptr , ExitEdges, NewBlocks,
1204- LoopBlocks, VMap, LVMap, &DT, LI,
1165+ cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166+ NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
12051167 LoopLocalNoAliasDeclScopes, *SE);
12061168
12071169 // Remap to use values from the current iteration instead of the
@@ -1254,11 +1216,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
12541216
12551217 if (PeelLast) {
12561218 // Now adjust users of the original exit values by replacing them with the
1257- // exit value from the peeled iteration and remove them .
1258- for (const auto &[P, E] : ExitValues) {
1219+ // exit value from the peeled iteration.
1220+ for (const auto &[P, E] : ExitValues)
12591221 P->replaceAllUsesWith (isa<Constant>(E) ? E : &*VMap.lookup (E));
1260- P->eraseFromParent ();
1261- }
12621222 formLCSSA (*L, DT, LI, SE);
12631223 } else {
12641224 // Now adjust the phi nodes in the loop header to get their initial values
0 commit comments