@@ -361,12 +361,17 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
361361/// is known at the second-to-last.
362362static bool shouldPeelLastIteration (Loop &L, CmpPredicate Pred,
363363 const SCEVAddRecExpr *LeftAR,
364- const SCEV *RightSCEV,
365- ScalarEvolution &SE ) {
364+ const SCEV *RightSCEV, ScalarEvolution &SE,
365+ const TargetTransformInfo &TTI ) {
366366 if (!canPeelLastIteration (L, SE))
367367 return false ;
368368
369369 const SCEV *BTC = SE.getBackedgeTakenCount (&L);
370+ SCEVExpander Expander (SE, L.getHeader ()->getDataLayout (), " loop-peel" );
371+ if (!SE.isKnownNonZero (BTC) && Expander.isHighCostExpansion (BTC, &L, SCEVCheapExpansionBudget, &TTI,
372+ L.getLoopPredecessor ()->getTerminator ()))
373+ return false ;
374+
370375 const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration (BTC, SE);
371376 const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration (
372377 SE.getMinusSCEV (BTC, SE.getOne (BTC->getType ())), SE);
@@ -388,7 +393,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
388393// ..
389394// }
390395static std::pair<unsigned , unsigned >
391- countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
396+ countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
397+ const TargetTransformInfo &TTI) {
392398 assert (L.isLoopSimplifyForm () && " Loop needs to be in loop simplify form" );
393399 unsigned DesiredPeelCount = 0 ;
394400 unsigned DesiredPeelCountLast = 0 ;
@@ -476,7 +482,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
476482 const SCEV *Step = LeftAR->getStepRecurrence (SE);
477483 if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
478484 Pred)) {
479- if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE))
485+ if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE, TTI ))
480486 DesiredPeelCountLast = 1 ;
481487 return ;
482488 }
@@ -590,8 +596,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
590596void llvm::computePeelCount (Loop *L, unsigned LoopSize,
591597 TargetTransformInfo::PeelingPreferences &PP,
592598 unsigned TripCount, DominatorTree &DT,
593- ScalarEvolution &SE, AssumptionCache *AC ,
594- unsigned Threshold) {
599+ ScalarEvolution &SE, const TargetTransformInfo &TTI ,
600+ AssumptionCache *AC, unsigned Threshold) {
595601 assert (LoopSize > 0 && " Zero loop size is not allowed!" );
596602 // Save the PP.PeelCount value set by the target in
597603 // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -653,7 +659,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
653659 }
654660
655661 const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
656- countToEliminateCompares (*L, MaxPeelCount, SE);
662+ countToEliminateCompares (*L, MaxPeelCount, SE, TTI );
657663 DesiredPeelCount = std::max (DesiredPeelCount, CountToEliminateCmps);
658664
659665 if (DesiredPeelCount == 0 )
@@ -911,9 +917,10 @@ static void cloneLoopBlocks(
911917 // loop iteration. Since this copy is no longer part of the loop, we
912918 // resolve this statically:
913919 if (PeelLast) {
914- // For the last iteration, we use the value from the latch of the original
915- // loop directly.
916- //
920+ // For the last iteration, we introduce new phis for each header phi in
921+ // InsertTop, using the incoming value from the preheader for the original
922+ // preheader (when skipping the main loop) and the incoming value from the
923+ // latch for the latch (when continuing from the main loop).
917924 IRBuilder<> B (InsertTop->getTerminator ());
918925 for (BasicBlock::iterator I = Header->begin (); isa<PHINode>(I); ++I) {
919926 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
@@ -1100,9 +1107,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11001107 NewPreHeader = nullptr ;
11011108
11021109 // If the original loop may only execute a single iteration we need to
1103- // insert a trip count check and skip the peeled loop if necessary.
1104- if (!SE-> isKnownPredicate (CmpInst::ICMP_UGT, BTC,
1105- SE->getZero (BTC-> getType ()) )) {
1110+ // insert a trip count check and skip the original loop with the last
1111+ // iteration peeled off if necessary.
1112+ if (! SE->isKnownNonZero (BTC)) {
11061113 NewPreHeader = SplitEdge (PreHeader, Header, &DT, LI);
11071114 SCEVExpander Expander (*SE, Latch->getDataLayout (), " loop-peel" );
11081115
@@ -1117,12 +1124,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11171124
11181125 // PreHeader now dominates InsertTop.
11191126 DT.changeImmediateDominator (InsertTop, PreHeader);
1120-
1121- // If we branch from PreHeader to InsertTop, we are guaranteed to execute
1122- // the peeled iteration, so the exit values from the original loop are
1123- // dead. Use poison for them.
1124- for (auto &PN : InsertTop->phis ())
1125- PN.addIncoming (PoisonValue::get (PN.getType ()), PreHeader);
11261127 }
11271128 } else {
11281129 // It is convenient to split the preheader into 3 parts - two blocks to
@@ -1252,9 +1253,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
12521253
12531254 if (PeelLast) {
12541255 // Now adjust users of the original exit values by replacing them with the
1255- // exit value from the peeled iteration.
1256- for (const auto &[P, E] : ExitValues)
1256+ // exit value from the peeled iteration and remove them.
1257+ for (const auto &[P, E] : ExitValues) {
12571258 P->replaceAllUsesWith (isa<Constant>(E) ? E : &*VMap.lookup (E));
1259+ P->eraseFromParent ();
1260+ }
12581261 formLCSSA (*L, DT, LI, SE);
12591262 } else {
12601263 // Now adjust the phi nodes in the loop header to get their initial values
0 commit comments