3838#include " llvm/Transforms/Utils/Cloning.h"
3939#include " llvm/Transforms/Utils/LoopSimplify.h"
4040#include " llvm/Transforms/Utils/LoopUtils.h"
41+ #include " llvm/Transforms/Utils/ScalarEvolutionExpander.h"
4142#include " llvm/Transforms/Utils/ValueMapper.h"
4243#include < algorithm>
4344#include < cassert>
@@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
330331
331332bool llvm::canPeelLastIteration (const Loop &L, ScalarEvolution &SE) {
332333 const SCEV *BTC = SE.getBackedgeTakenCount (&L);
333- // The loop must execute at least 2 iterations to guarantee that peeled
334- // iteration executes.
335- // TODO: Add checks during codegen.
336- if (isa<SCEVCouldNotCompute>(BTC) ||
337- !SE.isKnownPredicate (CmpInst::ICMP_UGT, BTC, SE.getZero (BTC->getType ())))
334+ if (isa<SCEVCouldNotCompute>(BTC))
338335 return false ;
339336
340337 // Check if the exit condition of the loop can be adjusted by the peeling
@@ -354,6 +351,7 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
354351 m_BasicBlock (Succ1), m_BasicBlock (Succ2))) &&
355352 ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader ()) ||
356353 (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader ())) &&
354+ Bound->getType ()->isIntegerTy () &&
357355 SE.isLoopInvariant (SE.getSCEV (Bound), &L) &&
358356 match (SE.getSCEV (Inc),
359357 m_scev_AffineAddRec (m_SCEV (), m_scev_One (), m_SpecificLoop (&L)));
@@ -364,12 +362,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
364362// / is known at the second-to-last.
365363static bool shouldPeelLastIteration (Loop &L, CmpPredicate Pred,
366364 const SCEVAddRecExpr *LeftAR,
367- const SCEV *RightSCEV,
368- ScalarEvolution &SE ) {
365+ const SCEV *RightSCEV, ScalarEvolution &SE,
366+ const TargetTransformInfo &TTI ) {
369367 if (!canPeelLastIteration (L, SE))
370368 return false ;
371369
372370 const SCEV *BTC = SE.getBackedgeTakenCount (&L);
371+ SCEVExpander Expander (SE, L.getHeader ()->getDataLayout (), " loop-peel" );
372+ if (!SE.isKnownNonZero (BTC) &&
373+ Expander.isHighCostExpansion (BTC, &L, SCEVCheapExpansionBudget, &TTI,
374+ L.getLoopPredecessor ()->getTerminator ()))
375+ return false ;
376+
373377 const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration (BTC, SE);
374378 const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration (
375379 SE.getMinusSCEV (BTC, SE.getOne (BTC->getType ())), SE);
@@ -391,7 +395,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
391395// ..
392396// }
393397static std::pair<unsigned , unsigned >
394- countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
398+ countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
399+ const TargetTransformInfo &TTI) {
395400 assert (L.isLoopSimplifyForm () && " Loop needs to be in loop simplify form" );
396401 unsigned DesiredPeelCount = 0 ;
397402 unsigned DesiredPeelCountLast = 0 ;
@@ -479,7 +484,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
479484 const SCEV *Step = LeftAR->getStepRecurrence (SE);
480485 if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
481486 Pred)) {
482- if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE))
487+ if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE, TTI ))
483488 DesiredPeelCountLast = 1 ;
484489 return ;
485490 }
@@ -593,8 +598,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
593598void llvm::computePeelCount (Loop *L, unsigned LoopSize,
594599 TargetTransformInfo::PeelingPreferences &PP,
595600 unsigned TripCount, DominatorTree &DT,
596- ScalarEvolution &SE, AssumptionCache *AC ,
597- unsigned Threshold) {
601+ ScalarEvolution &SE, const TargetTransformInfo &TTI ,
602+ AssumptionCache *AC, unsigned Threshold) {
598603 assert (LoopSize > 0 && " Zero loop size is not allowed!" );
599604 // Save the PP.PeelCount value set by the target in
600605 // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -656,7 +661,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
656661 }
657662
658663 const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
659- countToEliminateCompares (*L, MaxPeelCount, SE);
664+ countToEliminateCompares (*L, MaxPeelCount, SE, TTI );
660665 DesiredPeelCount = std::max (DesiredPeelCount, CountToEliminateCmps);
661666
662667 if (DesiredPeelCount == 0 )
@@ -822,7 +827,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
822827// / instructions in the last peeled-off iteration.
823828static void cloneLoopBlocks (
824829 Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
825- BasicBlock *InsertBot,
830+ BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
826831 SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
827832 SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
828833 ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -914,12 +919,22 @@ static void cloneLoopBlocks(
914919 // loop iteration. Since this copy is no longer part of the loop, we
915920 // resolve this statically:
916921 if (PeelLast) {
917- // For the last iteration, we use the value from the latch of the original
918- // loop directly.
922+ // For the last iteration, we introduce new phis for each header phi in
923+ // InsertTop, using the incoming value from the preheader for the original
924+ // preheader (when skipping the main loop) and the incoming value from the
925+ // latch for the latch (when continuing from the main loop).
926+ IRBuilder<> B (InsertTop, InsertTop->getFirstNonPHIIt ());
919927 for (BasicBlock::iterator I = Header->begin (); isa<PHINode>(I); ++I) {
920928 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
921- VMap[&*I] = NewPHI->getIncomingValueForBlock (Latch );
929+ PHINode *PN = B. CreatePHI ( NewPHI->getType (), 2 );
922930 NewPHI->eraseFromParent ();
931+ if (OrigPreHeader)
932+ PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (PreHeader),
933+ OrigPreHeader);
934+
935+ PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (Latch),
936+ Latch);
937+ VMap[&*I] = PN;
923938 }
924939 } else {
925940 // For the first iteration, we use the value from the preheader directly.
@@ -1053,7 +1068,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10531068 // Set up all the necessary basic blocks.
10541069 BasicBlock *InsertTop;
10551070 BasicBlock *InsertBot;
1056- BasicBlock *NewPreHeader;
1071+ BasicBlock *NewPreHeader = nullptr ;
10571072 DenseMap<Instruction *, Value *> ExitValues;
10581073 if (PeelLast) {
10591074 // It is convenient to split the single exit block from the latch the
@@ -1084,11 +1099,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10841099 for (PHINode &P : Exit->phis ())
10851100 ExitValues[&P] = P.getIncomingValueForBlock (Latch);
10861101
1102+ const SCEV *BTC = SE->getBackedgeTakenCount (L);
1103+
10871104 InsertTop = SplitEdge (Latch, Exit, &DT, LI);
10881105 InsertBot = SplitBlock (InsertTop, InsertTop->getTerminator (), &DT, LI);
10891106
10901107 InsertTop->setName (Exit->getName () + " .peel.begin" );
10911108 InsertBot->setName (Exit->getName () + " .peel.next" );
1109+ NewPreHeader = nullptr ;
1110+
1111+ // If the original loop may execute only a single iteration (its
1112+ // backedge-taken count may be zero), insert a trip count check that
1113+ // branches straight to the peeled last iteration, skipping the loop.
1114+ if (!SE->isKnownNonZero (BTC)) {
1115+ NewPreHeader = SplitEdge (PreHeader, Header, &DT, LI);
1116+ SCEVExpander Expander (*SE, Latch->getDataLayout (), " loop-peel" );
1117+
1118+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator ());
1119+ Value *BTCValue =
1120+ Expander.expandCodeFor (BTC, BTC->getType (), PreHeaderBR);
1121+ IRBuilder<> B (PreHeaderBR);
1122+ Value *Cond =
1123+ B.CreateICmpNE (BTCValue, ConstantInt::get (BTCValue->getType (), 0 ));
1124+ B.CreateCondBr (Cond, NewPreHeader, InsertTop);
1125+ PreHeaderBR->eraseFromParent ();
1126+
1127+ // PreHeader now dominates InsertTop.
1128+ DT.changeImmediateDominator (InsertTop, PreHeader);
1129+ }
10921130 } else {
10931131 // It is convenient to split the preheader into 3 parts - two blocks to
10941132 // anchor the peeled copy of the loop body, and a new preheader for the
@@ -1162,8 +1200,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11621200 for (unsigned Iter = 0 ; Iter < PeelCount; ++Iter) {
11631201 SmallVector<BasicBlock *, 8 > NewBlocks;
11641202
1165- cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166- NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
1203+ cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot,
1204+ NewPreHeader ? PreHeader : nullptr , ExitEdges, NewBlocks,
1205+ LoopBlocks, VMap, LVMap, &DT, LI,
11671206 LoopLocalNoAliasDeclScopes, *SE);
11681207
11691208 // Remap to use values from the current iteration instead of the
@@ -1216,9 +1255,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
12161255
12171256 if (PeelLast) {
12181257 // Now adjust users of the original exit values by replacing them with the
1219- // exit value from the peeled iteration.
1220- for (const auto &[P, E] : ExitValues)
1258+ // exit value from the peeled iteration and erase the original exit phis.
1259+ for (const auto &[P, E] : ExitValues) {
12211260 P->replaceAllUsesWith (isa<Constant>(E) ? E : &*VMap.lookup (E));
1261+ P->eraseFromParent ();
1262+ }
12221263 formLCSSA (*L, DT, LI, SE);
12231264 } else {
12241265 // Now adjust the phi nodes in the loop header to get their initial values
0 commit comments