3838#include " llvm/Transforms/Utils/Cloning.h"
3939#include " llvm/Transforms/Utils/LoopSimplify.h"
4040#include " llvm/Transforms/Utils/LoopUtils.h"
41+ #include " llvm/Transforms/Utils/ScalarEvolutionExpander.h"
4142#include " llvm/Transforms/Utils/ValueMapper.h"
4243#include < algorithm>
4344#include < cassert>
@@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
330331
331332bool llvm::canPeelLastIteration (const Loop &L, ScalarEvolution &SE) {
332333 const SCEV *BTC = SE.getBackedgeTakenCount (&L);
333- // The loop must execute at least 2 iterations to guarantee that peeled
334- // iteration executes.
335- // TODO: Add checks during codegen.
336- if (isa<SCEVCouldNotCompute>(BTC) ||
337- !SE.isKnownPredicate (CmpInst::ICMP_UGT, BTC, SE.getZero (BTC->getType ())))
334+ if (isa<SCEVCouldNotCompute>(BTC))
338335 return false ;
339336
340337 // Check if the exit condition of the loop can be adjusted by the peeling
@@ -364,12 +361,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
364361// / is known at the second-to-last.
365362static bool shouldPeelLastIteration (Loop &L, CmpPredicate Pred,
366363 const SCEVAddRecExpr *LeftAR,
367- const SCEV *RightSCEV,
368- ScalarEvolution &SE ) {
364+ const SCEV *RightSCEV, ScalarEvolution &SE,
365+ const TargetTransformInfo &TTI ) {
369366 if (!canPeelLastIteration (L, SE))
370367 return false ;
371368
372369 const SCEV *BTC = SE.getBackedgeTakenCount (&L);
370+ SCEVExpander Expander (SE, L.getHeader ()->getDataLayout (), " loop-peel" );
371+ if (!SE.isKnownNonZero (BTC) &&
372+ Expander.isHighCostExpansion (BTC, &L, SCEVCheapExpansionBudget, &TTI,
373+ L.getLoopPredecessor ()->getTerminator ()))
374+ return false ;
375+
373376 const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration (BTC, SE);
374377 const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration (
375378 SE.getMinusSCEV (BTC, SE.getOne (BTC->getType ())), SE);
@@ -391,7 +394,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
391394// ..
392395// }
393396static std::pair<unsigned , unsigned >
394- countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
397+ countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
398+ const TargetTransformInfo &TTI) {
395399 assert (L.isLoopSimplifyForm () && " Loop needs to be in loop simplify form" );
396400 unsigned DesiredPeelCount = 0 ;
397401 unsigned DesiredPeelCountLast = 0 ;
@@ -479,7 +483,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
479483 const SCEV *Step = LeftAR->getStepRecurrence (SE);
480484 if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
481485 Pred)) {
482- if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE))
486+ if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE, TTI ))
483487 DesiredPeelCountLast = 1 ;
484488 return ;
485489 }
@@ -593,8 +597,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
593597void llvm::computePeelCount (Loop *L, unsigned LoopSize,
594598 TargetTransformInfo::PeelingPreferences &PP,
595599 unsigned TripCount, DominatorTree &DT,
596- ScalarEvolution &SE, AssumptionCache *AC ,
597- unsigned Threshold) {
600+ ScalarEvolution &SE, const TargetTransformInfo &TTI ,
601+ AssumptionCache *AC, unsigned Threshold) {
598602 assert (LoopSize > 0 && " Zero loop size is not allowed!" );
599603 // Save the PP.PeelCount value set by the target in
600604 // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -656,7 +660,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
656660 }
657661
658662 const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
659- countToEliminateCompares (*L, MaxPeelCount, SE);
663+ countToEliminateCompares (*L, MaxPeelCount, SE, TTI );
660664 DesiredPeelCount = std::max (DesiredPeelCount, CountToEliminateCmps);
661665
662666 if (DesiredPeelCount == 0 )
@@ -822,7 +826,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
822826// / instructions in the last peeled-off iteration.
823827static void cloneLoopBlocks (
824828 Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
825- BasicBlock *InsertBot,
829+ BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
826830 SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
827831 SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
828832 ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -914,12 +918,22 @@ static void cloneLoopBlocks(
914918 // loop iteration. Since this copy is no longer part of the loop, we
915919 // resolve this statically:
916920 if (PeelLast) {
917- // For the last iteration, we use the value from the latch of the original
918- // loop directly.
921+ // For the last iteration, we create a new phi in InsertTop for each header
922+ // phi: the edge from the original preheader (main loop skipped) carries the
923+ // header phi's preheader value, and the edge from the latch (continuing
924+ // from the main loop) carries the header phi's latch value.
925+ IRBuilder<> B (InsertTop->getTerminator ());
919926 for (BasicBlock::iterator I = Header->begin (); isa<PHINode>(I); ++I) {
920927 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
921- VMap[&*I] = NewPHI->getIncomingValueForBlock (Latch );
928+ PHINode *PN = B. CreatePHI ( NewPHI->getType (), 2 );
922929 NewPHI->eraseFromParent ();
930+ if (OrigPreHeader)
931+ PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (PreHeader),
932+ OrigPreHeader);
933+
934+ PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (Latch),
935+ Latch);
936+ VMap[&*I] = PN;
923937 }
924938 } else {
925939 // For the first iteration, we use the value from the preheader directly.
@@ -1053,7 +1067,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10531067 // Set up all the necessary basic blocks.
10541068 BasicBlock *InsertTop;
10551069 BasicBlock *InsertBot;
1056- BasicBlock *NewPreHeader;
1070+ BasicBlock *NewPreHeader = nullptr ;
10571071 DenseMap<Instruction *, Value *> ExitValues;
10581072 if (PeelLast) {
10591073 // It is convenient to split the single exit block from the latch the
@@ -1084,11 +1098,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10841098 for (PHINode &P : Exit->phis ())
10851099 ExitValues[&P] = P.getIncomingValueForBlock (Latch);
10861100
1101+ const SCEV *BTC = SE->getBackedgeTakenCount (L);
1102+
10871103 InsertTop = SplitEdge (Latch, Exit, &DT, LI);
10881104 InsertBot = SplitBlock (InsertTop, InsertTop->getTerminator (), &DT, LI);
10891105
10901106 InsertTop->setName (Exit->getName () + " .peel.begin" );
10911107 InsertBot->setName (Exit->getName () + " .peel.next" );
1108+ NewPreHeader = nullptr ;
1109+
1110+ // If the original loop may only execute a single iteration we need to
1111+ // insert a trip count check and skip the original loop with the last
1112+ // iteration peeled off if necessary.
1113+ if (!SE->isKnownNonZero (BTC)) {
1114+ NewPreHeader = SplitEdge (PreHeader, Header, &DT, LI);
1115+ SCEVExpander Expander (*SE, Latch->getDataLayout (), " loop-peel" );
1116+
1117+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator ());
1118+ Value *BTCValue =
1119+ Expander.expandCodeFor (BTC, BTC->getType (), PreHeaderBR);
1120+ IRBuilder<> B (PreHeaderBR);
1121+ Value *Cond =
1122+ B.CreateICmpNE (BTCValue, ConstantInt::get (BTCValue->getType (), 0 ));
1123+ B.CreateCondBr (Cond, NewPreHeader, InsertTop);
1124+ PreHeaderBR->eraseFromParent ();
1125+
1126+ // PreHeader now dominates InsertTop.
1127+ DT.changeImmediateDominator (InsertTop, PreHeader);
1128+ }
10921129 } else {
10931130 // It is convenient to split the preheader into 3 parts - two blocks to
10941131 // anchor the peeled copy of the loop body, and a new preheader for the
@@ -1162,8 +1199,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11621199 for (unsigned Iter = 0 ; Iter < PeelCount; ++Iter) {
11631200 SmallVector<BasicBlock *, 8 > NewBlocks;
11641201
1165- cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166- NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
1202+ cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot,
1203+ NewPreHeader ? PreHeader : nullptr , ExitEdges, NewBlocks,
1204+ LoopBlocks, VMap, LVMap, &DT, LI,
11671205 LoopLocalNoAliasDeclScopes, *SE);
11681206
11691207 // Remap to use values from the current iteration instead of the
@@ -1216,9 +1254,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
12161254
12171255 if (PeelLast) {
12181256 // Now adjust users of the original exit values by replacing them with the
1219- // exit value from the peeled iteration.
1220- for (const auto &[P, E] : ExitValues)
1257+ // exit value from the peeled iteration, then erase the original exit phis.
1258+ for (const auto &[P, E] : ExitValues) {
12211259 P->replaceAllUsesWith (isa<Constant>(E) ? E : &*VMap.lookup (E));
1260+ P->eraseFromParent ();
1261+ }
12221262 formLCSSA (*L, DT, LI, SE);
12231263 } else {
12241264 // Now adjust the phi nodes in the loop header to get their initial values
0 commit comments