3838#include  " llvm/Transforms/Utils/Cloning.h" 
3939#include  " llvm/Transforms/Utils/LoopSimplify.h" 
4040#include  " llvm/Transforms/Utils/LoopUtils.h" 
41+ #include  " llvm/Transforms/Utils/ScalarEvolutionExpander.h" 
4142#include  " llvm/Transforms/Utils/ValueMapper.h" 
4243#include  < algorithm> 
4344#include  < cassert> 
@@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
330331
331332bool  llvm::canPeelLastIteration (const  Loop &L, ScalarEvolution &SE) {
332333  const  SCEV *BTC = SE.getBackedgeTakenCount (&L);
333-   //  The loop must execute at least 2 iterations to guarantee that peeled
334-   //  iteration executes.
335-   //  TODO: Add checks during codegen.
336-   if  (isa<SCEVCouldNotCompute>(BTC) ||
337-       !SE.isKnownPredicate (CmpInst::ICMP_UGT, BTC, SE.getZero (BTC->getType ())))
334+   if  (isa<SCEVCouldNotCompute>(BTC))
338335    return  false ;
339336
340337  //  Check if the exit condition of the loop can be adjusted by the peeling
@@ -364,12 +361,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
364361// / is known at the second-to-last.
365362static  bool  shouldPeelLastIteration (Loop &L, CmpPredicate Pred,
366363                                    const  SCEVAddRecExpr *LeftAR,
367-                                     const  SCEV *RightSCEV,
368-                                     ScalarEvolution &SE ) {
364+                                     const  SCEV *RightSCEV, ScalarEvolution &SE, 
365+                                     const  TargetTransformInfo &TTI ) {
369366  if  (!canPeelLastIteration (L, SE))
370367    return  false ;
371368
372369  const  SCEV *BTC = SE.getBackedgeTakenCount (&L);
370+   SCEVExpander Expander (SE, L.getHeader ()->getDataLayout (), " loop-peel"  );
371+   if  (!SE.isKnownNonZero (BTC) &&
372+       Expander.isHighCostExpansion (BTC, &L, SCEVCheapExpansionBudget, &TTI,
373+                                    L.getLoopPredecessor ()->getTerminator ()))
374+     return  false ;
375+ 
373376  const  SCEV *ValAtLastIter = LeftAR->evaluateAtIteration (BTC, SE);
374377  const  SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration (
375378      SE.getMinusSCEV (BTC, SE.getOne (BTC->getType ())), SE);
@@ -391,7 +394,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
391394//       ..
392395//    }
393396static  std::pair<unsigned , unsigned >
394- countToEliminateCompares (Loop &L, unsigned  MaxPeelCount, ScalarEvolution &SE) {
397+ countToEliminateCompares (Loop &L, unsigned  MaxPeelCount, ScalarEvolution &SE,
398+                          const  TargetTransformInfo &TTI) {
395399  assert (L.isLoopSimplifyForm () && " Loop needs to be in loop simplify form"  );
396400  unsigned  DesiredPeelCount = 0 ;
397401  unsigned  DesiredPeelCountLast = 0 ;
@@ -479,7 +483,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
479483    const  SCEV *Step = LeftAR->getStepRecurrence (SE);
480484    if  (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
481485                                   Pred)) {
482-       if  (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE))
486+       if  (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE, TTI ))
483487        DesiredPeelCountLast = 1 ;
484488      return ;
485489    }
@@ -593,8 +597,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
593597void  llvm::computePeelCount (Loop *L, unsigned  LoopSize,
594598                            TargetTransformInfo::PeelingPreferences &PP,
595599                            unsigned  TripCount, DominatorTree &DT,
596-                             ScalarEvolution &SE, AssumptionCache *AC ,
597-                             unsigned  Threshold) {
600+                             ScalarEvolution &SE, const  TargetTransformInfo &TTI ,
601+                             AssumptionCache *AC,  unsigned  Threshold) {
598602  assert (LoopSize > 0  && " Zero loop size is not allowed!"  );
599603  //  Save the PP.PeelCount value set by the target in
600604  //  TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -656,7 +660,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
656660  }
657661
658662  const  auto  &[CountToEliminateCmps, CountToEliminateCmpsLast] =
659-       countToEliminateCompares (*L, MaxPeelCount, SE);
663+       countToEliminateCompares (*L, MaxPeelCount, SE, TTI );
660664  DesiredPeelCount = std::max (DesiredPeelCount, CountToEliminateCmps);
661665
662666  if  (DesiredPeelCount == 0 )
@@ -822,7 +826,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
822826// / instructions in the last peeled-off iteration.
823827static  void  cloneLoopBlocks (
824828    Loop *L, unsigned  IterNumber, bool  PeelLast, BasicBlock *InsertTop,
825-     BasicBlock *InsertBot,
829+     BasicBlock *InsertBot, BasicBlock *OrigPreHeader, 
826830    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
827831    SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
828832    ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -914,12 +918,22 @@ static void cloneLoopBlocks(
914918  //  loop iteration. Since this copy is no longer part of the loop, we
915919  //  resolve this statically:
916920  if  (PeelLast) {
917-     //  For the last iteration, we use the value from the latch of the original
918-     //  loop directly.
921+     //  For the last iteration, we introduce new phis for each header phi in
922+     //  InsertTop, using the incoming value from the preheader for the original
923+     //  preheader (when skipping the main loop) and the incoming value from the
924+     //  latch for the latch (when continuing from the main loop).
925+     IRBuilder<> B (InsertTop->getTerminator ());
919926    for  (BasicBlock::iterator I = Header->begin (); isa<PHINode>(I); ++I) {
920927      PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
921-       VMap[&*I]  = NewPHI->getIncomingValueForBlock (Latch );
928+       PHINode *PN  = B. CreatePHI ( NewPHI->getType (),  2 );
922929      NewPHI->eraseFromParent ();
930+       if  (OrigPreHeader)
931+         PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (PreHeader),
932+                         OrigPreHeader);
933+ 
934+       PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (Latch),
935+                       Latch);
936+       VMap[&*I] = PN;
923937    }
924938  } else  {
925939    //  For the first iteration, we use the value from the preheader directly.
@@ -1053,7 +1067,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10531067  //  Set up all the necessary basic blocks.
10541068  BasicBlock *InsertTop;
10551069  BasicBlock *InsertBot;
1056-   BasicBlock *NewPreHeader;
1070+   BasicBlock *NewPreHeader =  nullptr ;
10571071  DenseMap<Instruction *, Value *> ExitValues;
10581072  if  (PeelLast) {
10591073    //  It is convenient to split the single exit block from the latch the
@@ -1084,11 +1098,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
10841098    for  (PHINode &P : Exit->phis ())
10851099      ExitValues[&P] = P.getIncomingValueForBlock (Latch);
10861100
1101+     const  SCEV *BTC = SE->getBackedgeTakenCount (L);
1102+ 
10871103    InsertTop = SplitEdge (Latch, Exit, &DT, LI);
10881104    InsertBot = SplitBlock (InsertTop, InsertTop->getTerminator (), &DT, LI);
10891105
10901106    InsertTop->setName (Exit->getName () + " .peel.begin"  );
10911107    InsertBot->setName (Exit->getName () + " .peel.next"  );
1108+     NewPreHeader = nullptr ;
1109+ 
1110+     //  If the original loop may only execute a single iteration we need to
1111+     //  insert a trip count check and skip the original loop with the last
1112+     //  iteration peeled off if necessary.
1113+     if  (!SE->isKnownNonZero (BTC)) {
1114+       NewPreHeader = SplitEdge (PreHeader, Header, &DT, LI);
1115+       SCEVExpander Expander (*SE, Latch->getDataLayout (), " loop-peel"  );
1116+ 
1117+       BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator ());
1118+       Value *BTCValue =
1119+           Expander.expandCodeFor (BTC, BTC->getType (), PreHeaderBR);
1120+       IRBuilder<> B (PreHeaderBR);
1121+       Value *Cond =
1122+           B.CreateICmpNE (BTCValue, ConstantInt::get (BTCValue->getType (), 0 ));
1123+       B.CreateCondBr (Cond, NewPreHeader, InsertTop);
1124+       PreHeaderBR->eraseFromParent ();
1125+ 
1126+       //  PreHeader now dominates InsertTop.
1127+       DT.changeImmediateDominator (InsertTop, PreHeader);
1128+     }
10921129  } else  {
10931130    //  It is convenient to split the preheader into 3 parts - two blocks to
10941131    //  anchor the peeled copy of the loop body, and a new preheader for the
@@ -1162,8 +1199,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11621199  for  (unsigned  Iter = 0 ; Iter < PeelCount; ++Iter) {
11631200    SmallVector<BasicBlock *, 8 > NewBlocks;
11641201
1165-     cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166-                     NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
1202+     cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot,
1203+                     NewPreHeader ? PreHeader : nullptr , ExitEdges, NewBlocks,
1204+                     LoopBlocks, VMap, LVMap, &DT, LI,
11671205                    LoopLocalNoAliasDeclScopes, *SE);
11681206
11691207    //  Remap to use values from the current iteration instead of the
@@ -1216,9 +1254,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
12161254
12171255  if  (PeelLast) {
12181256    //  Now adjust users of the original exit values by replacing them with the
1219-     //  exit value from the peeled iteration.
1220-     for  (const  auto  &[P, E] : ExitValues)
1257+     //  exit value from the peeled iteration and remove them .
1258+     for  (const  auto  &[P, E] : ExitValues) { 
12211259      P->replaceAllUsesWith (isa<Constant>(E) ? E : &*VMap.lookup (E));
1260+       P->eraseFromParent ();
1261+     }
12221262    formLCSSA (*L, DT, LI, SE);
12231263  } else  {
12241264    //  Now adjust the phi nodes in the loop header to get their initial values
0 commit comments