@@ -201,18 +201,27 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
201201// / unroll count is non-zero.
202202// /
203203// / This function performs the following:
204- // / - Update PHI nodes at the unrolling loop exit and epilog loop exit
205- // / - Create PHI nodes at the unrolling loop exit to combine
206- // / values that exit the unrolling loop code and jump around it.
204+ // / - Update PHI nodes at the epilog loop exit
205+ // / - Create PHI nodes at the unrolling loop exit and epilog preheader to
206+ // / combine values that exit the unrolling loop code and jump around it.
207207// / - Update PHI operands in the epilog loop by the new PHI nodes
208- // / - Branch around the epilog loop if extra iters (ModVal) is zero.
208+ // / - At the unrolling loop exit, branch around the epilog loop if extra iters
209+ // (ModVal) is zero.
210+ // / - At the epilog preheader, add an llvm.assume call that extra iters is
211+ // / non-zero. If the unrolling loop exit is the predecessor, the above new
212+ // / branch guarantees that assumption. If the unrolling loop preheader is the
213+ // / predecessor, then the required first iteration from the original loop has
214+ // / yet to be executed, so it must be executed in the epilog loop. If we
215+ // / later unroll the epilog loop, that llvm.assume call somehow enables
216+ // / ScalarEvolution to compute a epilog loop maximum trip count, which enables
217+ // / eliminating the branch at the end of the final unrolled epilog iteration.
209218// /
210219static void ConnectEpilog (Loop *L, Value *ModVal, BasicBlock *NewExit,
211220 BasicBlock *Exit, BasicBlock *PreHeader,
212221 BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
213222 ValueToValueMapTy &VMap, DominatorTree *DT,
214223 LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
215- unsigned Count) {
224+ unsigned Count, AssumptionCache &AC ) {
216225 BasicBlock *Latch = L->getLoopLatch ();
217226 assert (Latch && " Loop must have a latch" );
218227 BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -231,7 +240,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
231240 // EpilogLatch
232241 // Exit (EpilogPN)
233242
234- // Update PHI nodes at NewExit and Exit.
243+ // Update PHI nodes at Exit.
235244 for (PHINode &PN : NewExit->phis ()) {
236245 // PN should be used in another PHI located in Exit block as
237246 // Exit was split by SplitBlockPredecessors into Exit and NewExit
@@ -246,15 +255,11 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
246255 // epilogue edges have already been added.
247256 //
248257 // There is EpilogPreHeader incoming block instead of NewExit as
249- // NewExit was spilt 1 more time to get EpilogPreHeader.
258+ // NewExit was split 1 more time to get EpilogPreHeader.
250259 assert (PN.hasOneUse () && " The phi should have 1 use" );
251260 PHINode *EpilogPN = cast<PHINode>(PN.use_begin ()->getUser ());
252261 assert (EpilogPN->getParent () == Exit && " EpilogPN should be in Exit block" );
253262
254- // Add incoming PreHeader from branch around the Loop
255- PN.addIncoming (PoisonValue::get (PN.getType ()), PreHeader);
256- SE.forgetValue (&PN);
257-
258263 Value *V = PN.getIncomingValueForBlock (Latch);
259264 Instruction *I = dyn_cast<Instruction>(V);
260265 if (I && L->contains (I))
@@ -271,35 +276,52 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
271276 NewExit);
272277 // Now PHIs should look like:
273278 // NewExit:
274- // PN = PHI [I, Latch], [poison, PreHeader]
279+ // PN = PHI [I, Latch]
275280 // ...
276281 // Exit:
277282 // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
278283 }
279284
280- // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
281- // Update corresponding PHI nodes in epilog loop.
285+ // Create PHI nodes at NewExit (from the unrolling loop Latch) and at
286+ // EpilogPreHeader (from PreHeader and NewExit). Update corresponding PHI
287+ // nodes in epilog loop.
282288 for (BasicBlock *Succ : successors (Latch)) {
283289 // Skip this as we already updated phis in exit blocks.
284290 if (!L->contains (Succ))
285291 continue ;
292+
293+ // Succ here appears to always be just L->getHeader(). Otherwise, how do we
294+ // know its corresponding epilog block (from VMap) is EpilogHeader and thus
295+ // EpilogPreHeader is the right incoming block for VPN, as set below?
296+ // TODO: Can we thus avoid the enclosing loop over successors?
297+ assert (Succ == L->getHeader () &&
298+ " Expect the only in-loop successor of latch to be the loop header" );
299+
286300 for (PHINode &PN : Succ->phis ()) {
287- // Add new PHI nodes to the loop exit block and update epilog
288- // PHIs with the new PHI values.
289- PHINode *NewPN = PHINode::Create (PN.getType (), 2 , PN.getName () + " .unr" );
290- NewPN->insertBefore (NewExit->getFirstNonPHIIt ());
291- // Adding a value to the new PHI node from the unrolling loop preheader.
292- NewPN->addIncoming (PN.getIncomingValueForBlock (NewPreHeader), PreHeader);
293- // Adding a value to the new PHI node from the unrolling loop latch.
294- NewPN->addIncoming (PN.getIncomingValueForBlock (Latch), Latch);
301+ // Add new PHI nodes to the loop exit block.
302+ PHINode *NewPN0 = PHINode::Create (PN.getType (), /* NumReservedValues=*/ 1 ,
303+ PN.getName () + " .unr" );
304+ NewPN0->insertBefore (NewExit->getFirstNonPHIIt ());
305+ // Add value to the new PHI node from the unrolling loop latch.
306+ NewPN0->addIncoming (PN.getIncomingValueForBlock (Latch), Latch);
307+
308+ // Add new PHI nodes to EpilogPreHeader.
309+ PHINode *NewPN1 = PHINode::Create (PN.getType (), /* NumReservedValues=*/ 2 ,
310+ PN.getName () + " .epil.init" );
311+ NewPN1->insertBefore (EpilogPreHeader->getFirstNonPHIIt ());
312+ // Add value to the new PHI node from the unrolling loop preheader.
313+ NewPN1->addIncoming (PN.getIncomingValueForBlock (NewPreHeader), PreHeader);
314+ // Add value to the new PHI node from the epilog loop guard.
315+ NewPN1->addIncoming (NewPN0, NewExit);
295316
296317 // Update the existing PHI node operand with the value from the new PHI
297318 // node. Corresponding instruction in epilog loop should be PHI.
298319 PHINode *VPN = cast<PHINode>(VMap[&PN]);
299- VPN->setIncomingValueForBlock (EpilogPreHeader, NewPN );
320+ VPN->setIncomingValueForBlock (EpilogPreHeader, NewPN1 );
300321 }
301322 }
302323
324+ // In NewExit, branch around the epilog loop if no extra iters.
303325 Instruction *InsertPt = NewExit->getTerminator ();
304326 IRBuilder<> B (InsertPt);
305327 Value *BrLoopExit = B.CreateIsNotNull (ModVal, " lcmp.mod" );
@@ -308,7 +330,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
308330 SmallVector<BasicBlock*, 4 > Preds (predecessors (Exit));
309331 SplitBlockPredecessors (Exit, Preds, " .epilog-lcssa" , DT, LI, nullptr ,
310332 PreserveLCSSA);
311- // Add the branch to the exit block (around the unrolling loop)
333+ // Add the branch to the exit block (around the epilog loop)
312334 MDNode *BranchWeights = nullptr ;
313335 if (hasBranchWeightMD (*Latch->getTerminator ())) {
314336 // Assume equal distribution in interval [0, Count).
@@ -322,10 +344,11 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
322344 DT->changeImmediateDominator (Exit, NewDom);
323345 }
324346
325- // Split the main loop exit to maintain canonicalization guarantees.
326- SmallVector<BasicBlock*, 4 > NewExitPreds{Latch};
327- SplitBlockPredecessors (NewExit, NewExitPreds, " .loopexit" , DT, LI, nullptr ,
328- PreserveLCSSA);
347+ // In EpilogPreHeader, assume extra iters is non-zero.
348+ IRBuilder<> B2 (EpilogPreHeader, EpilogPreHeader->getFirstNonPHIIt ());
349+ Value *ModIsNotNull = B2.CreateIsNotNull (ModVal, " lcmp.mod" );
350+ AssumeInst *AI = cast<AssumeInst>(B2.CreateAssumption (ModIsNotNull));
351+ AC.registerAssumption (AI);
329352}
330353
331354// / Create a clone of the blocks in a loop and connect them together. A new
@@ -795,7 +818,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
795818 ConstantInt::get (BECount->getType (),
796819 Count - 1 )) :
797820 B.CreateIsNotNull (ModVal, " lcmp.mod" );
798- BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
821+ BasicBlock *RemainderLoop =
822+ UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
799823 BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
800824 // Branch to either remainder (extra iterations) loop or unrolling loop.
801825 MDNode *BranchWeights = nullptr ;
@@ -808,7 +832,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
808832 PreHeaderBR->eraseFromParent ();
809833 if (DT) {
810834 if (UseEpilogRemainder)
811- DT->changeImmediateDominator (NewExit , PreHeader);
835+ DT->changeImmediateDominator (EpilogPreHeader , PreHeader);
812836 else
813837 DT->changeImmediateDominator (PrologExit, PreHeader);
814838 }
@@ -880,7 +904,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
880904 // from both the original loop and the remainder code reaching the exit
881905 // blocks. While the IDom of these exit blocks were from the original loop,
882906 // now the IDom is the preheader (which decides whether the original loop or
883- // remainder code should run).
907+ // remainder code should run) unless the block still has just the original
908+ // predecessor (such as NewExit in the case of an epilog remainder).
884909 if (DT && !L->getExitingBlock ()) {
885910 SmallVector<BasicBlock *, 16 > ChildrenToUpdate;
886911 // NB! We have to examine the dom children of all loop blocks, not just
@@ -891,7 +916,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
891916 auto *DomNodeBB = DT->getNode (BB);
892917 for (auto *DomChild : DomNodeBB->children ()) {
893918 auto *DomChildBB = DomChild->getBlock ();
894- if (!L->contains (LI->getLoopFor (DomChildBB)))
919+ if (!L->contains (LI->getLoopFor (DomChildBB)) &&
920+ DomChildBB->getUniquePredecessor () != BB)
895921 ChildrenToUpdate.push_back (DomChildBB);
896922 }
897923 }
@@ -930,7 +956,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
930956 // Connect the epilog code to the original loop and update the
931957 // PHI functions.
932958 ConnectEpilog (L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
933- NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
959+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC );
934960
935961 // Update counter in loop for unrolling.
936962 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
0 commit comments