@@ -207,6 +207,32 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
207207 return Parent->getEnclosingBlockWithPredecessors ();
208208}
209209
210+ bool VPBlockUtils::isHeader (const VPBlockBase *VPB,
211+ const VPDominatorTree &VPDT) {
212+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
213+ if (!VPBB)
214+ return false ;
215+
216+ // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
217+ // VPBB as its entry, i.e., free of predecessors.
218+ if (auto *R = VPBB->getParent ())
219+ return !R->isReplicator () && VPBB->getNumPredecessors () == 0 ;
220+
221+ // A header dominates its second predecessor (the latch), with the other
222+ // predecessor being the preheader
223+ return VPB->getPredecessors ().size () == 2 &&
224+ VPDT.dominates (VPB, VPB->getPredecessors ()[1 ]);
225+ }
226+
227+ bool VPBlockUtils::isLatch (const VPBlockBase *VPB,
228+ const VPDominatorTree &VPDT) {
229+ // A latch has a header as its second successor, with its other successor
230+ // leaving the loop. A preheader OTOH has a header as its first (and only)
231+ // successor.
232+ return VPB->getNumSuccessors () == 2 &&
233+ VPBlockUtils::isHeader (VPB->getSuccessors ()[1 ], VPDT);
234+ }
235+
210236VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi () {
211237 iterator It = begin ();
212238 while (It != end () && It->isPhi ())
@@ -424,13 +450,21 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
424450 if (ParentLoop && !State.LI ->getLoopFor (NewBB))
425451 ParentLoop->addBasicBlockToLoop (NewBB, *State.LI );
426452
453+ SmallVector<VPBlockBase *> Preds;
454+ if (VPBlockUtils::isHeader (this , State.VPDT )) {
455+ // There's no block for the latch yet, connect to the preheader only.
456+ Preds = {getPredecessors ()[0 ]};
457+ } else {
458+ Preds = to_vector (getPredecessors ());
459+ }
460+
427461 // Hook up the new basic block to its predecessors.
428- for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors () ) {
462+ for (VPBlockBase *PredVPBlock : Preds ) {
429463 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
430464 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
465+ assert (CFG.VPBB2IRBB .contains (PredVPBB) &&
466+ " Predecessor basic-block not found building successor." );
431467 BasicBlock *PredBB = CFG.VPBB2IRBB [PredVPBB];
432-
433- assert (PredBB && " Predecessor basic-block not found building successor." );
434468 auto *PredBBTerminator = PredBB->getTerminator ();
435469 LLVM_DEBUG (dbgs () << " LV: draw edge from" << PredBB->getName () << ' \n ' );
436470
@@ -491,11 +525,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
491525 bool Replica = bool (State->Lane );
492526 BasicBlock *NewBB = State->CFG .PrevBB ; // Reuse it if possible.
493527
528+ if (VPBlockUtils::isHeader (this , State->VPDT )) {
529+ // Create and register the new vector loop.
530+ Loop *PrevParentLoop = State->CurrentParentLoop ;
531+ State->CurrentParentLoop = State->LI ->AllocateLoop ();
532+
533+ // Insert the new loop into the loop nest and register the new basic blocks
534+ // before calling any utilities such as SCEV that require valid LoopInfo.
535+ if (PrevParentLoop)
536+ PrevParentLoop->addChildLoop (State->CurrentParentLoop );
537+ else
538+ State->LI ->addTopLevelLoop (State->CurrentParentLoop );
539+ }
540+
494541 auto IsReplicateRegion = [](VPBlockBase *BB) {
495542 auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
496- return R && R->isReplicator ();
543+ assert ((!R || R->isReplicator ()) &&
544+ " only replicate region blocks should remain" );
545+ return R;
497546 };
498-
499547 // 1. Create an IR basic block.
500548 if ((Replica && this == getParent ()->getEntry ()) ||
501549 IsReplicateRegion (getSingleHierarchicalPredecessor ())) {
@@ -518,6 +566,10 @@ void VPBasicBlock::execute(VPTransformState *State) {
518566
519567 // 2. Fill the IR basic block with IR instructions.
520568 executeRecipes (State, NewBB);
569+
570+ // If this block is a latch, update CurrentParentLoop.
571+ if (VPBlockUtils::isLatch (this , State->VPDT ))
572+ State->CurrentParentLoop = State->CurrentParentLoop ->getParentLoop ();
521573}
522574
523575VPBasicBlock *VPBasicBlock::clone () {
@@ -729,35 +781,13 @@ VPRegionBlock *VPRegionBlock::clone() {
729781}
730782
731783void VPRegionBlock::execute (VPTransformState *State) {
732- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
733- RPOT (Entry);
734-
735- if (!isReplicator ()) {
736- // Create and register the new vector loop.
737- Loop *PrevParentLoop = State->CurrentParentLoop ;
738- State->CurrentParentLoop = State->LI ->AllocateLoop ();
739-
740- // Insert the new loop into the loop nest and register the new basic blocks
741- // before calling any utilities such as SCEV that require valid LoopInfo.
742- if (PrevParentLoop)
743- PrevParentLoop->addChildLoop (State->CurrentParentLoop );
744- else
745- State->LI ->addTopLevelLoop (State->CurrentParentLoop );
746-
747- // Visit the VPBlocks connected to "this", starting from it.
748- for (VPBlockBase *Block : RPOT) {
749- LLVM_DEBUG (dbgs () << " LV: VPBlock in RPO " << Block->getName () << ' \n ' );
750- Block->execute (State);
751- }
752-
753- State->CurrentParentLoop = PrevParentLoop;
754- return ;
755- }
756-
784+ assert (isReplicator () &&
785+ " Loop regions should have been lowered to plain CFG" );
757786 assert (!State->Lane && " Replicating a Region with non-null instance." );
758-
759- // Enter replicating mode.
760787 assert (!State->VF .isScalable () && " VF is assumed to be non scalable." );
788+
789+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT (
790+ Entry);
761791 State->Lane = VPLane (0 );
762792 for (unsigned Lane = 0 , VF = State->VF .getKnownMinValue (); Lane < VF;
763793 ++Lane) {
@@ -851,6 +881,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
851881}
852882#endif
853883
884+ void VPRegionBlock::dissolveToCFGLoop () {
885+ auto *Header = cast<VPBasicBlock>(getEntry ());
886+ VPBlockBase *Preheader = getSinglePredecessor ();
887+ auto *ExitingLatch = cast<VPBasicBlock>(getExiting ());
888+ VPBlockBase *Middle = getSingleSuccessor ();
889+ VPBlockUtils::disconnectBlocks (Preheader, this );
890+ VPBlockUtils::disconnectBlocks (this , Middle);
891+
892+ for (VPBlockBase *VPB : vp_depth_first_shallow (Entry))
893+ VPB->setParent (getParent ());
894+
895+ VPBlockUtils::connectBlocks (Preheader, Header);
896+ VPBlockUtils::connectBlocks (ExitingLatch, Middle);
897+ VPBlockUtils::connectBlocks (ExitingLatch, Header);
898+ }
899+
854900VPlan::VPlan (Loop *L) {
855901 setEntry (createVPIRBasicBlock (L->getLoopPreheader ()));
856902 ScalarHeader = createVPIRBasicBlock (L->getHeader ());
@@ -962,16 +1008,15 @@ void VPlan::execute(VPTransformState *State) {
9621008
9631009 State->CFG .DTU .flush ();
9641010
965- auto *LoopRegion = getVectorLoopRegion ( );
966- if (!LoopRegion )
1011+ VPBasicBlock *Header = vputils::getFirstLoopHeader (* this , State-> VPDT );
1012+ if (!Header )
9671013 return ;
9681014
969- VPBasicBlock *LatchVPBB = LoopRegion-> getExitingBasicBlock ( );
1015+ auto *LatchVPBB = cast<VPBasicBlock>(Header-> getPredecessors ()[ 1 ] );
9701016 BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
9711017
9721018 // Fix the latch value of canonical, reduction and first-order recurrences
9731019 // phis in the vector loop.
974- VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
9751020 for (VPRecipeBase &R : Header->phis ()) {
9761021 // Skip phi-like recipes that generate their backedge values themselves.
9771022 if (isa<VPWidenPHIRecipe>(&R))
@@ -1007,8 +1052,10 @@ void VPlan::execute(VPTransformState *State) {
10071052 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
10081053 (isa<VPReductionPHIRecipe>(PhiR) &&
10091054 cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1055+
10101056 Value *Phi = State->get (PhiR, NeedsScalar);
1011- // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1057+ // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1058+ // not.
10121059 Value *Val = State->get (PhiR->getOperand (1 ), NeedsScalar);
10131060 cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
10141061 }
0 commit comments