@@ -207,6 +207,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
207207 return Parent->getEnclosingBlockWithPredecessors ();
208208}
209209
/// Returns true if this block has exactly two predecessors and, according to
/// \p VPDT, this block dominates its predecessor at index 1.
/// The size check comes first, so the index-1 access is only evaluated when
/// two predecessors exist.
/// NOTE(review): this presumably relies on the back-edge (latch) predecessor
/// being stored at index 1 of the predecessor list -- confirm that invariant
/// is maintained wherever predecessors are connected.
210+ bool VPBasicBlock::isHeader (const VPDominatorTree &VPDT) const {
211+ return getPredecessors ().size () == 2 &&
212+ VPDT.dominates (this , getPredecessors ()[1 ]);
213+ }
214+
210215VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi () {
211216 iterator It = begin ();
212217 while (It != end () && It->isPhi ())
@@ -424,14 +429,18 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
424429 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
425430 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
426431 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
427- BasicBlock *PredBB = CFG.VPBB2IRBB [PredVPBB];
432+ BasicBlock *PredBB = CFG.VPBB2IRBB .lookup (PredVPBB);
433+ if (!PredBB)
434+ continue ;
428435
429436 assert (PredBB && " Predecessor basic-block not found building successor." );
430437 auto *PredBBTerminator = PredBB->getTerminator ();
431438 LLVM_DEBUG (dbgs () << " LV: draw edge from" << PredBB->getName () << ' \n ' );
432439
433440 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
434441 if (isa<UnreachableInst>(PredBBTerminator)) {
442+ if (PredVPSuccessors.size () == 2 )
443+ continue ;
435444 assert (PredVPSuccessors.size () == 1 &&
436445 " Predecessor ending w/o branch must have single successor." );
437446 DebugLoc DL = PredBBTerminator->getDebugLoc ();
@@ -487,11 +496,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
487496 bool Replica = bool (State->Lane );
488497 BasicBlock *NewBB = State->CFG .PrevBB ; // Reuse it if possible.
489498
499+ if (isHeader (State->VPDT )) {
500+ // Create and register the new vector loop.
501+ Loop *PrevParentLoop = State->CurrentParentLoop ;
502+ State->CurrentParentLoop = State->LI ->AllocateLoop ();
503+
504+ // Insert the new loop into the loop nest and register the new basic blocks
505+ // before calling any utilities such as SCEV that require valid LoopInfo.
506+ if (PrevParentLoop)
507+ PrevParentLoop->addChildLoop (State->CurrentParentLoop );
508+ else
509+ State->LI ->addTopLevelLoop (State->CurrentParentLoop );
510+ }
511+
490512 auto IsReplicateRegion = [](VPBlockBase *BB) {
491513 auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
492- return R && R->isReplicator ();
514+ assert ((!R || R->isReplicator ()) &&
515+ " only replicate region blocks should remain" );
516+ return R;
493517 };
494-
495518 // 1. Create an IR basic block.
496519 if ((Replica && this == getParent ()->getEntry ()) ||
497520 IsReplicateRegion (getSingleHierarchicalPredecessor ())) {
@@ -514,6 +537,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
514537
515538 // 2. Fill the IR basic block with IR instructions.
516539 executeRecipes (State, NewBB);
540+
541+ // If this block is a latch, update CurrentParentLoop.
542+ if (any_of (getSuccessors (), [State, this ](VPBlockBase *Succ) {
543+ auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
544+ return VPBB && VPBB->isHeader (State->VPDT ) &&
545+ State->VPDT .dominates (Succ, this );
546+ }))
547+ State->CurrentParentLoop = State->CurrentParentLoop ->getParentLoop ();
517548}
518549
519550VPBasicBlock *VPBasicBlock::clone () {
@@ -725,35 +756,13 @@ VPRegionBlock *VPRegionBlock::clone() {
725756}
726757
727758void VPRegionBlock::execute (VPTransformState *State) {
728- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
729- RPOT (Entry);
730-
731- if (!isReplicator ()) {
732- // Create and register the new vector loop.
733- Loop *PrevParentLoop = State->CurrentParentLoop ;
734- State->CurrentParentLoop = State->LI ->AllocateLoop ();
735-
736- // Insert the new loop into the loop nest and register the new basic blocks
737- // before calling any utilities such as SCEV that require valid LoopInfo.
738- if (PrevParentLoop)
739- PrevParentLoop->addChildLoop (State->CurrentParentLoop );
740- else
741- State->LI ->addTopLevelLoop (State->CurrentParentLoop );
742-
743- // Visit the VPBlocks connected to "this", starting from it.
744- for (VPBlockBase *Block : RPOT) {
745- LLVM_DEBUG (dbgs () << " LV: VPBlock in RPO " << Block->getName () << ' \n ' );
746- Block->execute (State);
747- }
748-
749- State->CurrentParentLoop = PrevParentLoop;
750- return ;
751- }
752-
759+ assert (isReplicator () &&
760+ " Loop regions should have been lowered to plain CFG" );
753761 assert (!State->Lane && " Replicating a Region with non-null instance." );
754-
755- // Enter replicating mode.
756762 assert (!State->VF .isScalable () && " VF is assumed to be non scalable." );
763+
764+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT (
765+ Entry);
757766 State->Lane = VPLane (0 );
758767 for (unsigned Lane = 0 , VF = State->VF .getKnownMinValue (); Lane < VF;
759768 ++Lane) {
@@ -847,6 +856,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
847856}
848857#endif
849858
/// Detaches this region from its single predecessor and single successor,
/// clears the parent of the blocks reached by a shallow depth-first walk from
/// the region entry, and then connects the former predecessor directly to the
/// region's entry block and the region's exiting block directly to the former
/// successor.
/// NOTE(review): assumes the region has exactly one predecessor and exactly
/// one successor, and that its entry and exiting blocks are VPBasicBlocks
/// (the cast<> calls below require the latter) -- confirm callers guarantee
/// this.
859+ void VPRegionBlock::removeRegion () {
// Capture the region's boundary blocks before any edges are modified.
860+ auto *Header = cast<VPBasicBlock>(getEntry ());
861+ VPBlockBase *Preheader = getSinglePredecessor ();
862+ auto *Exiting = cast<VPBasicBlock>(getExiting ());
863+
864+ VPBlockBase *Middle = getSingleSuccessor ();
// Remove the edges into and out of the region itself.
865+ VPBlockUtils::disconnectBlocks (Preheader, this );
866+ VPBlockUtils::disconnectBlocks (this , Middle);
867+
// Clear the parent of every block visited from the entry so they no longer
// refer to a parent region.
868+ for (VPBlockBase *VPB : vp_depth_first_shallow (Entry))
869+ VPB->setParent (nullptr );
870+
// Re-create the edges, this time between the surrounding blocks and the
// region's former entry/exiting blocks.
871+ VPBlockUtils::connectBlocks (Preheader, Header);
872+ VPBlockUtils::connectBlocks (Exiting, Middle);
873+ }
874+
850875VPlan::VPlan (Loop *L) {
851876 setEntry (createVPIRBasicBlock (L->getLoopPreheader ()));
852877 ScalarHeader = createVPIRBasicBlock (L->getHeader ());
@@ -956,57 +981,57 @@ void VPlan::execute(VPTransformState *State) {
956981 for (VPBlockBase *Block : RPOT)
957982 Block->execute (State);
958983
959- State->CFG .DTU .flush ();
960-
961- auto *LoopRegion = getVectorLoopRegion ();
962- if (!LoopRegion)
963- return ;
964-
965- VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock ();
966- BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
967-
968984 // Fix the latch value of canonical, reduction and first-order recurrences
969985 // phis in the vector loop.
970- VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
971- for (VPRecipeBase &R : Header->phis ()) {
972- // Skip phi-like recipes that generate their backedege values themselves.
973- if (isa<VPWidenPHIRecipe>(&R))
986+ for (VPBasicBlock *Header :
987+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow (Entry))) {
988+ if (!Header->isHeader (State->VPDT ))
974989 continue ;
990+ for (VPRecipeBase &R : Header->phis ()) {
991+ if (isa<VPWidenPHIRecipe>(&R))
992+ continue ;
975993
976- if (isa<VPWidenInductionRecipe>(&R)) {
977- PHINode *Phi = nullptr ;
978- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
979- Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
980- } else {
981- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
982- assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
983- " recipe generating only scalars should have been replaced" );
984- auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
985- Phi = cast<PHINode>(GEP->getPointerOperand ());
994+ auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors ()[1 ]);
995+ BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
996+
997+ if (isa<VPWidenInductionRecipe>(&R)) {
998+ PHINode *Phi = nullptr ;
999+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1000+ Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1001+ } else {
1002+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1003+ assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1004+ " recipe generating only scalars should have been replaced" );
1005+ auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1006+ Phi = cast<PHINode>(GEP->getPointerOperand ());
1007+ }
1008+
1009+ Phi->setIncomingBlock (1 , VectorLatchBB);
1010+
1011+ // Move the last step to the end of the latch block. This ensures
1012+ // consistent placement of all induction updates.
1013+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1014+ Inc->moveBefore (
1015+ std::prev (VectorLatchBB->getTerminator ()->getIterator ()));
1016+
1017+ // Use the steps for the last part as backedge value for the induction.
1018+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1019+ Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1020+ continue ;
9861021 }
9871022
988- Phi-> setIncomingBlock ( 1 , VectorLatchBB );
989-
990- // Move the last step to the end of the latch block. This ensures
991- // consistent placement of all induction updates.
992- Instruction *Inc = cast<Instruction>(Phi-> getIncomingValue ( 1 ));
993- Inc-> moveBefore ( std::prev (VectorLatchBB-> getTerminator ()-> getIterator ()));
994-
995- // Use the steps for the last part as backedge value for the induction.
996- if ( auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
997- Inc-> setOperand ( 0 , State->get (IV-> getLastUnrolledPartOperand ()) );
998- continue ;
1023+ auto *PhiR = cast<VPSingleDefRecipe>(&R );
1024+ // VPInstructions currently model scalar Phis only.
1025+ bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1026+ (isa<VPReductionPHIRecipe>(PhiR) &&
1027+ cast<VPReductionPHIRecipe>(PhiR)-> isInLoop ( ));
1028+
1029+ Value *Phi = State-> get (PhiR, NeedsScalar);
1030+ // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1031+ // not.
1032+ Value *Val = State->get (PhiR-> getOperand ( 1 ), NeedsScalar );
1033+ cast<PHINode>(Phi)-> addIncoming (Val, VectorLatchBB) ;
9991034 }
1000-
1001- auto *PhiR = cast<VPSingleDefRecipe>(&R);
1002- // VPInstructions currently model scalar Phis only.
1003- bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1004- (isa<VPReductionPHIRecipe>(PhiR) &&
1005- cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1006- Value *Phi = State->get (PhiR, NeedsScalar);
1007- // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1008- Value *Val = State->get (PhiR->getOperand (1 ), NeedsScalar);
1009- cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
10101035 }
10111036}
10121037
@@ -1365,16 +1390,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
13651390
13661391#endif
13671392
1368- // / Returns true if there is a vector loop region and \p VPV is defined in a
1369- // / loop region.
1370- static bool isDefinedInsideLoopRegions (const VPValue *VPV) {
1371- const VPRecipeBase *DefR = VPV->getDefiningRecipe ();
1372- return DefR && (!DefR->getParent ()->getPlan ()->getVectorLoopRegion () ||
1373- DefR->getParent ()->getEnclosingLoopRegion ());
1374- }
1375-
/// Returns true if this value is considered to be defined outside any loop
/// region:
///  - a value without a defining recipe always qualifies;
///  - if the plan still has a vector loop region, the value qualifies when the
///    block of its defining recipe has no enclosing loop region;
///  - otherwise (no vector loop region in the plan), the value qualifies only
///    when its defining recipe is in the plan's entry block.
13761393bool VPValue::isDefinedOutsideLoopRegions () const {
1377- return !isDefinedInsideLoopRegions (this );
1394+ auto *DefR = getDefiningRecipe ();
1395+ if (!DefR)
1396+ return true ;
1397+
1398+ const VPBasicBlock *DefVPBB = DefR->getParent ();
1399+ auto *Plan = DefVPBB->getPlan ();
// With a loop region present, region membership decides the answer.
1400+ if (Plan->getVectorLoopRegion ())
1401+ return !DefR->getParent ()->getEnclosingLoopRegion ();
// Without a loop region, only definitions in the entry block count as
// outside.
1402+ return DefVPBB == Plan->getEntry ();
13781403}
13791404void VPValue::replaceAllUsesWith (VPValue *New) {
13801405 replaceUsesWithIf (New, [](VPUser &, unsigned ) { return true ; });
0 commit comments