@@ -207,6 +207,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
207207 return Parent->getEnclosingBlockWithPredecessors ();
208208}
209209
210+ bool VPBasicBlock::isHeader (const VPDominatorTree &VPDT) const {
211+ return getPredecessors ().size () == 2 &&
212+ VPDT.dominates (this , getPredecessors ()[1 ]);
213+ }
214+
210215VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi () {
211216 iterator It = begin ();
212217 while (It != end () && It->isPhi ())
@@ -351,8 +356,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
351356}
352357
/// Return the IR basic block recorded in VPBB2IRBB for the block that precedes
/// the VPBasicBlock containing \p R.
///
/// Diff note: the removed ('-') lines looked up the preheader of the loop
/// region enclosing \p R; the added ('+') line instead uses the first
/// predecessor of \p R's parent block, so no region lookup is needed.
// NOTE(review): presumably R's parent is a loop header whose predecessor 0 is
// the preheader -- confirm against callers.
353358BasicBlock *VPTransformState::CFGState::getPreheaderBBFor (VPRecipeBase *R) {
354- VPRegionBlock *LoopRegion = R->getParent ()->getEnclosingLoopRegion ();
355- return VPBB2IRBB[LoopRegion->getPreheaderVPBB ()];
359+ return VPBB2IRBB[cast<VPBasicBlock>(R->getParent ()->getPredecessors ()[0 ])];
356360}
357361
358362void VPTransformState::addNewMetadata (Instruction *To,
@@ -437,14 +441,18 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
437441 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
438442 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
439443 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
440- BasicBlock *PredBB = CFG.VPBB2IRBB [PredVPBB];
444+ BasicBlock *PredBB = CFG.VPBB2IRBB .lookup (PredVPBB);
445+ if (!PredBB)
446+ continue ;
441447
442448 assert (PredBB && " Predecessor basic-block not found building successor." );
443449 auto *PredBBTerminator = PredBB->getTerminator ();
444450 LLVM_DEBUG (dbgs () << " LV: draw edge from" << PredBB->getName () << ' \n ' );
445451
446452 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
447453 if (isa<UnreachableInst>(PredBBTerminator)) {
454+ if (PredVPSuccessors.size () == 2 )
455+ continue ;
448456 assert (PredVPSuccessors.size () == 1 &&
449457 " Predecessor ending w/o branch must have single successor." );
450458 DebugLoc DL = PredBBTerminator->getDebugLoc ();
@@ -500,11 +508,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
500508 bool Replica = bool (State->Lane );
501509 BasicBlock *NewBB = State->CFG .PrevBB ; // Reuse it if possible.
502510
511+ if (isHeader (State->VPDT )) {
512+ // Create and register the new vector loop.
513+ Loop *PrevParentLoop = State->CurrentParentLoop ;
514+ State->CurrentParentLoop = State->LI ->AllocateLoop ();
515+
516+ // Insert the new loop into the loop nest and register the new basic blocks
517+ // before calling any utilities such as SCEV that require valid LoopInfo.
518+ if (PrevParentLoop)
519+ PrevParentLoop->addChildLoop (State->CurrentParentLoop );
520+ else
521+ State->LI ->addTopLevelLoop (State->CurrentParentLoop );
522+ }
523+
503524 auto IsReplicateRegion = [](VPBlockBase *BB) {
504525 auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
505- return R && R->isReplicator ();
526+ assert ((!R || R->isReplicator ()) &&
527+ " only replicate region blocks should remain" );
528+ return R;
506529 };
507-
508530 // 1. Create an IR basic block.
509531 if ((Replica && this == getParent ()->getEntry ()) ||
510532 IsReplicateRegion (getSingleHierarchicalPredecessor ())) {
@@ -527,6 +549,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
527549
528550 // 2. Fill the IR basic block with IR instructions.
529551 executeRecipes (State, NewBB);
552+
553+ // If this block is a latch, update CurrentParentLoop.
554+ if (any_of (getSuccessors (), [State, this ](VPBlockBase *Succ) {
555+ auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
556+ return VPBB && VPBB->isHeader (State->VPDT ) &&
557+ State->VPDT .dominates (Succ, this );
558+ }))
559+ State->CurrentParentLoop = State->CurrentParentLoop ->getParentLoop ();
530560}
531561
532562VPBasicBlock *VPBasicBlock::clone () {
@@ -739,35 +769,13 @@ VPRegionBlock *VPRegionBlock::clone() {
739769}
740770
741771void VPRegionBlock::execute (VPTransformState *State) {
742- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
743- RPOT (Entry);
744-
745- if (!isReplicator ()) {
746- // Create and register the new vector loop.
747- Loop *PrevParentLoop = State->CurrentParentLoop ;
748- State->CurrentParentLoop = State->LI ->AllocateLoop ();
749-
750- // Insert the new loop into the loop nest and register the new basic blocks
751- // before calling any utilities such as SCEV that require valid LoopInfo.
752- if (PrevParentLoop)
753- PrevParentLoop->addChildLoop (State->CurrentParentLoop );
754- else
755- State->LI ->addTopLevelLoop (State->CurrentParentLoop );
756-
757- // Visit the VPBlocks connected to "this", starting from it.
758- for (VPBlockBase *Block : RPOT) {
759- LLVM_DEBUG (dbgs () << " LV: VPBlock in RPO " << Block->getName () << ' \n ' );
760- Block->execute (State);
761- }
762-
763- State->CurrentParentLoop = PrevParentLoop;
764- return ;
765- }
766-
772+ assert (isReplicator () &&
773+ " Loop regions should have been lowered to plain CFG" );
767774 assert (!State->Lane && " Replicating a Region with non-null instance." );
768-
769- // Enter replicating mode.
770775 assert (!State->VF .isScalable () && " VF is assumed to be non scalable." );
776+
777+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT (
778+ Entry);
771779 State->Lane = VPLane (0 );
772780 for (unsigned Lane = 0 , VF = State->VF .getKnownMinValue (); Lane < VF;
773781 ++Lane) {
@@ -842,6 +850,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
842850}
843851#endif
844852
853+ void VPRegionBlock::removeRegion () {
854+ auto *Header = cast<VPBasicBlock>(getEntry ());
855+ VPBlockBase *Preheader = getSinglePredecessor ();
856+ auto *Exiting = cast<VPBasicBlock>(getExiting ());
857+
858+ VPBlockBase *Middle = getSingleSuccessor ();
859+ VPBlockUtils::disconnectBlocks (Preheader, this );
860+ VPBlockUtils::disconnectBlocks (this , Middle);
861+
862+ for (VPBlockBase *VPB : vp_depth_first_shallow (Entry))
863+ VPB->setParent (nullptr );
864+
865+ VPBlockUtils::connectBlocks (Preheader, Header);
866+ VPBlockUtils::connectBlocks (Exiting, Middle);
867+ }
868+
845869VPlan::VPlan (Loop *L) {
846870 setEntry (createVPIRBasicBlock (L->getLoopPreheader ()));
847871 ScalarHeader = createVPIRBasicBlock (L->getHeader ());
@@ -951,57 +975,57 @@ void VPlan::execute(VPTransformState *State) {
951975 for (VPBlockBase *Block : RPOT)
952976 Block->execute (State);
953977
954- State->CFG .DTU .flush ();
955-
956- auto *LoopRegion = getVectorLoopRegion ();
957- if (!LoopRegion)
958- return ;
959-
960- VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock ();
961- BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
962-
963978 // Fix the latch value of canonical, reduction and first-order recurrences
964979 // phis in the vector loop.
965- VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
966- for (VPRecipeBase &R : Header->phis ()) {
967- // Skip phi-like recipes that generate their backedege values themselves.
968- if (isa<VPWidenPHIRecipe>(&R))
980+ for (VPBasicBlock *Header :
981+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow (Entry))) {
982+ if (!Header->isHeader (State->VPDT ))
969983 continue ;
984+ for (VPRecipeBase &R : Header->phis ()) {
985+ if (isa<VPWidenPHIRecipe>(&R))
986+ continue ;
970987
971- if (isa<VPWidenInductionRecipe>(&R)) {
972- PHINode *Phi = nullptr ;
973- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
974- Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
975- } else {
976- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
977- assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
978- " recipe generating only scalars should have been replaced" );
979- auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
980- Phi = cast<PHINode>(GEP->getPointerOperand ());
988+ auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors ()[1 ]);
989+ BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
990+
991+ if (isa<VPWidenInductionRecipe>(&R)) {
992+ PHINode *Phi = nullptr ;
993+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
994+ Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
995+ } else {
996+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
997+ assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
998+ " recipe generating only scalars should have been replaced" );
999+ auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1000+ Phi = cast<PHINode>(GEP->getPointerOperand ());
1001+ }
1002+
1003+ Phi->setIncomingBlock (1 , VectorLatchBB);
1004+
1005+ // Move the last step to the end of the latch block. This ensures
1006+ // consistent placement of all induction updates.
1007+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1008+ Inc->moveBefore (
1009+ std::prev (VectorLatchBB->getTerminator ()->getIterator ()));
1010+
1011+ // Use the steps for the last part as backedge value for the induction.
1012+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1013+ Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1014+ continue ;
9811015 }
9821016
983- Phi-> setIncomingBlock ( 1 , VectorLatchBB );
984-
985- // Move the last step to the end of the latch block. This ensures
986- // consistent placement of all induction updates.
987- Instruction *Inc = cast<Instruction>(Phi-> getIncomingValue ( 1 ));
988- Inc-> moveBefore ( std::prev (VectorLatchBB-> getTerminator ()-> getIterator ()));
989-
990- // Use the steps for the last part as backedge value for the induction.
991- if ( auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
992- Inc-> setOperand ( 0 , State->get (IV-> getLastUnrolledPartOperand ()) );
993- continue ;
1017+ auto *PhiR = cast<VPSingleDefRecipe>(&R );
1018+ // VPInstructions currently model scalar Phis only.
1019+ bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1020+ (isa<VPReductionPHIRecipe>(PhiR) &&
1021+ cast<VPReductionPHIRecipe>(PhiR)-> isInLoop ( ));
1022+
1023+ Value *Phi = State-> get (PhiR, NeedsScalar);
1024+ // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1025+ // not.
1026+ Value *Val = State->get (PhiR-> getOperand ( 1 ), NeedsScalar );
1027+ cast<PHINode>(Phi)-> addIncoming (Val, VectorLatchBB) ;
9941028 }
995-
996- auto *PhiR = cast<VPSingleDefRecipe>(&R);
997- // VPInstructions currently model scalar Phis only.
998- bool NeedsScalar = isa<VPInstruction>(PhiR) ||
999- (isa<VPReductionPHIRecipe>(PhiR) &&
1000- cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1001- Value *Phi = State->get (PhiR, NeedsScalar);
1002- // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1003- Value *Val = State->get (PhiR->getOperand (1 ), NeedsScalar);
1004- cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
10051029 }
10061030}
10071031
@@ -1360,16 +1384,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
13601384
13611385#endif
13621386
1363- // / Returns true if there is a vector loop region and \p VPV is defined in a
1364- // / loop region.
1365- static bool isDefinedInsideLoopRegions (const VPValue *VPV) {
1366- const VPRecipeBase *DefR = VPV->getDefiningRecipe ();
1367- return DefR && (!DefR->getParent ()->getPlan ()->getVectorLoopRegion () ||
1368- DefR->getParent ()->getEnclosingLoopRegion ());
1369- }
1370-
/// Return true if this value is considered to be defined outside any loop
/// region.
///
/// A value without a defining recipe always qualifies. Otherwise, while the
/// plan still has a vector loop region, the value qualifies when its defining
/// block is not enclosed by a loop region; once there is no vector loop
/// region, it qualifies only when it is defined in the plan's entry block.
///
/// Diff note: the removed ('-') line delegated to the deleted static helper
/// isDefinedInsideLoopRegions(); the added ('+') lines inline the logic.
13711387bool VPValue::isDefinedOutsideLoopRegions () const {
1372- return !isDefinedInsideLoopRegions (this );
1388+ auto *DefR = getDefiningRecipe ();
// No defining recipe: treated as defined outside.
1389+ if (!DefR)
1390+ return true ;
1391+
1392+ const VPBasicBlock *DefVPBB = DefR->getParent ();
1393+ auto *Plan = DefVPBB->getPlan ();
// With a vector loop region present, decide by region enclosure.
1394+ if (Plan->getVectorLoopRegion ())
1395+ return !DefR->getParent ()->getEnclosingLoopRegion ();
// Without one, only definitions in the plan's entry block count as outside.
1396+ return DefVPBB == Plan->getEntry ();
13731397}
13741398void VPValue::replaceAllUsesWith (VPValue *New) {
13751399 replaceUsesWithIf (New, [](VPUser &, unsigned ) { return true ; });
0 commit comments