@@ -501,8 +501,10 @@ void VPlanTransforms::prepareForVectorization(
501501 cast<VPBasicBlock>(HeaderVPB),
502502 cast<VPBasicBlock>(LatchVPB), Range);
503503 HandledUncountableEarlyExit = true ;
504+ } else {
505+ for (VPRecipeBase &R : EB->phis ())
506+ cast<VPIRPhi>(&R)->removeIncomingValueFor (Pred);
504507 }
505-
506508 cast<VPBasicBlock>(Pred)->getTerminator ()->eraseFromParent ();
507509 VPBlockUtils::disconnectBlocks (Pred, EB);
508510 }
@@ -526,32 +528,6 @@ void VPlanTransforms::prepareForVectorization(
526528 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock (" scalar.ph" );
527529 VPBlockUtils::connectBlocks (ScalarPH, Plan.getScalarHeader ());
528530
529- // If needed, add a check in the middle block to see if we have completed
530- // all of the iterations in the first vector loop. Three cases:
531- // 1) If we require a scalar epilogue, there is no conditional branch as
532- // we unconditionally branch to the scalar preheader. Remove the recipes
533- // from the exit blocks.
534- // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
535- // Thus if tail is to be folded, we know we don't need to run the
536- // remainder and we can set the condition to true.
537- // 3) Otherwise, construct a runtime check.
538-
539- if (!RequiresScalarEpilogueCheck) {
540- if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor ())
541- VPBlockUtils::disconnectBlocks (MiddleVPBB, LatchExitVPB);
542- VPBlockUtils::connectBlocks (MiddleVPBB, ScalarPH);
543- VPBlockUtils::connectBlocks (Plan.getEntry (), ScalarPH);
544- Plan.getEntry ()->swapSuccessors ();
545-
546- // The exit blocks are unreachable, remove their recipes to make sure no
547- // users remain that may pessimize transforms.
548- for (auto *EB : Plan.getExitBlocks ()) {
549- for (VPRecipeBase &R : make_early_inc_range (*EB))
550- R.eraseFromParent ();
551- }
552- return ;
553- }
554-
555531 // The connection order corresponds to the operands of the conditional branch,
556532 // with the middle block already connected to the exit block.
557533 VPBlockUtils::connectBlocks (MiddleVPBB, ScalarPH);
@@ -561,21 +537,45 @@ void VPlanTransforms::prepareForVectorization(
561537 VPBlockUtils::connectBlocks (Plan.getEntry (), ScalarPH);
562538 Plan.getEntry ()->swapSuccessors ();
563539
564- auto *ScalarLatchTerm = TheLoop->getLoopLatch ()->getTerminator ();
565- // Here we use the same DebugLoc as the scalar loop latch terminator instead
566- // of the corresponding compare because they may have ended up with
567- // different line numbers and we want to avoid awkward line stepping while
568- // debugging. Eg. if the compare has got a line number inside the loop.
540+ // If MiddleVPBB has a single successor then the original loop does not exit
541+ // via the latch and the single successor must be the scalar preheader.
542+ // There's no need to add a runtime check to MiddleVPBB.
543+ if (MiddleVPBB->getNumSuccessors () == 1 ) {
544+ assert (MiddleVPBB->getSingleSuccessor () == ScalarPH &&
545+ " must have ScalarPH as single successor" );
546+ return ;
547+ }
548+
549+ assert (MiddleVPBB->getNumSuccessors () == 2 && " must have 2 successors" );
550+
551+ // Add a check in the middle block to see if we have completed all of the
552+ // iterations in the first vector loop.
553+ //
554+ // Three cases:
555+ // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
556+ // condition to false.
557+ // 2) If (N - N%VF) == N, then we *don't* need to run the
558+ // remainder. Thus if tail is to be folded, we know we don't need to run
559+ // the remainder and we can set the condition to true.
560+ // 3) Otherwise, construct a runtime check.
561+
562+ // We use the same DebugLoc as the scalar loop latch terminator instead of
563+ // the corresponding compare because they may have ended up with different
564+ // line numbers and we want to avoid awkward line stepping while debugging.
565+ // E.g., if the compare has got a line number inside the loop.
566+ DebugLoc LatchDL = TheLoop->getLoopLatch ()->getTerminator ()->getDebugLoc ();
569567 VPBuilder Builder (MiddleVPBB);
570- VPValue *Cmp =
571- TailFolded
572- ? Plan.getOrAddLiveIn (ConstantInt::getTrue (
573- IntegerType::getInt1Ty (TripCount->getType ()->getContext ())))
574- : Builder.createICmp (CmpInst::ICMP_EQ, Plan.getTripCount (),
575- &Plan.getVectorTripCount (),
576- ScalarLatchTerm->getDebugLoc (), " cmp.n" );
577- Builder.createNaryOp (VPInstruction::BranchOnCond, {Cmp},
578- ScalarLatchTerm->getDebugLoc ());
568+ VPValue *Cmp;
569+ if (!RequiresScalarEpilogueCheck)
570+ Cmp = Plan.getOrAddLiveIn (ConstantInt::getFalse (
571+ IntegerType::getInt1Ty (TripCount->getType ()->getContext ())));
572+ else if (TailFolded)
573+ Cmp = Plan.getOrAddLiveIn (ConstantInt::getTrue (
574+ IntegerType::getInt1Ty (TripCount->getType ()->getContext ())));
575+ else
576+ Cmp = Builder.createICmp (CmpInst::ICMP_EQ, Plan.getTripCount (),
577+ &Plan.getVectorTripCount (), LatchDL, " cmp.n" );
578+ Builder.createNaryOp (VPInstruction::BranchOnCond, {Cmp}, LatchDL);
579579}
580580
581581void VPlanTransforms::createLoopRegions (VPlan &Plan) {
0 commit comments