@@ -502,10 +502,8 @@ void VPlanTransforms::prepareForVectorization(
502502 cast<VPBasicBlock>(LatchVPB), Range);
503503 HandledUncountableEarlyExit = true ;
504504 } else {
505- for (VPRecipeBase &R : cast<VPIRBasicBlock>(EB)->phis ()) {
506- if (auto *PhiR = dyn_cast<VPIRPhi>(&R))
507- PhiR->removeIncomingValue (Pred);
508- }
505+ for (VPRecipeBase &R : EB->phis ())
506+ cast<VPIRPhi>(&R)->removeIncomingValue (Pred);
509507 }
510508 cast<VPBasicBlock>(Pred)->getTerminator ()->eraseFromParent ();
511509 VPBlockUtils::disconnectBlocks (Pred, EB);
@@ -530,44 +528,51 @@ void VPlanTransforms::prepareForVectorization(
530528 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock (" scalar.ph" );
531529 VPBlockUtils::connectBlocks (ScalarPH, Plan.getScalarHeader ());
532530
533- // If needed, add a check in the middle block to see if we have completed
534- // all of the iterations in the first vector loop. Three cases:
535- // 1) If we require a scalar epilogue, there is no conditional branch as
536- // we unconditionally branch to the scalar preheader. Remove the recipes
537- // from the exit blocks.
538- // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
539- // Thus if tail is to be folded, we know we don't need to run the
540- // remainder and we can set the condition to true.
541- // 3) Otherwise, construct a runtime check.
542531 VPBlockUtils::connectBlocks (MiddleVPBB, ScalarPH);
543532 // Also connect the entry block to the scalar preheader.
544533 // TODO: Also introduce a branch recipe together with the minimum trip count
545534 // check.
546535 VPBlockUtils::connectBlocks (Plan.getEntry (), ScalarPH);
547536 Plan.getEntry ()->swapSuccessors ();
548537
549- if (MiddleVPBB->getNumSuccessors () != 2 )
538+ // If MiddleVPBB has a single successor then the original loop does not exit
539+ // via the latch and the single successor must be the scalar preheader.
540+ // There's no need to add a runtime check to MiddleVPBB.
541+ if (MiddleVPBB->getNumSuccessors () == 1 ) {
542+ assert (MiddleVPBB->getSingleSuccessor () == ScalarPH &&
543+ " must have ScalarPH as single successor" );
550544 return ;
545+ }
546+
547+ assert (MiddleVPBB->getNumSuccessors () == 2 && " must have 2 successors" );
548+
549+ // If needed, add a check in the middle block to see if we have completed
550+ // all of the iterations in the first vector loop.
551+ // Three cases:
552+ // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
553+ // condition to false.
554+ // 2) If (N - N%VF) == N, then we *don't* need to run the
555+ // remainder. Thus if tail is to be folded, we know we don't need to run
556+ // the remainder and we can set the condition to true.
557+ // 3) Otherwise, construct a runtime check.
551558
552- auto *ScalarLatchTerm = TheLoop-> getLoopLatch ()-> getTerminator ();
553- // Here we use the same DebugLoc as the scalar loop latch terminator instead
554- // of the corresponding compare because they may have ended up with
555- // different line numbers and we want to avoid awkward line stepping while
556- // debugging. Eg. if the compare has got a line number inside the loop.
559+ // We use the same DebugLoc as the scalar loop latch terminator instead of
560+ // the corresponding compare because they may have ended up with different
561+ // line numbers and we want to avoid awkward line stepping while debugging.
562+ // E.g., if the compare has got a line number inside the loop.
563+ DebugLoc LatchDL = TheLoop-> getLoopLatch ()-> getTerminator ()-> getDebugLoc ();
557564 VPBuilder Builder (MiddleVPBB);
558565 VPValue *Cmp;
559- if (TailFolded)
560- Cmp = Plan.getOrAddLiveIn (ConstantInt::getTrue (
561- IntegerType::getInt1Ty (TripCount->getType ()->getContext ())));
562- else if (!RequiresScalarEpilogueCheck)
566+ if (!RequiresScalarEpilogueCheck)
563567 Cmp = Plan.getOrAddLiveIn (ConstantInt::getFalse (
564568 IntegerType::getInt1Ty (TripCount->getType ()->getContext ())));
569+ else if (TailFolded)
570+ Cmp = Plan.getOrAddLiveIn (ConstantInt::getTrue (
571+ IntegerType::getInt1Ty (TripCount->getType ()->getContext ())));
565572 else
566573 Cmp = Builder.createICmp (CmpInst::ICMP_EQ, Plan.getTripCount (),
567- &Plan.getVectorTripCount (),
568- ScalarLatchTerm->getDebugLoc (), " cmp.n" );
569- Builder.createNaryOp (VPInstruction::BranchOnCond, {Cmp},
570- ScalarLatchTerm->getDebugLoc ());
574+ &Plan.getVectorTripCount (), LatchDL, " cmp.n" );
575+ Builder.createNaryOp (VPInstruction::BranchOnCond, {Cmp}, LatchDL);
571576}
572577
573578void VPlanTransforms::createLoopRegions (VPlan &Plan) {
0 commit comments