@@ -9163,6 +9163,31 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
91639163 }
91649164}
91659165
9166+ // Add the canonical IV phi, its increment and the BranchOnCount terminator that control the vector loop region of \p Plan.
9167+ // \p IdxTy is the type of the zero start value, \p HasNUW is forwarded as the first wrap flag of the increment, and \p DL is the debug location attached to all three recipes.
9168+ static void addCanonicalIVRecipes (VPlan &Plan, Type *IdxTy, bool HasNUW,
9169+ DebugLoc DL) {
9170+ Value *StartIdx = ConstantInt::get (IdxTy, 0 );
9171+ auto *StartV = Plan.getOrAddLiveIn (StartIdx); // The constant 0 start value enters the plan as a live-in.
9172+
9173+ // Add a VPCanonicalIVPHIRecipe starting at 0 to the header, i.e. at the very beginning of the vector loop region's entry block.
9174+ auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe (StartV, DL);
9175+ VPRegionBlock *TopRegion = Plan.getVectorLoopRegion ();
9176+ VPBasicBlock *Header = TopRegion->getEntryBasicBlock ();
9177+ Header->insert (CanonicalIVPHI, Header->begin ());
9178+
9179+ VPBuilder Builder (TopRegion->getExitingBasicBlock ());
9180+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF. Only the first wrap flag follows HasNUW; the second (presumably NSW - confirm against createOverflowingOp) is always false.
9181+ auto *CanonicalIVIncrement = Builder.createOverflowingOp (
9182+ Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF ()}, {HasNUW, false }, DL,
9183+ " index.next" );
9184+ CanonicalIVPHI->addOperand (CanonicalIVIncrement); // Feed the increment back into the phi as its next operand.
9185+
9186+ // Add the BranchOnCount VPInstruction to the latch; its operands are the incremented IV and the plan's vector trip count.
9187+ Builder.createNaryOp (VPInstruction::BranchOnCount,
9188+ {CanonicalIVIncrement, &Plan.getVectorTripCount ()}, DL);
9189+ }
9190+
91669191/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
91679192/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
91689193/// the end value of the induction.
@@ -9434,8 +9459,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94349459 auto Plan = VPlanTransforms::buildPlainCFG (OrigLoop, *LI, VPB2IRBB);
94359460 VPlanTransforms::prepareForVectorization (
94369461 *Plan, Legal->getWidestInductionType (), PSE, RequiresScalarEpilogueCheck,
9437- CM.foldTailByMasking (), OrigLoop,
9438- getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ()));
9462+ CM.foldTailByMasking (), OrigLoop);
94399463 VPlanTransforms::createLoopRegions (*Plan);
94409464
94419465 // Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9446,22 +9470,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94469470 for (ElementCount VF : Range)
94479471 IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse (&CM, VF);
94489472
9473+ DebugLoc DL = getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ());
94499474 TailFoldingStyle Style = CM.getTailFoldingStyle (IVUpdateMayOverflow);
94509475 // Use NUW for the induction increment if we proved that it won't overflow in
94519476 // the vector loop or when not folding the tail. In the latter case, we know
94529477 // that the canonical induction increment will not overflow as the vector trip
94539478 // count is >= increment and a multiple of the increment.
94549479 bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
9455- if (!HasNUW) {
9456- auto *IVInc = Plan->getVectorLoopRegion ()
9457- ->getExitingBasicBlock ()
9458- ->getTerminator ()
9459- ->getOperand (0 );
9460- assert (match (IVInc, m_VPInstruction<Instruction::Add>(
9461- m_Specific (Plan->getCanonicalIV ()), m_VPValue ())) &&
9462- " Did not find the canonical IV increment" );
9463- cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags ();
9464- }
9480+ addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW, DL);
94659481
94669482 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
94679483 Builder);
@@ -9735,13 +9751,19 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
97359751 DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
97369752 auto Plan = VPlanTransforms::buildPlainCFG (OrigLoop, *LI, VPB2IRBB);
97379753 VPlanTransforms::prepareForVectorization (
9738- *Plan, Legal->getWidestInductionType (), PSE, true , false , OrigLoop,
9739- getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ()));
9754+ *Plan, Legal->getWidestInductionType (), PSE, true , false , OrigLoop);
97409755 VPlanTransforms::createLoopRegions (*Plan);
97419756
97429757 for (ElementCount VF : Range)
97439758 Plan->addVF (VF);
97449759
9760+ // Tail folding is not supported for outer loops, so the induction increment
9761+ // is guaranteed to not wrap.
9762+ bool HasNUW = true ;
9763+ addCanonicalIVRecipes (
9764+ *Plan, Legal->getWidestInductionType (), HasNUW,
9765+ getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ()));
9766+
97459767 if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes (
97469768 Plan,
97479769 [this ](PHINode *P) {
0 commit comments