@@ -384,6 +384,11 @@ static cl::opt<bool> UseWiderVFIfCallVariantsPresent(
384384 cl::Hidden,
385385 cl::desc(" Try wider VFs if they enable the use of vector variants" ));
386386
387+ static cl::opt<bool > EnableEarlyExitVectorization (
388+ " enable-early-exit-vectorization" , cl::init(false ), cl::Hidden,
389+ cl::desc(
390+ " Enable vectorization of early exit loops with uncountable exits." ));
391+
387392// Likelihood of bypassing the vectorized loop because assumptions about SCEV
388393// variables not overflowing do not hold. See `emitSCEVChecks`.
389394static constexpr uint32_t SCEVCheckBypassWeights[] = {1 , 127 };
@@ -1358,14 +1363,13 @@ class LoopVectorizationCostModel {
13581363 LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue\n " );
13591364 return false ;
13601365 }
1361- // If we might exit from anywhere but the latch, must run the exiting
1362- // iteration in scalar form.
1363- if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1364- if (!Legal->canVectorizeEarlyExit ()) {
1365- LLVM_DEBUG (
1366- dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1367- return true ;
1368- }
1366+ // If we might exit from anywhere but the latch and early exit vectorization
1367+ // is disabled, we must run the exiting iteration in scalar form.
1368+ if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch () &&
1369+ !(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit ())) {
1370+ LLVM_DEBUG (
1371+ dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1372+ return true ;
13691373 }
13701374 if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
13711375 LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
@@ -2576,7 +2580,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25762580 assert (LoopVectorPreHeader && " Invalid loop structure" );
25772581 LoopExitBlock = OrigLoop->getUniqueExitBlock (); // may be nullptr
25782582 assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ()) ||
2579- Legal->canVectorizeEarlyExit ()) &&
2583+ Legal->hasUncountableEarlyExit ()) &&
25802584 " multiple exit loop without required epilogue?" );
25812585
25822586 LoopMiddleBlock =
@@ -2809,6 +2813,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
28092813 }
28102814 }
28112815
2816+ assert ((MissingVals.empty () || OrigLoop->getUniqueExitBlock ()) &&
2817+ " Expected a single exit block for escaping values" );
28122818 for (auto &I : MissingVals) {
28132819 PHINode *PHI = cast<PHINode>(I.first );
28142820 // One corner case we have to handle is two IVs "chasing" each-other,
@@ -2819,9 +2825,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
28192825 if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
28202826 PHI->addIncoming (I.second , MiddleBlock);
28212827 }
2822-
2823- assert ((MissingVals.empty () || OrigLoop->getUniqueExitBlock ()) &&
2824- " Expected a single exit block" );
28252828}
28262829
28272830namespace {
@@ -3597,7 +3600,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
35973600 for (BasicBlock *E : Exiting) {
35983601 auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
35993602 if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse () &&
3600- (TheLoop->getLoopLatch () == E || !Legal->canVectorizeEarlyExit ()))
3603+ (TheLoop->getLoopLatch () == E || !Legal->hasUncountableEarlyExit ()))
36013604 AddToWorklistIfAllowed (Cmp);
36023605 }
36033606
@@ -8144,7 +8147,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
81448147 // If source is an exiting block, we know the exit edge is dynamically dead
81458148 // in the vector loop, and thus we don't need to restrict the mask. Avoid
81468149 // adding uses of an otherwise potentially dead instruction.
8147- if (!Legal->canVectorizeEarlyExit () && OrigLoop->isLoopExiting (Src))
8150+ if (!Legal->hasUncountableEarlyExit () && OrigLoop->isLoopExiting (Src))
81488151 return EdgeMaskCache[Edge] = SrcMask;
81498152
81508153 VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8835,39 +8838,43 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
88358838static SetVector<VPIRInstruction *> collectUsersInExitBlock (
88368839 Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
88378840 const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8838- auto *MiddleVPBB = Plan.getMiddleBlock ();
8839- // No edge from the middle block to the unique exit block has been inserted
8840- // and there is nothing to fix from vector loop; phis should have incoming
8841- // from scalar loop only.
8842- if (MiddleVPBB->getNumSuccessors () != 2 )
8843- return {};
88448841 SetVector<VPIRInstruction *> ExitUsersToFix;
8845- VPBasicBlock *ExitVPBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors ()[0 ]);
8846- BasicBlock *ExitingBB = OrigLoop->getExitingBlock ();
8847- for (VPRecipeBase &R : *ExitVPBB) {
8848- auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8849- if (!ExitIRI)
8850- continue ;
8851- auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction ());
8852- if (!ExitPhi)
8853- break ;
8854- Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
8855- VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
8856- // Exit values for inductions are computed and updated outside of VPlan and
8857- // independent of induction recipes.
8858- // TODO: Compute induction exit values in VPlan.
8859- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8860- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8861- isa<VPWidenPointerInductionRecipe>(V) ||
8862- (isa<Instruction>(IncomingValue) &&
8863- OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8864- any_of (IncomingValue->users (), [&Inductions](User *U) {
8865- auto *P = dyn_cast<PHINode>(U);
8866- return P && Inductions.contains (P);
8867- })))
8842+ for (VPBlockBase *VPB : vp_depth_first_shallow (
8843+ Plan.getVectorLoopRegion ()->getSingleSuccessor ())) {
8844+ if (VPB->getNumSuccessors () != 0 || VPB == Plan.getScalarHeader ())
88688845 continue ;
8869- ExitUsersToFix.insert (ExitIRI);
8870- ExitIRI->addOperand (V);
8846+ auto *ExitVPBB = cast<VPIRBasicBlock>(VPB);
8847+ BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock ();
8848+ BasicBlock *ExitingBB = find_singleton<BasicBlock>(
8849+ to_vector (predecessors (ExitBB)),
8850+ [OrigLoop](BasicBlock *Pred, bool AllowRepeats) {
8851+ return OrigLoop->contains (Pred) ? Pred : nullptr ;
8852+ });
8853+ for (VPRecipeBase &R : *ExitVPBB) {
8854+ auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8855+ if (!ExitIRI)
8856+ continue ;
8857+ auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction ());
8858+ if (!ExitPhi)
8859+ break ;
8860+ Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
8861+ VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
8862+ // Exit values for inductions are computed and updated outside of VPlan
8863+ // and independent of induction recipes.
8864+ // TODO: Compute induction exit values in VPlan.
8865+ if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8866+ !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8867+ isa<VPWidenPointerInductionRecipe>(V) ||
8868+ (isa<Instruction>(IncomingValue) &&
8869+ OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8870+ any_of (IncomingValue->users (), [&Inductions](User *U) {
8871+ auto *P = dyn_cast<PHINode>(U);
8872+ return P && Inductions.contains (P);
8873+ })))
8874+ continue ;
8875+ ExitUsersToFix.insert (ExitIRI);
8876+ ExitIRI->addOperand (V);
8877+ }
88718878 }
88728879 return ExitUsersToFix;
88738880}
@@ -9168,16 +9175,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91689175 " VPBasicBlock" );
91699176 RecipeBuilder.fixHeaderPhis ();
91709177
9171- if (Legal->canVectorizeEarlyExit ()) {
9172- VPlanTransforms::convertToMultiCond (*Plan, *PSE.getSE (), OrigLoop,
9173- RecipeBuilder);
9174- } else {
9178+ if (Legal->hasUncountableEarlyExit ()) {
9179+ VPlanTransforms::handleUncountableEarlyExit (*Plan, *PSE.getSE (), OrigLoop,
9180+ RecipeBuilder);
9181+ }
91759182 addScalarResumePhis (RecipeBuilder, *Plan);
91769183 SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
91779184 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
91789185 addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
91799186 addUsersInExitBlock (*Plan, ExitUsersToFix);
9180- }
91819187
91829188 // ---------------------------------------------------------------------------
91839189 // Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9940,12 +9946,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
99409946 }
99419947
99429948 if (LVL.hasUncountableEarlyExit ()) {
9943- reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
9944- " early exit is not yet supported" ,
9945- " Auto-vectorization of loops with uncountable "
9946- " early exit is not yet supported" ,
9947- " UncountableEarlyExitLoopsUnsupported" , ORE, L);
9948- return false ;
9949+ if (!EnableEarlyExitVectorization) {
9950+ reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
9951+ " early exit is not yet supported" ,
9952+ " Auto-vectorization of loops with uncountable "
9953+ " early exit is not yet supported" ,
9954+ " UncountableEarlyExitLoopsUnsupported" , ORE,
9955+ L);
9956+ return false ;
9957+ }
99499958 }
99509959
99519960 // Entrance to the VPlan-native vectorization path. Outer loops are processed
0 commit comments