@@ -385,6 +385,11 @@ static cl::opt<bool> UseWiderVFIfCallVariantsPresent(
385385 cl::Hidden,
386386 cl::desc(" Try wider VFs if they enable the use of vector variants" ));
387387
// Hidden, default-off switch for vectorizing loops that have an uncountable
// early exit. While it is false, processLoop still reports such loops as
// unsupported, and the cost model keeps requiring a scalar epilogue for loops
// whose exiting block is not the latch.
388+ static cl::opt<bool > EnableEarlyExitVectorization (
389+ " enable-early-exit-vectorization" , cl::init(false ), cl::Hidden,
390+ cl::desc(
391+ " Enable vectorization of early exit loops with uncountable exits." ));
392+
388393// Likelihood of bypassing the vectorized loop because assumptions about SCEV
389394// variables not overflowing do not hold. See `emitSCEVChecks`.
390395static constexpr uint32_t SCEVCheckBypassWeights[] = {1 , 127 };
@@ -1350,9 +1355,10 @@ class LoopVectorizationCostModel {
13501355 LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue\n " );
13511356 return false ;
13521357 }
1353- // If we might exit from anywhere but the latch, must run the exiting
1354- // iteration in scalar form.
1355- if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1358+ // If we might exit from anywhere but the latch and early exit vectorization
1359+ // is disabled, we must run the exiting iteration in scalar form.
1360+ if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch () &&
1361+ !(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit ())) {
13561362 LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: not exiting "
13571363 " from latch block\n " );
13581364 return true ;
@@ -2568,9 +2574,9 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
25682574void InnerLoopVectorizer::createVectorLoopSkeleton (StringRef Prefix) {
25692575 LoopVectorPreHeader = OrigLoop->getLoopPreheader ();
25702576 assert (LoopVectorPreHeader && " Invalid loop structure" );
2571- assert ((OrigLoop->getUniqueExitBlock () ||
2577+ assert ((OrigLoop->getUniqueLatchExitBlock () ||
25722578 Cost->requiresScalarEpilogue (VF.isVector ())) &&
2573- " multiple exit loop without required epilogue?" );
2579+ " loops not exiting via the latch without required epilogue?" );
25742580
25752581 LoopMiddleBlock =
25762582 SplitBlock (LoopVectorPreHeader, LoopVectorPreHeader->getTerminator (), DT,
@@ -2753,8 +2759,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
27532759 // value (the value that feeds into the phi from the loop latch).
27542760 // We allow both, but they, obviously, have different values.
27552761
2756- assert (OrigLoop->getUniqueExitBlock () && " Expected a single exit block" );
2757-
27582762 DenseMap<Value *, Value *> MissingVals;
27592763
27602764 Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
@@ -2808,6 +2812,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
28082812 }
28092813 }
28102814
2815+ assert ((MissingVals.empty () || OrigLoop->getUniqueExitBlock ()) &&
2816+ " Expected a single exit block for escaping values" );
28112817 for (auto &I : MissingVals) {
28122818 PHINode *PHI = cast<PHINode>(I.first );
28132819 // One corner case we have to handle is two IVs "chasing" each-other,
@@ -3591,7 +3597,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
35913597 TheLoop->getExitingBlocks (Exiting);
35923598 for (BasicBlock *E : Exiting) {
35933599 auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
3594- if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
3600+ if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse () &&
3601+ (TheLoop->getLoopLatch () == E || !Legal->hasUncountableEarlyExit ()))
35953602 AddToWorklistIfAllowed (Cmp);
35963603 }
35973604
@@ -7775,6 +7782,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77757782 LoopVectorizeHints Hints (L, true , *ORE);
77767783 Hints.setAlreadyVectorized ();
77777784 }
7785+
77787786 TargetTransformInfo::UnrollingPreferences UP;
77797787 TTI.getUnrollingPreferences (L, *PSE.getSE (), UP, ORE);
77807788 if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7787,15 +7795,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77877795 ILV.printDebugTracesAtEnd ();
77887796
77897797 // 4. Adjust branch weight of the branch in the middle block.
7790- auto *MiddleTerm =
7791- cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7792- if (MiddleTerm->isConditional () &&
7793- hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7794- // Assume that `Count % VectorTripCount` is equally distributed.
7795- unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7796- assert (TripCount > 0 && " trip count should not be zero" );
7797- const uint32_t Weights[] = {1 , TripCount - 1 };
7798- setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7798+ if (ExitVPBB) {
7799+ auto *MiddleTerm =
7800+ cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7801+ if (MiddleTerm->isConditional () &&
7802+ hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7803+ // Assume that `Count % VectorTripCount` is equally distributed.
7804+ unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7805+ assert (TripCount > 0 && " trip count should not be zero" );
7806+ const uint32_t Weights[] = {1 , TripCount - 1 };
7807+ setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7808+ }
77997809 }
78007810
78017811 return State.ExpandedSCEVs ;
@@ -8180,7 +8190,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
81808190 // If source is an exiting block, we know the exit edge is dynamically dead
81818191 // in the vector loop, and thus we don't need to restrict the mask. Avoid
81828192 // adding uses of an otherwise potentially dead instruction.
8183- if (OrigLoop->isLoopExiting (Src))
8193+ if (!Legal-> hasUncountableEarlyExit () && OrigLoop->isLoopExiting (Src))
81848194 return EdgeMaskCache[Edge] = SrcMask;
81858195
81868196 VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8863,76 +8873,78 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
88638873 }
88648874}
88658875
// Review summary of this hunk: the old version looked only at the first
// successor of the middle block and at the loop's single exiting block. The new
// version walks every block returned by Plan.getExitBlocks() and, for each
// leading phi in such a block, every IR predecessor that lies inside OrigLoop.
// Returns the set of VPIRInstructions that received at least one operand.
8866- // Collect VPIRInstructions for phis in the original exit block that are modeled
8876+ // Collect VPIRInstructions for phis in the exit blocks that are modeled
88678877// in VPlan and add the exiting VPValue as operand. Some exiting values are not
88688878// modeled explicitly yet and won't be included. Those are un-truncated
88698879// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
88708880// increments.
8871- static SetVector<VPIRInstruction *> collectUsersInExitBlock (
8881+ static SetVector<VPIRInstruction *> collectUsersInExitBlocks (
88728882 Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
88738883 const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8874- auto *MiddleVPBB = Plan.getMiddleBlock ();
8875- // No edge from the middle block to the unique exit block has been inserted
8876- // and there is nothing to fix from vector loop; phis should have incoming
8877- // from scalar loop only.
8878- if (MiddleVPBB->getNumSuccessors () != 2 )
8879- return {};
88808884 SetVector<VPIRInstruction *> ExitUsersToFix;
8881- VPBasicBlock *ExitVPBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors ()[0 ]);
8882- BasicBlock *ExitingBB = OrigLoop->getExitingBlock ();
8883- for (VPRecipeBase &R : *ExitVPBB) {
8884- auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8885- if (!ExitIRI)
8886- continue ;
8887- auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction ());
8888- if (!ExitPhi)
8889- break ;
8890- Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
8891- VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
8892- // Exit values for inductions are computed and updated outside of VPlan and
8893- // independent of induction recipes.
8894- // TODO: Compute induction exit values in VPlan.
8895- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8896- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8897- isa<VPWidenPointerInductionRecipe>(V) ||
8898- (isa<Instruction>(IncomingValue) &&
8899- OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8900- any_of (IncomingValue->users (), [&Inductions](User *U) {
8901- auto *P = dyn_cast<PHINode>(U);
8902- return P && Inductions.contains (P);
8903- })))
8904- continue ;
8905- ExitUsersToFix.insert (ExitIRI);
8906- ExitIRI->addOperand (V);
8885+ for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks ()) {
8886+ BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock ();
8887+ for (VPRecipeBase &R : *ExitVPBB) {
8888+ auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8889+ if (!ExitIRI)
8890+ continue ;
8891+ auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction ());
// Scanning of this exit block stops at the first wrapped instruction that
// is not a PHINode.
8892+ if (!ExitPhi)
8893+ break ;
// NOTE(review): addOperand below runs once per in-loop predecessor that
// is not skipped by the induction filter, so one ExitIRI can receive
// several operands, and a skipped predecessor leaves no placeholder.
// Confirm that consumers do not rely on operand index matching
// predecessor order.
8894+ for (BasicBlock *ExitingBB : predecessors (ExitBB)) {
8895+ if (!OrigLoop->contains (ExitingBB))
8896+ continue ;
8897+ Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
8898+ VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
8899+ // Exit values for inductions are computed and updated outside of VPlan
8900+ // and independent of induction recipes.
8901+ // TODO: Compute induction exit values in VPlan.
8902+ if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8903+ !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8904+ isa<VPWidenPointerInductionRecipe>(V) ||
8905+ (isa<Instruction>(IncomingValue) &&
8906+ OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8907+ any_of (IncomingValue->users (), [&Inductions](User *U) {
8908+ auto *P = dyn_cast<PHINode>(U);
8909+ return P && Inductions.contains (P);
8910+ })))
8911+ continue ;
8912+ ExitUsersToFix.insert (ExitIRI);
8913+ ExitIRI->addOperand (V);
8914+ }
8915+ }
89078916 }
89088917 return ExitUsersToFix;
89098918}
89108919
89118920// Add exit values to \p Plan. Extracts are added for each entry in \p
89128921// ExitUsersToFix if needed and their operands are updated.
// Review summary of this hunk: the old version created every extract in the
// middle block; the new version creates one in each predecessor of the block
// that holds the exit instruction.
89138922static void
8914- addUsersInExitBlock (VPlan &Plan,
8915- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8923+ addUsersInExitBlocks (VPlan &Plan,
8924+ const SetVector<VPIRInstruction *> &ExitUsersToFix) {
89168925 if (ExitUsersToFix.empty ())
89178926 return ;
89188927
8919- auto *MiddleVPBB = Plan.getMiddleBlock ();
8920- VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8921-
89228928 // Introduce extract for exiting values and update the VPIRInstructions
89238929 // modeling the corresponding LCSSA phis.
89248930 for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
8931+
// Only operand 0 is inspected here; any further operands of ExitIRI are
// left as they are.
89258932 VPValue *V = ExitIRI->getOperand (0 );
89268933 // Pass live-in values used by exit phis directly through to their users in
89278934 // the exit block.
89288935 if (V->isLiveIn ())
89298936 continue ;
89308937
8931- LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
8932- VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
8933- {V, Plan.getOrAddLiveIn (ConstantInt::get (
8934- IntegerType::get (Ctx, 32 ), 1 ))});
8935- ExitIRI->setOperand (0 , Ext);
// NOTE(review): V is read once above, and every iteration writes its new
// extract into operand 0. With more than one predecessor, each iteration
// replaces the previous extract, so only the last one stays attached to
// ExitIRI. Confirm that blocks reaching this loop have a single
// predecessor, or that this is intended.
8938+ for (VPBlockBase *PredVPB : ExitIRI->getParent ()->getPredecessors ()) {
8939+ auto *PredVPBB = cast<VPBasicBlock>(PredVPB);
8940+ VPBuilder B (PredVPBB, PredVPBB->getFirstNonPhi ());
8941+
8942+ LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
8943+ VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
8944+ {V, Plan.getOrAddLiveIn (ConstantInt::get (
8945+ IntegerType::get (Ctx, 32 ), 1 ))});
8946+ ExitIRI->setOperand (0 , Ext);
8947+ }
89368948 }
89378949}
89388950
@@ -9204,11 +9216,32 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92049216 " VPBasicBlock" );
92059217 RecipeBuilder.fixHeaderPhis ();
92069218
9219+ if (Legal->hasUncountableEarlyExit ()) {
9220+ VPlanTransforms::handleUncountableEarlyExit (
9221+ *Plan, *PSE.getSE (), OrigLoop, Legal->getUncountableExitingBlocks (),
9222+ RecipeBuilder);
9223+ }
92079224 addScalarResumePhis (RecipeBuilder, *Plan);
9208- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
9225+ SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
92099226 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
92109227 addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9211- addUsersInExitBlock (*Plan, ExitUsersToFix);
9228+ addUsersInExitBlocks (*Plan, ExitUsersToFix);
9229+
9230+ // Currently only live-ins can be used by exit values. We also bail out if any
9231+ // exit value isn't handled in VPlan yet, i.e. a VPIRInstruction in the exit
9232+ // without any operands.
9233+ if (Legal->hasUncountableEarlyExit ()) {
9234+ if (any_of (Plan->getExitBlocks (), [](VPIRBasicBlock *ExitBB) {
9235+ return any_of (*ExitBB, [](VPRecipeBase &R) {
9236+ auto VPIRI = cast<VPIRInstruction>(&R);
9237+ return VPIRI->getNumOperands () == 0 ||
9238+ any_of (VPIRI->operands (),
9239+ [](VPValue *Op) { return !Op->isLiveIn (); });
9240+ });
9241+ }))
9242+ return nullptr ;
9243+ }
9244+
92129245 // ---------------------------------------------------------------------------
92139246 // Transform initial VPlan: Apply previously taken decisions, in order, to
92149247 // bring the VPlan to its final state.
@@ -9968,12 +10001,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
996810001 }
996910002
997010003 if (LVL.hasUncountableEarlyExit ()) {
9971- reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
9972- " early exit is not yet supported" ,
9973- " Auto-vectorization of loops with uncountable "
9974- " early exit is not yet supported" ,
9975- " UncountableEarlyExitLoopsUnsupported" , ORE, L);
9976- return false ;
10004+ if (!EnableEarlyExitVectorization) {
10005+ reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10006+ " early exit is not yet supported" ,
10007+ " Auto-vectorization of loops with uncountable "
10008+ " early exit is not yet supported" ,
10009+ " UncountableEarlyExitLoopsUnsupported" , ORE,
10010+ L);
10011+ return false ;
10012+ }
997710013 }
997810014
997910015 // Entrance to the VPlan-native vectorization path. Outer loops are processed
0 commit comments