@@ -542,11 +542,6 @@ class InnerLoopVectorizer {
542542protected:
543543 friend class LoopVectorizationPlanner ;
544544
545- // / Set up the values of the IVs correctly when exiting the vector loop.
546- virtual void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
547- Value *VectorTripCount, BasicBlock *MiddleBlock,
548- VPTransformState &State);
549-
550545 // / Iteratively sink the scalarized operands of a predicated instruction into
551546 // / the block that was created for it.
552547 void sinkScalarOperands (Instruction *PredInst);
@@ -775,10 +770,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
775770 BasicBlock *emitIterationCountCheck (BasicBlock *Bypass, bool ForEpilogue);
776771 void printDebugTracesAtStart () override ;
777772 void printDebugTracesAtEnd () override ;
778-
779- void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
780- Value *VectorTripCount, BasicBlock *MiddleBlock,
781- VPTransformState &State) override {};
782773};
783774
784775// A specialized derived class of inner loop vectorizer that performs
@@ -2751,97 +2742,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27512742 return LoopVectorPreHeader;
27522743}
27532744
2754- // Fix up external users of the induction variable. At this point, we are
2755- // in LCSSA form, with all external PHIs that use the IV having one input value,
2756- // coming from the remainder loop. We need those PHIs to also have a correct
2757- // value for the IV when arriving directly from the middle block.
2758- void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2759- const InductionDescriptor &II,
2760- Value *VectorTripCount,
2761- BasicBlock *MiddleBlock,
2762- VPTransformState &State) {
2763- // There are two kinds of external IV usages - those that use the value
2764- // computed in the last iteration (the PHI) and those that use the penultimate
2765- // value (the value that feeds into the phi from the loop latch).
2766- // We allow both, but they, obviously, have different values.
2767-
2768- DenseMap<Value *, Value *> MissingVals;
2769-
2770- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
2771- OrigLoop->getLoopPreheader ()))
2772- ->getIncomingValueForBlock (MiddleBlock);
2773-
2774- // An external user of the last iteration's value should see the value that
2775- // the remainder loop uses to initialize its own IV.
2776- Value *PostInc = OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
2777- for (User *U : PostInc->users ()) {
2778- Instruction *UI = cast<Instruction>(U);
2779- if (!OrigLoop->contains (UI)) {
2780- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2781- MissingVals[UI] = EndValue;
2782- }
2783- }
2784-
2785- // An external user of the penultimate value need to see EndValue - Step.
2786- // The simplest way to get this is to recompute it from the constituent SCEVs,
2787- // that is Start + (Step * (CRD - 1)).
2788- for (User *U : OrigPhi->users ()) {
2789- auto *UI = cast<Instruction>(U);
2790- if (!OrigLoop->contains (UI)) {
2791- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2792- IRBuilder<> B (MiddleBlock->getTerminator ());
2793-
2794- // Fast-math-flags propagate from the original induction instruction.
2795- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2796- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2797-
2798- VPValue *StepVPV = Plan.getSCEVExpansion (II.getStep ());
2799- assert (StepVPV && " step must have been expanded during VPlan execution" );
2800- Value *Step = StepVPV->isLiveIn () ? StepVPV->getLiveInIRValue ()
2801- : State.get (StepVPV, VPLane (0 ));
2802- Value *Escape = nullptr ;
2803- if (EndValue->getType ()->isIntegerTy ())
2804- Escape = B.CreateSub (EndValue, Step);
2805- else if (EndValue->getType ()->isPointerTy ())
2806- Escape = B.CreatePtrAdd (EndValue, B.CreateNeg (Step));
2807- else {
2808- assert (EndValue->getType ()->isFloatingPointTy () &&
2809- " Unexpected induction type" );
2810- Escape = B.CreateBinOp (II.getInductionBinOp ()->getOpcode () ==
2811- Instruction::FAdd
2812- ? Instruction::FSub
2813- : Instruction::FAdd,
2814- EndValue, Step);
2815- }
2816- Escape->setName (" ind.escape" );
2817- MissingVals[UI] = Escape;
2818- }
2819- }
2820-
2821- assert ((MissingVals.empty () ||
2822- all_of (MissingVals,
2823- [MiddleBlock, this ](const std::pair<Value *, Value *> &P) {
2824- return all_of (
2825- predecessors (cast<Instruction>(P.first )->getParent ()),
2826- [MiddleBlock, this ](BasicBlock *Pred) {
2827- return Pred == MiddleBlock ||
2828- Pred == OrigLoop->getLoopLatch ();
2829- });
2830- })) &&
2831- " Expected escaping values from latch/middle.block only" );
2832-
2833- for (auto &I : MissingVals) {
2834- PHINode *PHI = cast<PHINode>(I.first );
2835- // One corner case we have to handle is two IVs "chasing" each-other,
2836- // that is %IV2 = phi [...], [ %IV1, %latch ]
2837- // In this case, if IV1 has an external use, we need to avoid adding both
2838- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2839- // don't already have an incoming value for the middle block.
2840- if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2841- PHI->addIncoming (I.second , MiddleBlock);
2842- }
2843- }
2844-
28452745namespace {
28462746
28472747struct CSEDenseMapInfo {
@@ -2986,24 +2886,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29862886 for (PHINode &PN : Exit->phis ())
29872887 PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
29882888
2989- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2990- // No edge from the middle block to the unique exit block has been inserted
2991- // and there is nothing to fix from vector loop; phis should have incoming
2992- // from scalar loop only.
2993- } else {
2994- // TODO: Check in VPlan to see if IV users need fixing instead of checking
2995- // the cost model.
2996-
2997- // If we inserted an edge from the middle block to the unique exit block,
2998- // update uses outside the loop (phis) to account for the newly inserted
2999- // edge.
3000-
3001- // Fix-up external users of the induction variables.
3002- for (const auto &Entry : Legal->getInductionVars ())
3003- fixupIVUsers (Entry.first , Entry.second ,
3004- getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, State);
3005- }
3006-
30072889 for (Instruction *PI : PredicatedInstructions)
30082890 sinkScalarOperands (&*PI);
30092891
@@ -8857,11 +8739,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
88578739// / Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
88588740// / induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
88598741// / the end value of the induction.
8860- static VPValue *addResumePhiRecipeForInduction (VPWidenInductionRecipe *WideIV,
8861- VPBuilder &VectorPHBuilder,
8862- VPBuilder &ScalarPHBuilder,
8863- VPTypeAnalysis &TypeInfo,
8864- VPValue *VectorTC) {
8742+ static VPValue *addResumePhiRecipeForInduction (
8743+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8744+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8745+ DenseMap<VPValue *, VPValue *> &EndValues) {
88658746 auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
88668747 // Truncated wide inductions resume from the last lane of their vector value
88678748 // in the last vector iteration which is handled elsewhere.
@@ -8886,6 +8767,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
88868767 ScalarTypeOfWideIV);
88878768 }
88888769
8770+ EndValues[WideIV] = EndValue;
88898771 auto *ResumePhiRecipe =
88908772 ScalarPHBuilder.createNaryOp (VPInstruction::ResumePhi, {EndValue, Start},
88918773 WideIV->getDebugLoc (), " bc.resume.val" );
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
88958777// / Create resume phis in the scalar preheader for first-order recurrences,
88968778// / reductions and inductions, and update the VPIRInstructions wrapping the
88978779// / original phis in the scalar header.
8898- static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan) {
8780+ static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan,
8781+ Loop *OrigLoop,
8782+ DenseMap<VPValue *, VPValue *> &EndValues) {
88998783 VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
89008784 auto *ScalarPH = Plan.getScalarPreheader ();
89018785 auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
@@ -8915,7 +8799,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89158799 if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
89168800 if (VPValue *ResumePhi = addResumePhiRecipeForInduction (
89178801 WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8918- &Plan.getVectorTripCount ())) {
8802+ &Plan.getVectorTripCount (), EndValues )) {
89198803 ScalarPhiIRI->addOperand (ResumePhi);
89208804 continue ;
89218805 }
@@ -8949,9 +8833,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89498833// modeled explicitly yet and won't be included. Those are un-truncated
89508834// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
89518835// increments.
8952- static SetVector<VPIRInstruction *> collectUsersInExitBlocks (
8953- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan ,
8954- const MapVector<PHINode *, InductionDescriptor> &Inductions ) {
8836+ static SetVector<VPIRInstruction *>
8837+ collectUsersInExitBlocks ( Loop *OrigLoop, VPRecipeBuilder &Builder,
8838+ VPlan &Plan ) {
89558839 auto *MiddleVPBB = Plan.getMiddleBlock ();
89568840 SetVector<VPIRInstruction *> ExitUsersToFix;
89578841 for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks ()) {
@@ -8976,18 +8860,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89768860 // Exit values for un-truncated inductions and their increments are
89778861 // computed in VPlan (see addInductionEndValue), so users of inductions
89788862 // are collected here like any other exit user.
8979- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8980- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8981- isa<VPWidenPointerInductionRecipe>(V) ||
8982- (isa<Instruction>(IncomingValue) &&
8983- OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8984- any_of (IncomingValue->users (), [&Inductions](User *U) {
8985- auto *P = dyn_cast<PHINode>(U);
8986- return P && Inductions.contains (P);
8987- }))) {
8988- if (ExitVPBB->getSinglePredecessor () == MiddleVPBB)
8989- continue ;
8990- }
89918863 ExitUsersToFix.insert (ExitIRI);
89928864 ExitIRI->addOperand (V);
89938865 }
@@ -8996,17 +8868,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89968868 return ExitUsersToFix;
89978869}
89988870
8871+ // / If \p Incoming is a user of a non-truncated induction, create recipes to
8872+ // / compute the final value and update the user \p ExitIRI.
8873+ static bool addInductionEndValue (
8874+ VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
8875+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8876+ DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
8877+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8878+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst ()) ||
8879+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
8880+ (isa<Instruction>(Incoming->getUnderlyingValue ()) &&
8881+ any_of (cast<Instruction>(Incoming->getUnderlyingValue ())->users (),
8882+ [&Inductions](User *U) {
8883+ auto *P = dyn_cast<PHINode>(U);
8884+ return P && Inductions.contains (P);
8885+ }))) {
8886+ VPValue *IV;
8887+ if (auto *WideIV =
8888+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe ()))
8889+ IV = WideIV;
8890+ else if (auto *WideIV =
8891+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe ()
8892+ ->getOperand (0 )
8893+ ->getDefiningRecipe ()))
8894+ IV = WideIV;
8895+ else
8896+ IV = Incoming->getDefiningRecipe ()->getOperand (1 );
8897+ // Skip phi nodes already updated. This can be the case if 2 induction
8898+ // phis chase each other.
8899+ VPValue *EndValue = EndValues[IV];
8900+ if (any_of (cast<VPRecipeBase>(Incoming->getDefiningRecipe ())->operands (),
8901+ IsaPred<VPWidenIntOrFpInductionRecipe,
8902+ VPWidenPointerInductionRecipe>)) {
8903+ ExitIRI->setOperand (0 , EndValue);
8904+ return true ;
8905+ }
8906+
8907+ VPBuilder B (Plan.getMiddleBlock ()->getTerminator ());
8908+ VPValue *Escape = nullptr ;
8909+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe ());
8910+ VPValue *Step = WideIV->getStepValue ();
8911+ Type *ScalarTy = TypeInfo.inferScalarType (WideIV);
8912+ if (ScalarTy->isIntegerTy ())
8913+ Escape =
8914+ B.createNaryOp (Instruction::Sub, {EndValue, Step}, {}, " ind.escape" );
8915+ else if (ScalarTy->isPointerTy ())
8916+ Escape = B.createPtrAdd (
8917+ EndValue,
8918+ B.createNaryOp (Instruction::Sub,
8919+ {Plan.getOrAddLiveIn (ConstantInt::get (
8920+ Step->getLiveInIRValue ()->getType (), 0 )),
8921+ Step}),
8922+ {}, " ind.escape" );
8923+ else if (ScalarTy->isFloatingPointTy ()) {
8924+ const auto &ID = WideIV->getInductionDescriptor ();
8925+ Escape = B.createNaryOp (
8926+ ID.getInductionBinOp ()->getOpcode () == Instruction::FAdd
8927+ ? Instruction::FSub
8928+ : Instruction::FAdd,
8929+ {EndValue, Step}, {ID.getInductionBinOp ()->getFastMathFlags ()});
8930+ } else {
8931+ llvm_unreachable (" all possible induction types must be handled" );
8932+ }
8933+ ExitIRI->setOperand (0 , Escape);
8934+ return true ;
8935+ }
8936+ return false ;
8937+ }
89998938// Add exit values to \p Plan. Extracts are added for each entry in \p
90008939// ExitUsersToFix if needed and their operands are updated. Returns true if all
90018940// exit users can be handled, otherwise return false.
9002- static bool
9003- addUsersInExitBlocks (VPlan &Plan,
9004- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8941+ static bool addUsersInExitBlocks (
8942+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
8943+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8944+ DenseMap<VPValue *, VPValue *> &EndValues) {
90058945 if (ExitUsersToFix.empty ())
90068946 return true ;
90078947
90088948 auto *MiddleVPBB = Plan.getMiddleBlock ();
90098949 VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8950+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
90108951
90118952 // Introduce extract for exiting values and update the VPIRInstructions
90128953 // modeling the corresponding LCSSA phis.
@@ -9022,11 +8963,16 @@ addUsersInExitBlocks(VPlan &Plan,
90228963 if (ExitIRI->getParent ()->getSinglePredecessor () != MiddleVPBB)
90238964 return false ;
90248965
8966+ VPValue *Incoming = ExitIRI->getOperand (0 );
8967+ if (addInductionEndValue (Plan, ExitIRI, Incoming, Inductions, EndValues,
8968+ TypeInfo))
8969+ continue ;
8970+
90258971 LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
90268972 VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
90278973 {Op, Plan.getOrAddLiveIn (ConstantInt::get (
90288974 IntegerType::get (Ctx, 32 ), 1 ))});
9029- ExitIRI->setOperand (Idx , Ext);
8975+ ExitIRI->setOperand (0 , Ext);
90308976 }
90318977 }
90328978 return true ;
@@ -9307,11 +9253,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93079253 VPlanTransforms::handleUncountableEarlyExit (
93089254 *Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
93099255 }
9310- addScalarResumePhis (RecipeBuilder, *Plan);
9311- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
9312- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9256+ DenseMap<VPValue *, VPValue *> EndValues;
9257+ addScalarResumePhis (RecipeBuilder, *Plan, OrigLoop, EndValues);
9258+ SetVector<VPIRInstruction *> ExitUsersToFix =
9259+ collectUsersInExitBlocks (OrigLoop, RecipeBuilder, *Plan);
93139260 addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9314- if (!addUsersInExitBlocks (*Plan, ExitUsersToFix)) {
9261+ if (!addUsersInExitBlocks (*Plan, ExitUsersToFix, Legal->getInductionVars (),
9262+ EndValues)) {
93159263 reportVectorizationFailure (
93169264 " Some exit values in loop with uncountable exit not supported yet" ,
93179265 " UncountableEarlyExitLoopsUnsupportedExitValue" , ORE, OrigLoop);
@@ -9438,7 +9386,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
94389386 auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
94399387 RecipeBuilder.setRecipe (HeaderR->getUnderlyingInstr (), HeaderR);
94409388 }
9441- addScalarResumePhis (RecipeBuilder, *Plan);
9389+ DenseMap<VPValue *, VPValue *> EndValues;
9390+ addScalarResumePhis (RecipeBuilder, *Plan, OrigLoop, EndValues);
94429391
94439392 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
94449393 return Plan;
0 commit comments