@@ -552,11 +552,6 @@ class InnerLoopVectorizer {
552552protected:
553553 friend class LoopVectorizationPlanner ;
554554
555- // / Set up the values of the IVs correctly when exiting the vector loop.
556- virtual void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
557- Value *VectorTripCount, BasicBlock *MiddleBlock,
558- VPTransformState &State);
559-
560555 // / Iteratively sink the scalarized operands of a predicated instruction into
561556 // / the block that was created for it.
562557 void sinkScalarOperands (Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785780 BasicBlock *emitIterationCountCheck (BasicBlock *Bypass, bool ForEpilogue);
786781 void printDebugTracesAtStart () override ;
787782 void printDebugTracesAtEnd () override ;
788-
789- void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
790- Value *VectorTripCount, BasicBlock *MiddleBlock,
791- VPTransformState &State) override {};
792783};
793784
794785// A specialized derived class of inner loop vectorizer that performs
@@ -2768,88 +2759,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27682759 return LoopVectorPreHeader;
27692760}
27702761
2771- // Fix up external users of the induction variable. At this point, we are
2772- // in LCSSA form, with all external PHIs that use the IV having one input value,
2773- // coming from the remainder loop. We need those PHIs to also have a correct
2774- // value for the IV when arriving directly from the middle block.
2775- void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2776- const InductionDescriptor &II,
2777- Value *VectorTripCount,
2778- BasicBlock *MiddleBlock,
2779- VPTransformState &State) {
2780- // There are two kinds of external IV usages - those that use the value
2781- // computed in the last iteration (the PHI) and those that use the penultimate
2782- // value (the value that feeds into the phi from the loop latch).
2783- // We allow both, but they, obviously, have different values.
2784-
2785- DenseMap<Value *, Value *> MissingVals;
2786-
2787- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
2788- OrigLoop->getLoopPreheader ()))
2789- ->getIncomingValueForBlock (MiddleBlock);
2790-
2791- // An external user of the last iteration's value should see the value that
2792- // the remainder loop uses to initialize its own IV.
2793- Value *PostInc = OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
2794- for (User *U : PostInc->users ()) {
2795- Instruction *UI = cast<Instruction>(U);
2796- if (!OrigLoop->contains (UI)) {
2797- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2798- MissingVals[UI] = EndValue;
2799- }
2800- }
2801-
2802- // An external user of the penultimate value need to see EndValue - Step.
2803- // The simplest way to get this is to recompute it from the constituent SCEVs,
2804- // that is Start + (Step * (CRD - 1)).
2805- for (User *U : OrigPhi->users ()) {
2806- auto *UI = cast<Instruction>(U);
2807- if (!OrigLoop->contains (UI)) {
2808- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2809- IRBuilder<> B (MiddleBlock->getTerminator ());
2810-
2811- // Fast-math-flags propagate from the original induction instruction.
2812- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2813- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2814-
2815- VPValue *StepVPV = Plan.getSCEVExpansion (II.getStep ());
2816- assert (StepVPV && " step must have been expanded during VPlan execution" );
2817- Value *Step = StepVPV->isLiveIn () ? StepVPV->getLiveInIRValue ()
2818- : State.get (StepVPV, VPLane (0 ));
2819- Value *Escape = nullptr ;
2820- if (EndValue->getType ()->isIntegerTy ())
2821- Escape = B.CreateSub (EndValue, Step);
2822- else if (EndValue->getType ()->isPointerTy ())
2823- Escape = B.CreatePtrAdd (EndValue, B.CreateNeg (Step));
2824- else {
2825- assert (EndValue->getType ()->isFloatingPointTy () &&
2826- " Unexpected induction type" );
2827- Escape = B.CreateBinOp (II.getInductionBinOp ()->getOpcode () ==
2828- Instruction::FAdd
2829- ? Instruction::FSub
2830- : Instruction::FAdd,
2831- EndValue, Step);
2832- }
2833- Escape->setName (" ind.escape" );
2834- MissingVals[UI] = Escape;
2835- }
2836- }
2837-
2838- assert ((MissingVals.empty () || OrigLoop->getUniqueExitBlock ()) &&
2839- " Expected a single exit block for escaping values" );
2840-
2841- for (auto &I : MissingVals) {
2842- PHINode *PHI = cast<PHINode>(I.first );
2843- // One corner case we have to handle is two IVs "chasing" each-other,
2844- // that is %IV2 = phi [...], [ %IV1, %latch ]
2845- // In this case, if IV1 has an external use, we need to avoid adding both
2846- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2847- // don't already have an incoming value for the middle block.
2848- if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2849- PHI->addIncoming (I.second , MiddleBlock);
2850- }
2851- }
2852-
28532762namespace {
28542763
28552764struct CSEDenseMapInfo {
@@ -2978,24 +2887,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29782887 for (PHINode &PN : Exit->phis ())
29792888 PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
29802889
2981- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2982- // No edge from the middle block to the unique exit block has been inserted
2983- // and there is nothing to fix from vector loop; phis should have incoming
2984- // from scalar loop only.
2985- } else {
2986- // TODO: Check in VPlan to see if IV users need fixing instead of checking
2987- // the cost model.
2988-
2989- // If we inserted an edge from the middle block to the unique exit block,
2990- // update uses outside the loop (phis) to account for the newly inserted
2991- // edge.
2992-
2993- // Fix-up external users of the induction variables.
2994- for (const auto &Entry : Legal->getInductionVars ())
2995- fixupIVUsers (Entry.first , Entry.second ,
2996- getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, State);
2997- }
2998-
29992890 for (Instruction *PI : PredicatedInstructions)
30002891 sinkScalarOperands (&*PI);
30012892
@@ -8839,11 +8730,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
88398730// / Create a ResumePhi for \p PhiR, if it is a wide induction recipe. If the
88408731// / induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
88418732// / the end value of the induction. The end value is also recorded in \p EndValues, keyed by \p PhiR.
8842- static VPValue *addResumeValuesForInduction (VPHeaderPHIRecipe *PhiR,
8843- VPBuilder &VectorPHBuilder,
8844- VPBuilder &ScalarPHBuilder,
8845- VPTypeAnalysis &TypeInfo,
8846- VPValue *VectorTC) {
8733+ static VPValue *addResumeValuesForInduction (
8734+ VPHeaderPHIRecipe *PhiR, VPBuilder &VectorPHBuilder,
8735+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8736+ Loop *OrigLoop, DenseMap<VPValue *, VPValue *> &EndValues) {
88478737 auto *WideIV = dyn_cast<VPWidenInductionRecipe>(PhiR);
88488738 if (!WideIV)
88498739 return nullptr ;
@@ -8875,6 +8765,7 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
88758765 ScalarTy);
88768766 }
88778767
8768+ EndValues[PhiR] = EndValue;
88788769 auto *ResumePhiRecipe =
88798770 ScalarPHBuilder.createNaryOp (VPInstruction::ResumePhi, {EndValue, Start},
88808771 WideIV->getDebugLoc (), " bc.resume.val" );
@@ -8886,7 +8777,8 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
88868777// / original phis in the scalar header.
88878778static void addScalarResumePhis (
88888779 VPlan &Plan,
8889- function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe) {
8780+ function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe,
8781+ Loop *OrigLoop, DenseMap<VPValue *, VPValue *> &EndValues) {
88908782 VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
88918783 auto *ScalarPH = Plan.getScalarPreheader ();
88928784 auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
@@ -8905,7 +8797,7 @@ static void addScalarResumePhis(
89058797
89068798 if (VPValue *ResumePhi = addResumeValuesForInduction (
89078799 VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8908- &Plan.getVectorTripCount ())) {
8800+ &Plan.getVectorTripCount (), OrigLoop, EndValues )) {
89098801 ScalarPhiIRI->addOperand (ResumePhi);
89108802 continue ;
89118803 }
@@ -8937,9 +8829,9 @@ static void addScalarResumePhis(
89378829// modeled explicitly yet and won't be included. Those are un-truncated
89388830// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
89398831// increments.
8940- static SetVector<VPIRInstruction *> collectUsersInExitBlocks (
8941- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan ,
8942- const MapVector<PHINode *, InductionDescriptor> &Inductions ) {
8832+ static SetVector<VPIRInstruction *>
8833+ collectUsersInExitBlocks ( Loop *OrigLoop, VPRecipeBuilder &Builder,
8834+ VPlan &Plan ) {
89438835 auto *MiddleVPBB = Plan.getMiddleBlock ();
89448836 SetVector<VPIRInstruction *> ExitUsersToFix;
89458837 for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks ()) {
@@ -8964,18 +8856,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89648856 // Exit users of inductions are collected here like any other exit user;
89658857 // their exit values are computed in VPlan by addUsersInExitBlocks from the
89668858 // induction end values, so no induction-specific filtering is done here.
8967- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8968- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8969- isa<VPWidenPointerInductionRecipe>(V) ||
8970- (isa<Instruction>(IncomingValue) &&
8971- OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8972- any_of (IncomingValue->users (), [&Inductions](User *U) {
8973- auto *P = dyn_cast<PHINode>(U);
8974- return P && Inductions.contains (P);
8975- }))) {
8976- if (ExitVPBB->getSinglePredecessor () == MiddleVPBB)
8977- continue ;
8978- }
89798859 ExitUsersToFix.insert (ExitIRI);
89808860 ExitIRI->addOperand (V);
89818861 }
@@ -8987,14 +8867,16 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
89878867// Add exit values to \p Plan. Extracts are added for each entry in \p
89888868// ExitUsersToFix if needed and their operands are updated. Returns true if all
89898869// exit users can be handled, otherwise returns false.
8990- static bool
8991- addUsersInExitBlocks (VPlan &Plan,
8992- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8870+ static bool addUsersInExitBlocks (
8871+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
8872+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8873+ DenseMap<VPValue *, VPValue *> &EndValues) {
89938874 if (ExitUsersToFix.empty ())
89948875 return true ;
89958876
89968877 auto *MiddleVPBB = Plan.getMiddleBlock ();
89978878 VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8879+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
89988880
89998881 // Introduce extract for exiting values and update the VPIRInstructions
90008882 // modeling the corresponding LCSSA phis.
@@ -9010,6 +8892,69 @@ addUsersInExitBlocks(VPlan &Plan,
90108892 if (ExitIRI->getParent ()->getSinglePredecessor () != MiddleVPBB)
90118893 return false ;
90128894
8895+ VPValue *Incoming = ExitIRI->getOperand (0 );
8896+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8897+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst ()) ||
8898+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
8899+ (isa<Instruction>(Incoming->getUnderlyingValue ()) &&
8900+ // OrigLoop->contains(cast<Instruction>(Incoming->getUnderlyingValue()))
8901+ // &&
8902+ any_of (cast<Instruction>(Incoming->getUnderlyingValue ())->users (),
8903+ [&Inductions](User *U) {
8904+ auto *P = dyn_cast<PHINode>(U);
8905+ return P && Inductions.contains (P);
8906+ }))) {
8907+ VPValue *IV;
8908+ if (auto *WideIV =
8909+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe ()))
8910+ IV = WideIV;
8911+ else if (auto *WideIV = dyn_cast<VPWidenInductionRecipe>(
8912+ Incoming->getDefiningRecipe ()
8913+ ->getOperand (0 )
8914+ ->getDefiningRecipe ()))
8915+ IV = WideIV;
8916+ else
8917+ IV = Incoming->getDefiningRecipe ()->getOperand (1 );
8918+ // Skip phi nodes already updated. This can be the case if two induction
8919+ // phis chase each other.
8920+ VPValue *EndValue = EndValues[IV];
8921+ if (any_of (cast<VPRecipeBase>(Incoming->getDefiningRecipe ())->operands (),
8922+ IsaPred<VPWidenIntOrFpInductionRecipe,
8923+ VPWidenPointerInductionRecipe>)) {
8924+ ExitIRI->setOperand (0 , EndValue);
8925+ continue ;
8926+ }
8927+
8928+ VPBuilder B (Plan.getMiddleBlock ()->getTerminator ());
8929+ VPValue *Escape = nullptr ;
8930+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe ());
8931+ VPValue *Step = WideIV->getStepValue ();
8932+ Type *ScalarTy = TypeInfo.inferScalarType (WideIV);
8933+ if (ScalarTy->isIntegerTy ())
8934+ Escape = B.createNaryOp (Instruction::Sub, {EndValue, Step}, {},
8935+ " ind.escape" );
8936+ else if (ScalarTy->isPointerTy ())
8937+ Escape = B.createPtrAdd (
8938+ EndValue,
8939+ B.createNaryOp (Instruction::Sub,
8940+ {Plan.getOrAddLiveIn (ConstantInt::get (
8941+ Step->getLiveInIRValue ()->getType (), 0 )),
8942+ Step}),
8943+ {}, " ind.escape" );
8944+ else if (ScalarTy->isFloatingPointTy ()) {
8945+ const auto &ID = WideIV->getInductionDescriptor ();
8946+ Escape = B.createNaryOp (
8947+ ID.getInductionBinOp ()->getOpcode () == Instruction::FAdd
8948+ ? Instruction::FSub
8949+ : Instruction::FAdd,
8950+ {EndValue, Step}, {ID.getInductionBinOp ()->getFastMathFlags ()});
8951+ } else {
8952+ llvm_unreachable (" all possible induction types must be handled" );
8953+ }
8954+ ExitIRI->setOperand (0 , Escape);
8955+ continue ;
8956+ }
8957+
90138958 LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
90148959 VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
90158960 {V, Plan.getOrAddLiveIn (ConstantInt::get (
@@ -9294,13 +9239,18 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92949239 VPlanTransforms::handleUncountableEarlyExit (
92959240 *Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
92969241 }
9297- addScalarResumePhis (*Plan, [&RecipeBuilder](PHINode *P) {
9298- return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe (P));
9299- });
9300- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
9301- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9242+ DenseMap<VPValue *, VPValue *> EndValues;
9243+ addScalarResumePhis (
9244+ *Plan,
9245+ [&RecipeBuilder](PHINode *P) {
9246+ return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe (P));
9247+ },
9248+ OrigLoop, EndValues);
9249+ SetVector<VPIRInstruction *> ExitUsersToFix =
9250+ collectUsersInExitBlocks (OrigLoop, RecipeBuilder, *Plan);
93029251 addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9303- if (!addUsersInExitBlocks (*Plan, ExitUsersToFix)) {
9252+ if (!addUsersInExitBlocks (*Plan, ExitUsersToFix, Legal->getInductionVars (),
9253+ EndValues)) {
93049254 reportVectorizationFailure (
93059255 " Some exit values in loop with uncountable exit not supported yet" ,
93069256 " Some exit values in loop with uncountable exit not supported yet" ,
@@ -9419,6 +9369,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
94199369 addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
94209370 DebugLoc ());
94219371
9372+ DenseMap<VPValue *, VPValue *> EndValues;
94229373 addScalarResumePhis (
94239374 *Plan,
94249375 [&Plan](PHINode *P) {
@@ -9428,9 +9379,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
94289379 auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
94299380 return HeaderR->getUnderlyingValue () == P ? HeaderR : nullptr ;
94309381 });
9431- }
9432-
9433- );
9382+ },
9383+ OrigLoop, EndValues);
94349384
94359385 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
94369386 return Plan;
0 commit comments