@@ -543,11 +543,6 @@ class InnerLoopVectorizer {
543543protected:
544544 friend class LoopVectorizationPlanner ;
545545
546- // / Set up the values of the IVs correctly when exiting the vector loop.
547- virtual void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
548- Value *VectorTripCount, BasicBlock *MiddleBlock,
549- VPTransformState &State);
550-
551546 // / Iteratively sink the scalarized operands of a predicated instruction into
552547 // / the block that was created for it.
553548 void sinkScalarOperands (Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785780 BasicBlock *emitIterationCountCheck (BasicBlock *Bypass, bool ForEpilogue);
786781 void printDebugTracesAtStart () override ;
787782 void printDebugTracesAtEnd () override ;
788-
789- void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
790- Value *VectorTripCount, BasicBlock *MiddleBlock,
791- VPTransformState &State) override {};
792783};
793784
794785// A specialized derived class of inner loop vectorizer that performs
@@ -2775,97 +2766,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27752766 return LoopVectorPreHeader;
27762767}
27772768
2778- // Fix up external users of the induction variable. At this point, we are
2779- // in LCSSA form, with all external PHIs that use the IV having one input value,
2780- // coming from the remainder loop. We need those PHIs to also have a correct
2781- // value for the IV when arriving directly from the middle block.
2782- void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2783- const InductionDescriptor &II,
2784- Value *VectorTripCount,
2785- BasicBlock *MiddleBlock,
2786- VPTransformState &State) {
2787- // There are two kinds of external IV usages - those that use the value
2788- // computed in the last iteration (the PHI) and those that use the penultimate
2789- // value (the value that feeds into the phi from the loop latch).
2790- // We allow both, but they, obviously, have different values.
2791-
2792- DenseMap<Value *, Value *> MissingVals;
2793-
2794- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
2795- OrigLoop->getLoopPreheader ()))
2796- ->getIncomingValueForBlock (MiddleBlock);
2797-
2798- // An external user of the last iteration's value should see the value that
2799- // the remainder loop uses to initialize its own IV.
2800- Value *PostInc = OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
2801- for (User *U : PostInc->users ()) {
2802- Instruction *UI = cast<Instruction>(U);
2803- if (!OrigLoop->contains (UI)) {
2804- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2805- MissingVals[UI] = EndValue;
2806- }
2807- }
2808-
2809- // An external user of the penultimate value need to see EndValue - Step.
2810- // The simplest way to get this is to recompute it from the constituent SCEVs,
2811- // that is Start + (Step * (CRD - 1)).
2812- for (User *U : OrigPhi->users ()) {
2813- auto *UI = cast<Instruction>(U);
2814- if (!OrigLoop->contains (UI)) {
2815- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2816- IRBuilder<> B (MiddleBlock->getTerminator ());
2817-
2818- // Fast-math-flags propagate from the original induction instruction.
2819- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2820- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2821-
2822- VPValue *StepVPV = Plan.getSCEVExpansion (II.getStep ());
2823- assert (StepVPV && " step must have been expanded during VPlan execution" );
2824- Value *Step = StepVPV->isLiveIn () ? StepVPV->getLiveInIRValue ()
2825- : State.get (StepVPV, VPLane (0 ));
2826- Value *Escape = nullptr ;
2827- if (EndValue->getType ()->isIntegerTy ())
2828- Escape = B.CreateSub (EndValue, Step);
2829- else if (EndValue->getType ()->isPointerTy ())
2830- Escape = B.CreatePtrAdd (EndValue, B.CreateNeg (Step));
2831- else {
2832- assert (EndValue->getType ()->isFloatingPointTy () &&
2833- " Unexpected induction type" );
2834- Escape = B.CreateBinOp (II.getInductionBinOp ()->getOpcode () ==
2835- Instruction::FAdd
2836- ? Instruction::FSub
2837- : Instruction::FAdd,
2838- EndValue, Step);
2839- }
2840- Escape->setName (" ind.escape" );
2841- MissingVals[UI] = Escape;
2842- }
2843- }
2844-
2845- assert ((MissingVals.empty () ||
2846- all_of (MissingVals,
2847- [MiddleBlock, this ](const std::pair<Value *, Value *> &P) {
2848- return all_of (
2849- predecessors (cast<Instruction>(P.first )->getParent ()),
2850- [MiddleBlock, this ](BasicBlock *Pred) {
2851- return Pred == MiddleBlock ||
2852- Pred == OrigLoop->getLoopLatch ();
2853- });
2854- })) &&
2855- " Expected escaping values from latch/middle.block only" );
2856-
2857- for (auto &I : MissingVals) {
2858- PHINode *PHI = cast<PHINode>(I.first );
2859- // One corner case we have to handle is two IVs "chasing" each-other,
2860- // that is %IV2 = phi [...], [ %IV1, %latch ]
2861- // In this case, if IV1 has an external use, we need to avoid adding both
2862- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2863- // don't already have an incoming value for the middle block.
2864- if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2865- PHI->addIncoming (I.second , MiddleBlock);
2866- }
2867- }
2868-
28692769namespace {
28702770
28712771struct CSEDenseMapInfo {
@@ -2994,24 +2894,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29942894 for (PHINode &PN : Exit->phis ())
29952895 PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
29962896
2997- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2998- // No edge from the middle block to the unique exit block has been inserted
2999- // and there is nothing to fix from vector loop; phis should have incoming
3000- // from scalar loop only.
3001- } else {
3002- // TODO: Check in VPlan to see if IV users need fixing instead of checking
3003- // the cost model.
3004-
3005- // If we inserted an edge from the middle block to the unique exit block,
3006- // update uses outside the loop (phis) to account for the newly inserted
3007- // edge.
3008-
3009- // Fix-up external users of the induction variables.
3010- for (const auto &Entry : Legal->getInductionVars ())
3011- fixupIVUsers (Entry.first , Entry.second ,
3012- getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, State);
3013- }
3014-
30152897 for (Instruction *PI : PredicatedInstructions)
30162898 sinkScalarOperands (&*PI);
30172899
@@ -8866,11 +8748,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
88668748// / Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
88678749// / induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
88688750// / the end value of the induction.
8869- static VPValue *addResumePhiRecipeForInduction (VPWidenInductionRecipe *WideIV,
8870- VPBuilder &VectorPHBuilder,
8871- VPBuilder &ScalarPHBuilder,
8872- VPTypeAnalysis &TypeInfo,
8873- VPValue *VectorTC) {
8751+ static VPValue *addResumePhiRecipeForInduction (
8752+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8753+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8754+ DenseMap<VPValue *, VPValue *> &EndValues) {
88748755 auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
88758756 // Truncated wide inductions resume from the last lane of their vector value
88768757 // in the last vector iteration which is handled elsewhere.
@@ -8895,6 +8776,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
88958776 ScalarTypeOfWideIV);
88968777 }
88978778
8779+ EndValues[WideIV] = EndValue;
88988780 auto *ResumePhiRecipe =
88998781 ScalarPHBuilder.createNaryOp (VPInstruction::ResumePhi, {EndValue, Start},
89008782 WideIV->getDebugLoc (), " bc.resume.val" );
@@ -8904,7 +8786,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
89048786// / Create resume phis in the scalar preheader for first-order recurrences,
89058787// / reductions and inductions, and update the VPIRInstructions wrapping the
89068788// / original phis in the scalar header.
8907- static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan) {
8789+ static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan,
8790+ Loop *OrigLoop,
8791+ DenseMap<VPValue *, VPValue *> &EndValues) {
89088792 VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
89098793 auto *ScalarPH = Plan.getScalarPreheader ();
89108794 auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
@@ -8924,7 +8808,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89248808 if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
89258809 if (VPValue *ResumePhi = addResumePhiRecipeForInduction (
89268810 WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8927- &Plan.getVectorTripCount ())) {
8811+ &Plan.getVectorTripCount (), EndValues )) {
89288812 ScalarPhiIRI->addOperand (ResumePhi);
89298813 continue ;
89308814 }
@@ -9009,9 +8893,9 @@ static bool isIVUse(VPValue *Incoming) {
90098893// modeled explicitly yet and won't be included. Those are un-truncated
90108894// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
90118895// increments.
9012- static SetVector<VPIRInstruction *> collectUsersInExitBlocks (
9013- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan
9014- ) {
8896+ static SetVector<VPIRInstruction *>
8897+ collectUsersInExitBlocks ( Loop *OrigLoop, VPRecipeBuilder &Builder,
8898+ VPlan &Plan ) {
90158899 auto *MiddleVPBB = Plan.getMiddleBlock ();
90168900 SetVector<VPIRInstruction *> ExitUsersToFix;
90178901 for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks ()) {
@@ -9033,11 +8917,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
90338917 }
90348918 Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
90358919 VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
9036- // Exit values for inductions are computed and updated outside of VPlan
9037- // and independent of induction recipes.
9038- // TODO: Compute induction exit values in VPlan.
9039- if (isIVUse (V) && ExitVPBB->getSinglePredecessor () == MiddleVPBB)
9040- continue ;
90418920 ExitUsersToFix.insert (ExitIRI);
90428921 ExitIRI->addOperand (V);
90438922 }
@@ -9046,17 +8925,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
90468925 return ExitUsersToFix;
90478926}
90488927
8928+ /// If \p Incoming is a user of a non-truncated induction, create recipes to
8929+ /// compute the final value and update the user \p ExitIRI. Users of the
8930+ /// induction increment get the end value recorded in \p EndValues; users of
8931+ /// the induction itself get the end value minus one step, computed in the
8932+ /// middle block. Returns true if \p ExitIRI was updated, false otherwise.
8933+ static bool addInductionEndValue(
8934+     VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
8935+     const MapVector<PHINode *, InductionDescriptor> &Inductions,
8936+     DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
8937+   // isa<> asserts on null, so tolerate a missing underlying value.
8938+   auto *UI = dyn_cast_or_null<Instruction>(Incoming->getUnderlyingValue());
8939+   bool FeedsInductionPhi = UI && any_of(UI->users(), [&Inductions](User *U) {
8940+     auto *P = dyn_cast<PHINode>(U);
8941+     return P && Inductions.contains(P);
8942+   });
8943+   auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(Incoming);
8944+   bool IsInductionUse = (WideIntOrFp && !WideIntOrFp->getTruncInst()) ||
8945+                         isa<VPWidenPointerInductionRecipe>(Incoming) ||
8946+                         FeedsInductionPhi;
8947+   VPRecipeBase *Def = Incoming->getDefiningRecipe();
8948+   if (!IsInductionUse || !Def)
8949+     return false;
8950+   // Locate the wide induction: Incoming itself, or an operand of its defining
8951+   // recipe. Operand 0 may have no defining recipe, hence dyn_cast_or_null.
8952+   VPValue *IV;
8953+   if (auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Def))
8954+     IV = WideIV;
8955+   else if (auto *WideIV = dyn_cast_or_null<VPWidenInductionRecipe>(
8956+                Def->getOperand(0)->getDefiningRecipe()))
8957+     IV = WideIV;
8958+   else
8959+     IV = Def->getOperand(1);
8960+   // Use lookup() so a missing entry is not inserted as null into EndValues.
8961+   VPValue *EndValue = EndValues.lookup(IV);
8962+   assert(EndValue && "missing end value for induction");
8963+   // A user of the induction increment sees the end value itself.
8964+   if (any_of(Def->operands(), IsaPred<VPWidenIntOrFpInductionRecipe,
8965+                                       VPWidenPointerInductionRecipe>)) {
8966+     ExitIRI->setOperand(0, EndValue);
8967+     return true;
8968+   }
8969+   // A user of the induction itself sees the end value minus one step.
8970+   VPBuilder B(Plan.getMiddleBlock()->getTerminator());
8971+   auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
8972+   VPValue *Step = WideIV->getStepValue();
8973+   Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
8974+   VPValue *Escape = nullptr;
8975+   if (ScalarTy->isIntegerTy()) {
8976+     Escape =
8977+         B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
8978+   } else if (ScalarTy->isPointerTy()) {
8979+     // The step need not be a live-in, so query its type through TypeInfo.
8980+     VPValue *Zero = Plan.getOrAddLiveIn(
8981+         ConstantInt::get(TypeInfo.inferScalarType(Step), 0));
8982+     VPValue *NegStep = B.createNaryOp(Instruction::Sub, {Zero, Step});
8983+     Escape = B.createPtrAdd(EndValue, NegStep, {}, "ind.escape");
8984+   } else if (ScalarTy->isFloatingPointTy()) {
8985+     const auto *BinOp = WideIV->getInductionDescriptor().getInductionBinOp();
8986+     bool IsFAdd = BinOp->getOpcode() == Instruction::FAdd;
8987+     Escape = B.createNaryOp(IsFAdd ? Instruction::FSub : Instruction::FAdd,
8988+                             {EndValue, Step}, {BinOp->getFastMathFlags()});
8989+   } else {
8990+     llvm_unreachable("all possible induction types must be handled");
8991+   }
8992+   ExitIRI->setOperand(0, Escape);
8993+   return true;
8994+ }
90498995// Add exit values to \p Plan. Extracts are added for each entry in \p
90508996// ExitUsersToFix if needed and their operands are updated. Returns true if all
90518997// exit users can be handled, otherwise return false.
9052- static bool
9053- addUsersInExitBlocks (VPlan &Plan,
9054- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8998+ static bool addUsersInExitBlocks (
8999+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
9000+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
9001+ DenseMap<VPValue *, VPValue *> &EndValues) {
90559002 if (ExitUsersToFix.empty ())
90569003 return true ;
90579004
90589005 auto *MiddleVPBB = Plan.getMiddleBlock ();
90599006 VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
9007+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
90609008
90619009 // Introduce extract for exiting values and update the VPIRInstructions
90629010 // modeling the corresponding LCSSA phis.
@@ -9072,11 +9020,16 @@ addUsersInExitBlocks(VPlan &Plan,
90729020 if (ExitIRI->getParent ()->getSinglePredecessor () != MiddleVPBB)
90739021 return false ;
90749022
9023+ VPValue *Incoming = ExitIRI->getOperand (0 );
9024+ if (addInductionEndValue (Plan, ExitIRI, Incoming, Inductions, EndValues,
9025+ TypeInfo))
9026+ continue ;
9027+
90759028 LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
90769029 VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
90779030 {Op, Plan.getOrAddLiveIn (ConstantInt::get (
90789031 IntegerType::get (Ctx, 32 ), 1 ))});
9079- ExitIRI->setOperand (Idx , Ext);
9032+ ExitIRI->setOperand (0 , Ext);
90809033 }
90819034 }
90829035 return true ;
@@ -9371,11 +9324,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93719324 VPlanTransforms::handleUncountableEarlyExit (
93729325 *Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
93739326 }
9374- addScalarResumePhis (RecipeBuilder, *Plan);
9375- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
9376- OrigLoop, RecipeBuilder, *Plan);
9327+ DenseMap<VPValue *, VPValue *> EndValues;
9328+ addScalarResumePhis (RecipeBuilder, *Plan, OrigLoop, EndValues);
9329+ SetVector<VPIRInstruction *> ExitUsersToFix =
9330+ collectUsersInExitBlocks (OrigLoop, RecipeBuilder, *Plan);
93779331 addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9378- if (!addUsersInExitBlocks (*Plan, ExitUsersToFix)) {
9332+ if (!addUsersInExitBlocks (*Plan, ExitUsersToFix, Legal->getInductionVars (),
9333+ EndValues)) {
93799334 reportVectorizationFailure (
93809335 " Some exit values in loop with uncountable exit not supported yet" ,
93819336 " UncountableEarlyExitLoopsUnsupportedExitValue" , ORE, OrigLoop);
@@ -9502,7 +9457,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
95029457 auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
95039458 RecipeBuilder.setRecipe (HeaderR->getUnderlyingInstr (), HeaderR);
95049459 }
9505- addScalarResumePhis (RecipeBuilder, *Plan);
9460+ DenseMap<VPValue *, VPValue *> EndValues;
9461+ addScalarResumePhis (RecipeBuilder, *Plan, OrigLoop, EndValues);
95069462
95079463 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
95089464 return Plan;
0 commit comments