@@ -543,11 +543,6 @@ class InnerLoopVectorizer {
543543protected:
544544 friend class LoopVectorizationPlanner;
545545
546- /// Set up the values of the IVs correctly when exiting the vector loop.
547- virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
548- Value *VectorTripCount, BasicBlock *MiddleBlock,
549- VPTransformState &State);
550-
551546 /// Iteratively sink the scalarized operands of a predicated instruction into
552547 /// the block that was created for it.
553548 void sinkScalarOperands(Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785780 BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
786781 void printDebugTracesAtStart() override;
787782 void printDebugTracesAtEnd() override;
788-
789- void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
790- Value *VectorTripCount, BasicBlock *MiddleBlock,
791- VPTransformState &State) override {};
792783};
793784
794785// A specialized derived class of inner loop vectorizer that performs
@@ -2782,97 +2773,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27822773 return LoopVectorPreHeader;
27832774}
27842775
2785- // Fix up external users of the induction variable. At this point, we are
2786- // in LCSSA form, with all external PHIs that use the IV having one input value,
2787- // coming from the remainder loop. We need those PHIs to also have a correct
2788- // value for the IV when arriving directly from the middle block.
2789- void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2790- const InductionDescriptor &II,
2791- Value *VectorTripCount,
2792- BasicBlock *MiddleBlock,
2793- VPTransformState &State) {
2794- // There are two kinds of external IV usages - those that use the value
2795- // computed in the last iteration (the PHI) and those that use the penultimate
2796- // value (the value that feeds into the phi from the loop latch).
2797- // We allow both, but they, obviously, have different values.
2798-
2799- DenseMap<Value *, Value *> MissingVals;
2800-
2801- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2802- OrigLoop->getLoopPreheader()))
2803- ->getIncomingValueForBlock(MiddleBlock);
2804-
2805- // An external user of the last iteration's value should see the value that
2806- // the remainder loop uses to initialize its own IV.
2807- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2808- for (User *U : PostInc->users()) {
2809- Instruction *UI = cast<Instruction>(U);
2810- if (!OrigLoop->contains(UI)) {
2811- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2812- MissingVals[UI] = EndValue;
2813- }
2814- }
2815-
2816- // An external user of the penultimate value needs to see EndValue - Step.
2817- // It is computed in the middle block by undoing one induction step from
2818- // EndValue (sub, negated ptradd, or the inverse floating-point operation).
2819- for (User *U : OrigPhi->users()) {
2820- auto *UI = cast<Instruction>(U);
2821- if (!OrigLoop->contains(UI)) {
2822- assert(isa<PHINode>(UI) && "Expected LCSSA form");
2823- IRBuilder<> B(MiddleBlock->getTerminator());
2824-
2825- // Fast-math-flags propagate from the original induction instruction.
2826- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2827- B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2828-
2829- VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2830- assert(StepVPV && "step must have been expanded during VPlan execution");
2831- Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2832- : State.get(StepVPV, VPLane(0));
2833- Value *Escape = nullptr;
2834- if (EndValue->getType()->isIntegerTy())
2835- Escape = B.CreateSub(EndValue, Step);
2836- else if (EndValue->getType()->isPointerTy())
2837- Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2838- else {
2839- assert(EndValue->getType()->isFloatingPointTy() &&
2840- "Unexpected induction type");
2841- Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2842- Instruction::FAdd
2843- ? Instruction::FSub
2844- : Instruction::FAdd,
2845- EndValue, Step);
2846- }
2847- Escape->setName("ind.escape");
2848- MissingVals[UI] = Escape;
2849- }
2850- }
2851-
2852- assert((MissingVals.empty() ||
2853- all_of(MissingVals,
2854- [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
2855- return all_of(
2856- predecessors(cast<Instruction>(P.first)->getParent()),
2857- [MiddleBlock, this](BasicBlock *Pred) {
2858- return Pred == MiddleBlock ||
2859- Pred == OrigLoop->getLoopLatch();
2860- });
2861- })) &&
2862- "Expected escaping values from latch/middle.block only");
2863-
2864- for (auto &I : MissingVals) {
2865- PHINode *PHI = cast<PHINode>(I.first);
2866- // One corner case we have to handle is two IVs "chasing" each other,
2867- // that is %IV2 = phi [...], [ %IV1, %latch ]
2868- // In this case, if IV1 has an external use, we need to avoid adding both
2869- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2870- // don't already have an incoming value for the middle block.
2871- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2872- PHI->addIncoming(I.second, MiddleBlock);
2873- }
2874- }
2875-
28762776namespace {
28772777
28782778struct CSEDenseMapInfo {
@@ -2999,24 +2899,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29992899 for (PHINode &PN : Exit->phis())
30002900 PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
30012901
3002- if (Cost->requiresScalarEpilogue(VF.isVector())) {
3003- // No edge from the middle block to the unique exit block has been inserted
3004- // and there is nothing to fix from vector loop; phis should have incoming
3005- // from scalar loop only.
3006- } else {
3007- // TODO: Check in VPlan to see if IV users need fixing instead of checking
3008- // the cost model.
3009-
3010- // If we inserted an edge from the middle block to the unique exit block,
3011- // update uses outside the loop (phis) to account for the newly inserted
3012- // edge.
3013-
3014- // Fix-up external users of the induction variables.
3015- for (const auto &Entry : Legal->getInductionVars())
3016- fixupIVUsers(Entry.first, Entry.second,
3017- getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
3018- }
3019-
30202902 // Don't apply optimizations below when no vector region remains, as they all
30212903 // require a vector loop at the moment.
30222904 if (!State.Plan->getVectorLoopRegion())
@@ -9049,11 +8931,9 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
90498931/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
90508932/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
90518933/// the end value of the induction.
9052- static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
9053- VPBuilder &VectorPHBuilder,
9054- VPBuilder &ScalarPHBuilder,
9055- VPTypeAnalysis &TypeInfo,
9056- VPValue *VectorTC) {
8934+ static VPInstruction *addResumePhiRecipeForInduction(
8935+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8936+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
90578937 auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
90588938 // Truncated wide inductions resume from the last lane of their vector value
90598939 // in the last vector iteration which is handled elsewhere.
@@ -9087,8 +8967,10 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
90878967
90888968/// Create resume phis in the scalar preheader for first-order recurrences,
90898969/// reductions and inductions, and update the VPIRInstructions wrapping the
9090- /// original phis in the scalar header.
9091- static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8970+ /// original phis in the scalar header. End values for inductions are added to
8971+ /// \p IVEndValues.
8972+ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
8973+ DenseMap<VPValue *, VPValue *> &IVEndValues) {
90928974 VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
90938975 auto *ScalarPH = Plan.getScalarPreheader();
90948976 auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -9105,11 +8987,16 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
91058987 if (!ScalarPhiI)
91068988 break;
91078989
8990+ // TODO: Extract final value from induction recipe initially, optimize to
8991+ // pre-computed end value together in optimizeInductionExitUsers.
91088992 auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
91098993 if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
9110- if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
8994+ if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
91118995 WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
91128996 &Plan.getVectorTripCount())) {
8997+ assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
8998+ "Expected a ResumePhi");
8999+ IVEndValues[WideIVR] = ResumePhi->getOperand(0);
91139000 ScalarPhiIRI->addOperand(ResumePhi);
91149001 continue;
91159002 }
@@ -9140,65 +9027,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
91409027 }
91419028}
91429029
9143- /// Return true if \p VPV is an optimizable IV or IV use. That is, if \p VPV is
9144- /// either an untruncated wide induction, or if it increments a wide induction
9145- /// by its step.
9146- static bool isOptimizableIVOrUse(VPValue *VPV) {
9147- VPRecipeBase *Def = VPV->getDefiningRecipe();
9148- if (!Def)
9149- return false;
9150- auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Def);
9151- if (WideIV) {
9152- // VPV itself is a wide induction, separately compute the end value for exit
9153- // users if it is not a truncated IV.
9154- return isa<VPWidenPointerInductionRecipe>(WideIV) ||
9155- !cast<VPWidenIntOrFpInductionRecipe>(WideIV)->getTruncInst();
9156- }
9157-
9158- // Check if VPV is a two-operand recipe with a wide induction as an operand.
9159- if (Def->getNumOperands() != 2)
9160- return false;
9161- WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
9162- if (!WideIV)
9163- WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
9164- if (!WideIV)
9165- return false;
9166-
9167- using namespace VPlanPatternMatch;
9168- auto &ID = WideIV->getInductionDescriptor();
9169-
9170- // Check if VPV increments the induction by the induction step.
9171- VPValue *IVStep = WideIV->getStepValue();
9172- switch (ID.getInductionOpcode()) {
9173- case Instruction::Add:
9174- return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
9175- m_Specific(IVStep)));
9176- case Instruction::FAdd:
9177- return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
9178- m_Specific(IVStep)));
9179- case Instruction::FSub:
9180- return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
9181- m_Specific(IVStep)));
9182- case Instruction::Sub: {
9183- // IVStep will be the negated step of the subtraction. Check that both are
9184- // live-in integer constants and that Step == -1 * IVStep.
9185- VPValue *Step;
9186- if (!match(VPV, m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
9187- !Step->isLiveIn() || !IVStep->isLiveIn())
9188- return false;
9189- auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
9190- auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
9191- return StepCI && IVStepCI &&
9192- StepCI->getValue() == (-1 * IVStepCI->getValue());
9193- }
9194- default:
9195- return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
9196- match(VPV, m_GetElementPtr(m_Specific(WideIV),
9197- m_Specific(WideIV->getStepValue())));
9198- }
9199- llvm_unreachable("should have been covered by switch above");
9200- }
9201-
92029030// Collect VPIRInstructions for phis in the exit blocks that are modeled
92039031// in VPlan and add the exiting VPValue as operand. Some exiting values are not
92049032// modeled explicitly yet and won't be included. Those are un-truncated
@@ -9228,12 +9056,6 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
92289056 }
92299057 Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
92309058 VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
9231- // Exit values for inductions are computed and updated outside of VPlan
9232- // and independent of induction recipes.
9233- // TODO: Compute induction exit values in VPlan.
9234- if (isOptimizableIVOrUse(V) &&
9235- ExitVPBB->getSinglePredecessor() == MiddleVPBB)
9236- continue;
92379059 ExitUsersToFix.insert(ExitIRI);
92389060 ExitIRI->addOperand(V);
92399061 }
@@ -9253,6 +9075,7 @@ addUsersInExitBlocks(VPlan &Plan,
92539075
92549076 auto *MiddleVPBB = Plan.getMiddleBlock();
92559077 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
9078+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
92569079
92579080 // Introduce extract for exiting values and update the VPIRInstructions
92589081 // modeling the corresponding LCSSA phis.
@@ -9574,7 +9397,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95749397 VPlanTransforms::handleUncountableEarlyExit(
95759398 *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
95769399 }
9577- addScalarResumePhis(RecipeBuilder, *Plan);
9400+ DenseMap<VPValue *, VPValue *> IVEndValues;
9401+ addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
95789402 SetVector<VPIRInstruction *> ExitUsersToFix =
95799403 collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
95809404 addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
@@ -9657,6 +9481,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
96579481 VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
96589482 WithoutRuntimeCheck);
96599483 }
9484+ VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues);
96609485
96619486 assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
96629487 return Plan;
@@ -9708,7 +9533,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
97089533 auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
97099534 RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
97109535 }
9711- addScalarResumePhis(RecipeBuilder, *Plan);
9536+ DenseMap<VPValue *, VPValue *> IVEndValues;
9537+ // TODO: IVEndValues are not used yet in the native path, to optimize exit
9538+ // values.
9539+ addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
97129540
97139541 assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
97149542 return Plan;
0 commit comments