@@ -779,10 +779,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
779779 BasicBlock *emitIterationCountCheck (BasicBlock *Bypass, bool ForEpilogue);
780780 void printDebugTracesAtStart () override ;
781781 void printDebugTracesAtEnd () override ;
782-
783- void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
784- Value *VectorTripCount, BasicBlock *MiddleBlock,
785- VPlan &Plan, VPTransformState &State) override {};
786782};
787783
788784// A specialized derived class of inner loop vectorizer that performs
@@ -2697,87 +2693,6 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
26972693 return {LoopVectorPreHeader, nullptr };
26982694}
26992695
2700- // Fix up external users of the induction variable. At this point, we are
2701- // in LCSSA form, with all external PHIs that use the IV having one input value,
2702- // coming from the remainder loop. We need those PHIs to also have a correct
2703- // value for the IV when arriving directly from the middle block.
2704- void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2705- const InductionDescriptor &II,
2706- Value *VectorTripCount,
2707- BasicBlock *MiddleBlock, VPlan &Plan,
2708- VPTransformState &State) {
2709- // There are two kinds of external IV usages - those that use the value
2710- // computed in the last iteration (the PHI) and those that use the penultimate
2711- // value (the value that feeds into the phi from the loop latch).
2712- // We allow both, but they, obviously, have different values.
2713-
2714- assert (OrigLoop->getUniqueExitBlock () && " Expected a single exit block" );
2715-
2716- DenseMap<Value *, Value *> MissingVals;
2717-
2718- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
2719- OrigLoop->getLoopPreheader ()))
2720- ->getIncomingValueForBlock (MiddleBlock);
2721-
2722- // An external user of the last iteration's value should see the value that
2723- // the remainder loop uses to initialize its own IV.
2724- Value *PostInc = OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
2725- for (User *U : PostInc->users ()) {
2726- Instruction *UI = cast<Instruction>(U);
2727- if (!OrigLoop->contains (UI)) {
2728- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2729- MissingVals[UI] = EndValue;
2730- }
2731- }
2732-
2733- // An external user of the penultimate value need to see EndValue - Step.
2734- // The simplest way to get this is to recompute it from the constituent SCEVs,
2735- // that is Start + (Step * (CRD - 1)).
2736- for (User *U : OrigPhi->users ()) {
2737- auto *UI = cast<Instruction>(U);
2738- if (!OrigLoop->contains (UI)) {
2739- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2740- IRBuilder<> B (MiddleBlock->getTerminator ());
2741-
2742- // Fast-math-flags propagate from the original induction instruction.
2743- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2744- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2745-
2746- VPValue *StepVPV = Plan.getSCEVExpansion (II.getStep ());
2747- assert (StepVPV && " step must have been expanded during VPlan execution" );
2748- Value *Step = StepVPV->isLiveIn () ? StepVPV->getLiveInIRValue ()
2749- : State.get (StepVPV, VPLane (0 ));
2750- Value *Escape = nullptr ;
2751- if (EndValue->getType ()->isIntegerTy ())
2752- Escape = B.CreateSub (EndValue, Step);
2753- else if (EndValue->getType ()->isPointerTy ())
2754- Escape = B.CreatePtrAdd (EndValue, B.CreateNeg (Step));
2755- else if (EndValue->getType ()->isFloatingPointTy ()) {
2756- Escape = B.CreateBinOp (II.getInductionBinOp ()->getOpcode () ==
2757- Instruction::FAdd
2758- ? Instruction::FSub
2759- : Instruction::FAdd,
2760- EndValue, Step);
2761- } else {
2762- llvm_unreachable (" all possible induction types must be handled" );
2763- }
2764- Escape->setName (" ind.escape" );
2765- MissingVals[UI] = Escape;
2766- }
2767- }
2768-
2769- for (auto &I : MissingVals) {
2770- PHINode *PHI = cast<PHINode>(I.first );
2771- // One corner case we have to handle is two IVs "chasing" each-other,
2772- // that is %IV2 = phi [...], [ %IV1, %latch ]
2773- // In this case, if IV1 has an external use, we need to avoid adding both
2774- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2775- // don't already have an incoming value for the middle block.
2776- if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2777- PHI->addIncoming (I.second , MiddleBlock);
2778- }
2779- }
2780-
27812696namespace {
27822697
27832698struct CSEDenseMapInfo {
@@ -2907,25 +2822,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29072822 for (PHINode &PN : Exit->phis ())
29082823 PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
29092824
2910- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2911- // No edge from the middle block to the unique exit block has been inserted
2912- // and there is nothing to fix from vector loop; phis should have incoming
2913- // from scalar loop only.
2914- } else {
2915- // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2916- // the cost model.
2917-
2918- // If we inserted an edge from the middle block to the unique exit block,
2919- // update uses outside the loop (phis) to account for the newly inserted
2920- // edge.
2921-
2922- // Fix-up external users of the induction variables.
2923- for (const auto &Entry : Legal->getInductionVars ())
2924- fixupIVUsers (Entry.first , Entry.second ,
2925- getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, Plan,
2926- State);
2927- }
2928-
29292825 for (Instruction *PI : PredicatedInstructions)
29302826 sinkScalarOperands (&*PI);
29312827
@@ -8821,7 +8717,7 @@ addUsersInExitBlock(VPlan &Plan,
88218717 }
88228718}
88238719
8824- static void addResumeValuesForInductions (VPlan &Plan) {
8720+ static void addResumeValuesForInductions (VPlan &Plan, Loop *OrigLoop ) {
88258721 VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
88268722 VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
88278723
@@ -8870,9 +8766,11 @@ static void addResumeValuesForInductions(VPlan &Plan) {
88708766 cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
88718767
88728768 VPBasicBlock *ScalarPHVPBB = nullptr ;
8769+ VPBasicBlock *ExitVPBB = nullptr ;
88738770 if (MiddleVPBB->getNumSuccessors () == 2 ) {
88748771 // Order is strict: first is the exit block, second is the scalar
88758772 // preheader.
8773+ ExitVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[0 ]);
88768774 ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
88778775 } else {
88788776 ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
@@ -8886,6 +8784,53 @@ static void addResumeValuesForInductions(VPlan &Plan) {
88868784 auto *ScalarLoopHeader =
88878785 cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
88888786 addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8787+
8788+ if (ExitVPBB) {
8789+ // For every phi at the start of the exit block that reads this induction, append the value it should see.
8790+ Value *PostInc =
8791+ OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
8792+ for (auto &R : *ExitVPBB) {
8793+ auto *VPIRInst = cast<VPIRInstruction>(&R);
8794+ auto *IRI = &VPIRInst->getInstruction ();
8795+ if (!isa<PHINode>(IRI))
8796+ break ;
8797+ // Skip phi nodes already updated. This can be the case if 2 induction
8798+ // phis chase each other.
8799+ if (VPIRInst->getNumOperands () == 1 )
8800+ continue ;
8801+ if (any_of (IRI->operands (),
8802+ [PostInc](Value *Op) { return Op == PostInc; })) {
8803+ VPIRInst->addOperand (EndValue); // Users of the latch (post-increment) value get EndValue.
8804+ continue ;
8805+ }
8806+
8807+ if (any_of (IRI->operands (),
8808+ [OrigPhi](Value *Op) { return Op == OrigPhi; })) {
8809+ VPBuilder B (MiddleVPBB->getTerminator ()); // Users of the phi itself get EndValue stepped back by one Step.
8810+ VPValue *Escape = nullptr ;
8811+ if (ScalarTy->isIntegerTy ())
8812+ Escape = B.createNaryOp (Instruction::Sub, {EndValue, Step});
8813+ else if (ScalarTy->isPointerTy ()) // Offset must be 0 - Step; xor with -1 is bitwise NOT and yields -Step - 1.
8814+ Escape = B.createPtrAdd (
8815+ EndValue,
8816+ B.createNaryOp (
8817+ Instruction::Sub,
8818+ {Plan.getOrAddLiveIn (ConstantInt::get (
8819+ Step->getLiveInIRValue ()->getType (), 0 )), Step})); // NOTE(review): assumes Step is a live-in; confirm.
8820+ else if (ScalarTy->isFloatingPointTy ()) {
8821+ Escape = B.createNaryOp (
8822+ ID->getInductionBinOp ()->getOpcode () == Instruction::FAdd
8823+ ? Instruction::FSub
8824+ : Instruction::FAdd,
8825+ {EndValue, Step},
8826+ {ID->getInductionBinOp ()->getFastMathFlags ()}); // Inverse of the induction op, keeping its fast-math flags.
8827+ } else {
8828+ llvm_unreachable (" all possible induction types must be handled" );
8829+ }
8830+ VPIRInst->addOperand (Escape);
8831+ }
8832+ }
8833+ }
88898834 }
88908835}
88918836
@@ -9199,7 +9144,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91999144 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
92009145 addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
92019146 addUsersInExitBlock (*Plan, ExitUsersToFix);
9202- addResumeValuesForInductions (*Plan);
9147+ addResumeValuesForInductions (*Plan, OrigLoop );
92039148
92049149 // ---------------------------------------------------------------------------
92059150 // Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9305,7 +9250,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
93059250 bool HasNUW = true ;
93069251 addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
93079252 DebugLoc ());
9308- addResumeValuesForInductions (*Plan);
9253+ addResumeValuesForInductions (*Plan, OrigLoop );
93099254 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
93109255 return Plan;
93119256}
0 commit comments