@@ -513,16 +513,17 @@ class InnerLoopVectorizer {
513513 // / Fix the non-induction PHIs in \p Plan.
514514 void fixNonInductionPHIs (VPTransformState &State);
515515
516- // / Create a new phi node for the induction variable \p OrigPhi to resume
517- // / iteration count in the scalar epilogue, from where the vectorized loop
518- // / left off. \p Step is the SCEV-expanded induction step to use. In cases
519- // / where the loop skeleton is more complicated (i.e., epilogue vectorization)
520- // / and the resume values can come from an additional bypass block, the \p
521- // / AdditionalBypass pair provides information about the bypass block and the
522- // / end value on the edge from bypass to this loop.
523- PHINode *createInductionResumeValue (
524- PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
525- ArrayRef<BasicBlock *> BypassBlocks,
516+ // / Create a ResumePHI VPInstruction for the induction \p PhiRIR to resume
517+ // / iteration count in the scalar epilogue from where the vectorized loop
518+ // / left off, and add it to the scalar preheader of VPlan. \p Step is the
519+ // / SCEV-expanded induction step to use. In cases where the loop skeleton is
520+ // / more complicated (i.e., epilogue vectorization) and the resume values can
521+ // / come from an additional bypass block, the \p AdditionalBypass pair
522+ // / provides this additional bypass block along with the resume value coming
523+ // / from it.
524+ void createInductionResumeVPValue (
525+ VPIRInstruction *PhiIRI, const InductionDescriptor &ID, Value *Step,
526+ ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
526527 std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
527528
528529 // / Returns the original loop trip count.
@@ -533,6 +534,11 @@ class InnerLoopVectorizer {
533534 // / count of the original loop for both main loop and epilogue vectorization.
534535 void setTripCount (Value *TC) { TripCount = TC; }
535536
537+ std::pair<BasicBlock *, Value *>
538+ getInductionBypassValue (PHINode *OrigPhi) const {
539+ return InductionBypassValues.at (OrigPhi);
540+ }
541+
536542protected:
537543 friend class LoopVectorizationPlanner ;
538544
@@ -572,7 +578,7 @@ class InnerLoopVectorizer {
572578 // / vectorization) and the resume values can come from an additional bypass
573579 // / block, the \p AdditionalBypass pair provides information about the bypass
574580 // / block and the end value on the edge from bypass to this loop.
575- void createInductionResumeValues (
581+ void createInductionResumeVPValues (
576582 const SCEV2ValueTy &ExpandedSCEVs,
577583 std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
578584
@@ -664,6 +670,11 @@ class InnerLoopVectorizer {
664670 // / for cleaning the checks, if vectorization turns out unprofitable.
665671 GeneratedRTChecks &RTChecks;
666672
673+ // / Mapping of induction phis to their bypass values and bypass blocks. They
674+ // / need to be added to their phi nodes after the epilogue skeleton has been
675+ // / created.
676+ DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
677+
667678 VPlan &Plan;
668679};
669680
@@ -2580,10 +2591,11 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25802591 nullptr , Twine (Prefix) + " scalar.ph" );
25812592}
25822593
2583- PHINode * InnerLoopVectorizer::createInductionResumeValue (
2584- PHINode *OrigPhi , const InductionDescriptor &II, Value *Step,
2585- ArrayRef<BasicBlock *> BypassBlocks,
2594+ void InnerLoopVectorizer::createInductionResumeVPValue (
2595+ VPIRInstruction *PhiR , const InductionDescriptor &II, Value *Step,
2596+ ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
25862597 std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598+ auto *OrigPhi = cast<PHINode>(&PhiR->getInstruction ());
25872599 Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
25882600 assert (VectorTripCount && " Expected valid arguments" );
25892601
@@ -2615,27 +2627,21 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26152627 }
26162628 }
26172629
2618- // Create phi nodes to merge from the backedge-taken check block.
2619- PHINode *BCResumeVal =
2620- PHINode::Create (OrigPhi-> getType ( ), 3 , " bc.resume.val " ,
2621- LoopScalarPreHeader-> getFirstNonPHIIt () );
2622- // Copy original phi DL over to the new one.
2623- BCResumeVal-> setDebugLoc (OrigPhi-> getDebugLoc () );
2630+ auto *ResumePhiRecipe = ScalarPHBuilder. createNaryOp (
2631+ VPInstruction::ResumePhi,
2632+ {Plan. getOrAddLiveIn (EndValue ), Plan. getOrAddLiveIn (II. getStartValue ())} ,
2633+ OrigPhi-> getDebugLoc (), " bc.resume.val " );
2634+ assert (PhiR-> getNumOperands () == 0 && " PhiR should not have any operands " );
2635+ PhiR-> addOperand (ResumePhiRecipe );
26242636
2625- // The new PHI merges the original incoming value, in case of a bypass,
2626- // or the value at the end of the vectorized loop.
2627- BCResumeVal->addIncoming (EndValue, LoopMiddleBlock);
2628-
2629- // Fix the scalar body counter (PHI node).
2630- // The old induction's phi node in the scalar body needs the truncated
2631- // value.
2632- for (BasicBlock *BB : BypassBlocks)
2633- BCResumeVal->addIncoming (II.getStartValue (), BB);
2634-
2635- if (AdditionalBypass.first )
2636- BCResumeVal->setIncomingValueForBlock (AdditionalBypass.first ,
2637- EndValueFromAdditionalBypass);
2638- return BCResumeVal;
2637+ if (AdditionalBypass.first ) {
2638+ // Store the bypass values here, as they need to be added to their phi nodes
2639+ // after the epilogue skeleton has been created.
2640+ assert (!InductionBypassValues.contains (OrigPhi) &&
2641+ " entry for OrigPhi already exits" );
2642+ InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
2643+ EndValueFromAdditionalBypass};
2644+ }
26392645}
26402646
26412647// / Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2652,26 +2658,31 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26522658 return I->second ;
26532659}
26542660
2655- void InnerLoopVectorizer::createInductionResumeValues (
2661+ void InnerLoopVectorizer::createInductionResumeVPValues (
26562662 const SCEV2ValueTy &ExpandedSCEVs,
26572663 std::pair<BasicBlock *, Value *> AdditionalBypass) {
26582664 assert (((AdditionalBypass.first && AdditionalBypass.second ) ||
26592665 (!AdditionalBypass.first && !AdditionalBypass.second )) &&
26602666 " Inconsistent information about additional bypass." );
26612667 // We are going to resume the execution of the scalar loop.
2662- // Go over all of the induction variables that we found and fix the
2663- // PHIs that are left in the scalar version of the loop.
2664- // The starting values of PHI nodes depend on the counter of the last
2665- // iteration in the vectorized loop.
2666- // If we come from a bypass edge then we need to start from the original
2668+ // Go over all of the induction variables in the scalar header and fix the
2669+ // PHIs that are left in the scalar version of the loop. The starting values
2670+ // of PHI nodes depend on the counter of the last iteration in the vectorized
2671+ // loop. If we come from a bypass edge then we need to start from the original
26672672 // start value.
2668- for (const auto &InductionEntry : Legal->getInductionVars ()) {
2669- PHINode *OrigPhi = InductionEntry.first ;
2670- const InductionDescriptor &II = InductionEntry.second ;
2671- PHINode *BCResumeVal = createInductionResumeValue (
2672- OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
2673- AdditionalBypass);
2674- OrigPhi->setIncomingValueForBlock (LoopScalarPreHeader, BCResumeVal);
2673+ VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
2674+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
2675+ for (VPRecipeBase &R : *Plan.getScalarHeader ()) {
2676+ auto *PhiR = cast<VPIRInstruction>(&R);
2677+ auto *Phi = dyn_cast<PHINode>(&PhiR->getInstruction ());
2678+ if (!Phi)
2679+ break ;
2680+ if (!Legal->getInductionVars ().contains (Phi))
2681+ continue ;
2682+ const InductionDescriptor &II = Legal->getInductionVars ().find (Phi)->second ;
2683+ createInductionResumeVPValue (PhiR, II, getExpandedStep (II, ExpandedSCEVs),
2684+ LoopBypassBlocks, ScalarPHBuilder,
2685+ AdditionalBypass);
26752686 }
26762687}
26772688
@@ -2734,7 +2745,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
27342745 emitMemRuntimeChecks (LoopScalarPreHeader);
27352746
27362747 // Emit phis for the new starting index of the scalar loop.
2737- createInductionResumeValues (ExpandedSCEVs);
2748+ createInductionResumeVPValues (ExpandedSCEVs);
27382749
27392750 return {LoopVectorPreHeader, nullptr };
27402751}
@@ -7745,13 +7756,21 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77457756
77467757 BestVPlan.execute (&State);
77477758
7748- // 2.5 Collect reduction resume values.
77497759 auto *ExitVPBB = BestVPlan.getMiddleBlock ();
7750- if (VectorizingEpilogue)
7760+ // 2.5 When vectorizing the epilogue, fix reduction resume values and
7761+ // induction resume values from the bypass blocks.
7762+ if (VectorizingEpilogue) {
77517763 for (VPRecipeBase &R : *ExitVPBB) {
77527764 fixReductionScalarResumeWhenVectorizingEpilog (
77537765 &R, State, State.CFG .VPBB2IRBB [ExitVPBB]);
77547766 }
7767+ BasicBlock *PH = OrigLoop->getLoopPreheader ();
7768+ for (const auto &[IVPhi, _] : Legal->getInductionVars ()) {
7769+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
7770+ const auto &[BB, V] = ILV.getInductionBypassValue (IVPhi);
7771+ Inc->setIncomingValueForBlock (BB, V);
7772+ }
7773+ }
77557774
77567775 // 2.6. Maintain Loop Hints
77577776 // Keep all loop hints from the original loop on the vector loop (we'll
@@ -7840,10 +7859,10 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78407859 // Generate the induction variable.
78417860 EPI.VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
78427861
7843- // Skip induction resume value creation here because they will be created in
7844- // the second pass for the scalar loop. The induction resume values for the
7845- // inductions in the epilogue loop are created before executing the plan for
7846- // the epilogue loop.
7862+ // Create induction resume values and ResumePhis for the inductions in the
7863+ // epilogue loop in the VPlan for the epilogue vector loop.
7864+ VPBasicBlock *ScalarPHVPBB = Plan. getScalarPreheader ();
7865+ createInductionResumeVPValues (ExpandedSCEVs);
78477866
78487867 return {LoopVectorPreHeader, nullptr };
78497868}
@@ -8024,9 +8043,9 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80248043 // check, then the resume value for the induction variable comes from
80258044 // the trip count of the main vector loop, hence passing the AdditionalBypass
80268045 // argument.
8027- createInductionResumeValues (ExpandedSCEVs,
8028- {VecEpilogueIterationCountCheck,
8029- EPI.VectorTripCount } /* AdditionalBypass */ );
8046+ createInductionResumeVPValues (ExpandedSCEVs,
8047+ {VecEpilogueIterationCountCheck,
8048+ EPI.VectorTripCount } /* AdditionalBypass */ );
80308049
80318050 return {LoopVectorPreHeader, EPResumeVal};
80328051}
@@ -10327,23 +10346,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1032710346 RdxDesc.getRecurrenceStartValue ());
1032810347 }
1032910348 } else {
10330- // Create induction resume values for both widened pointer and
10331- // integer/fp inductions and update the start value of the induction
10332- // recipes to use the resume value.
10349+ // Retrieve the induction resume values for wide inductions from
10350+ // their original phi nodes in the scalar loop.
1033310351 PHINode *IndPhi = nullptr ;
10334- const InductionDescriptor *ID;
1033510352 if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1033610353 IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
10337- ID = &Ind->getInductionDescriptor ();
1033810354 } else {
1033910355 auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1034010356 IndPhi = WidenInd->getPHINode ();
10341- ID = &WidenInd->getInductionDescriptor ();
1034210357 }
10343-
10344- ResumeV = MainILV.createInductionResumeValue (
10345- IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
10346- {EPI.MainLoopIterationCountCheck });
10358+ ResumeV = IndPhi->getIncomingValueForBlock (L->getLoopPreheader ());
1034710359 }
1034810360 assert (ResumeV && " Must have a resume value" );
1034910361 VPValue *StartVal = BestEpiPlan.getOrAddLiveIn (ResumeV);
@@ -10355,7 +10367,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1035510367 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
1035610368 DT, true , &ExpandedSCEVs);
1035710369 ++LoopsEpilogueVectorized;
10358-
1035910370 if (!MainILV.areSafetyChecksAdded ())
1036010371 DisableRuntimeUnroll = true ;
1036110372 } else {
0 commit comments