@@ -467,11 +467,12 @@ class InnerLoopVectorizer {
467467 ElementCount MinProfitableTripCount,
468468 unsigned UnrollFactor, LoopVectorizationLegality *LVL,
469469 LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
470- ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
470+ ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
471+ VPlan &Plan)
471472 : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
472473 AC (AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
473474 Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI),
474- PSI(PSI), RTChecks(RTChecks) {
475+ PSI(PSI), RTChecks(RTChecks), Plan(Plan) {
475476 // Query this against the original loop and save it here because the profile
476477 // of the original loop header may change as the transformation happens.
477478 OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize (
@@ -522,7 +523,7 @@ class InnerLoopVectorizer {
522523 // / and the resume values can come from an additional bypass block, the \p
523524 // / AdditionalBypass pair provides information about the bypass block and the
524525 // / end value on the edge from bypass to this loop.
525- PHINode * createInductionResumeValue (
526+ void createInductionResumeValue (
526527 PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
527528 ArrayRef<BasicBlock *> BypassBlocks,
528529 std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
@@ -535,6 +536,11 @@ class InnerLoopVectorizer {
535536 // / count of the original loop for both main loop and epilogue vectorization.
536537 void setTripCount (Value *TC) { TripCount = TC; }
537538
539+ std::pair<BasicBlock *, Value *>
540+ getInductionBypassValue (PHINode *OrigPhi) const {
541+ return InductionBypassValues.find (OrigPhi)->second ;
542+ }
543+
538544protected:
539545 friend class LoopVectorizationPlanner ;
540546
@@ -674,6 +680,11 @@ class InnerLoopVectorizer {
674680 // / Structure to hold information about generated runtime checks, responsible
675681 // / for cleaning the checks, if vectorization turns out unprofitable.
676682 GeneratedRTChecks &RTChecks;
683+
684+ // / Mapping of induction phis to their bypass values and bypass blocks.
685+ DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
686+
687+ VPlan &Plan;
677688};
678689
679690// / Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -715,10 +726,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
715726 OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
716727 LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
717728 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
718- GeneratedRTChecks &Checks)
729+ GeneratedRTChecks &Checks, VPlan &Plan )
719730 : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
720731 EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL,
721- CM, BFI, PSI, Checks),
732+ CM, BFI, PSI, Checks, Plan ),
722733 EPI (EPI) {}
723734
724735 // Override this function to handle the more complex control flow around the
@@ -755,9 +766,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
755766 OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
756767 LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
757768 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
758- GeneratedRTChecks &Check)
769+ GeneratedRTChecks &Check, VPlan &Plan )
759770 : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
760- EPI, LVL, CM, BFI, PSI, Check) {}
771+ EPI, LVL, CM, BFI, PSI, Check, Plan ) {}
761772 // / Implements the interface for creating a vectorized skeleton using the
762773 // / *main loop* strategy (ie the first pass of vplan execution).
763774 std::pair<BasicBlock *, Value *>
@@ -789,9 +800,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
789800 OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
790801 LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
791802 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
792- GeneratedRTChecks &Checks)
803+ GeneratedRTChecks &Checks, VPlan &Plan )
793804 : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
794- EPI, LVL, CM, BFI, PSI, Checks) {
805+ EPI, LVL, CM, BFI, PSI, Checks, Plan ) {
795806 TripCount = EPI.TripCount ;
796807 }
797808 // / Implements the interface for creating a vectorized skeleton using the
@@ -2586,7 +2597,18 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25862597 nullptr , Twine (Prefix) + " scalar.ph" );
25872598}
25882599
2589- PHINode *InnerLoopVectorizer::createInductionResumeValue (
2600+ static void addOperandToPhiInVPIRBasicBlock (VPIRBasicBlock *VPBB, PHINode *P,
2601+ VPValue *Op) {
2602+ for (VPRecipeBase &R : *VPBB) {
2603+ auto *IRI = cast<VPIRInstruction>(&R);
2604+ if (&IRI->getInstruction () == P) {
2605+ IRI->addOperand (Op);
2606+ break ;
2607+ }
2608+ }
2609+ }
2610+
2611+ void InnerLoopVectorizer::createInductionResumeValue (
25902612 PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
25912613 ArrayRef<BasicBlock *> BypassBlocks,
25922614 std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -2621,27 +2643,28 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26212643 }
26222644 }
26232645
2624- // Create phi nodes to merge from the backedge-taken check block.
2625- PHINode *BCResumeVal =
2626- PHINode::Create (OrigPhi->getType (), 3 , " bc.resume.val" ,
2627- LoopScalarPreHeader->getFirstNonPHIIt ());
2628- // Copy original phi DL over to the new one.
2629- BCResumeVal->setDebugLoc (OrigPhi->getDebugLoc ());
2646+ VPBasicBlock *MiddleVPBB =
2647+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
26302648
2631- // The new PHI merges the original incoming value, in case of a bypass,
2632- // or the value at the end of the vectorized loop.
2633- BCResumeVal->addIncoming (EndValue, LoopMiddleBlock);
2649+ VPBasicBlock *ScalarPHVPBB = nullptr ;
2650+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
2651+ // Order is strict: first is the exit block, second is the scalar preheader.
2652+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
2653+ } else {
2654+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
2655+ }
26342656
2635- // Fix the scalar body counter (PHI node).
2636- // The old induction's phi node in the scalar body needs the truncated
2637- // value.
2638- for (BasicBlock *BB : BypassBlocks)
2639- BCResumeVal-> addIncoming (II. getStartValue ( ), BB );
2657+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
2658+ auto *ResumePhiRecipe = ScalarPHBuilder. createNaryOp (
2659+ VPInstruction::ResumePhi,
2660+ {Plan. getOrAddLiveIn (EndValue), Plan. getOrAddLiveIn (II. getStartValue ())},
2661+ OrigPhi-> getDebugLoc ( ), " bc.resume.val " );
26402662
2641- if (AdditionalBypass.first )
2642- BCResumeVal->setIncomingValueForBlock (AdditionalBypass.first ,
2643- EndValueFromAdditionalBypass);
2644- return BCResumeVal;
2663+ auto *ScalarLoopHeader =
2664+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
2665+ addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
2666+ InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
2667+ EndValueFromAdditionalBypass};
26452668}
26462669
26472670// / Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2674,10 +2697,8 @@ void InnerLoopVectorizer::createInductionResumeValues(
26742697 for (const auto &InductionEntry : Legal->getInductionVars ()) {
26752698 PHINode *OrigPhi = InductionEntry.first ;
26762699 const InductionDescriptor &II = InductionEntry.second ;
2677- PHINode *BCResumeVal = createInductionResumeValue (
2678- OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
2679- AdditionalBypass);
2680- OrigPhi->setIncomingValueForBlock (LoopScalarPreHeader, BCResumeVal);
2700+ createInductionResumeValue (OrigPhi, II, getExpandedStep (II, ExpandedSCEVs),
2701+ LoopBypassBlocks, AdditionalBypass);
26812702 }
26822703}
26832704
@@ -7787,6 +7808,25 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
77877808 // the second pass for the scalar loop. The induction resume values for the
77887809 // inductions in the epilogue loop are created before executing the plan for
77897810 // the epilogue loop.
7811+ for (VPRecipeBase &R :
7812+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
7813+ // Create induction resume values for both widened pointer and
7814+ // integer/fp inductions and update the start value of the induction
7815+ // recipes to use the resume value.
7816+ PHINode *IndPhi = nullptr ;
7817+ const InductionDescriptor *ID;
7818+ if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
7819+ IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
7820+ ID = &Ind->getInductionDescriptor ();
7821+ } else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
7822+ IndPhi = WidenInd->getPHINode ();
7823+ ID = &WidenInd->getInductionDescriptor ();
7824+ } else
7825+ continue ;
7826+
7827+ createInductionResumeValue (IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
7828+ LoopBypassBlocks);
7829+ }
77907830
77917831 return {LoopVectorPreHeader, nullptr };
77927832}
@@ -8945,14 +8985,9 @@ static void addLiveOutsForFirstOrderRecurrences(
89458985 VPInstruction::ResumePhi, {Resume, FOR->getStartValue ()}, {},
89468986 " scalar.recur.init" );
89478987 auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr ());
8948- for (VPRecipeBase &R :
8949- *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ())) {
8950- auto *IRI = cast<VPIRInstruction>(&R);
8951- if (&IRI->getInstruction () == FORPhi) {
8952- IRI->addOperand (ResumePhiRecipe);
8953- break ;
8954- }
8955- }
8988+ addOperandToPhiInVPIRBasicBlock (
8989+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ()), FORPhi,
8990+ ResumePhiRecipe);
89568991
89578992 // Now update VPIRInstructions modeling LCSSA phis in the exit block.
89588993 // Extract the penultimate value of the recurrence and use it as operand for
@@ -9679,7 +9714,7 @@ static bool processLoopInVPlanNativePath(
96799714 GeneratedRTChecks Checks (PSE, DT, LI, TTI, F->getDataLayout (),
96809715 AddBranchWeights);
96819716 InnerLoopVectorizer LB (L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width ,
9682- VF.Width , 1 , LVL, &CM, BFI, PSI, Checks);
9717+ VF.Width , 1 , LVL, &CM, BFI, PSI, Checks, BestPlan );
96839718 LLVM_DEBUG (dbgs () << " Vectorizing outer loop in \" "
96849719 << L->getHeader ()->getParent ()->getName () << " \"\n " );
96859720 LVP.executePlan (VF.Width , 1 , BestPlan, LB, DT, false );
@@ -10167,11 +10202,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1016710202 assert (IC > 1 && " interleave count should not be 1 or 0" );
1016810203 // If we decided that it is not legal to vectorize the loop, then
1016910204 // interleave it.
10205+ VPlan &BestPlan = LVP.getPlanFor (VF.Width );
1017010206 InnerLoopVectorizer Unroller (
1017110207 L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed (1 ),
10172- ElementCount::getFixed (1 ), IC, &LVL, &CM, BFI, PSI, Checks);
10208+ ElementCount::getFixed (1 ), IC, &LVL, &CM, BFI, PSI, Checks, BestPlan );
1017310209
10174- VPlan &BestPlan = LVP.getPlanFor (VF.Width );
1017510210 LVP.executePlan (VF.Width , IC, BestPlan, Unroller, DT, false );
1017610211
1017710212 ORE->emit ([&]() {
@@ -10193,10 +10228,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1019310228 // to be vectorized by executing the plan (potentially with a different
1019410229 // factor) again shortly afterwards.
1019510230 EpilogueLoopVectorizationInfo EPI (VF.Width , IC, EpilogueVF.Width , 1 );
10231+ std::unique_ptr<VPlan> BestMainPlan (BestPlan.duplicate ());
1019610232 EpilogueVectorizerMainLoop MainILV (L, PSE, LI, DT, TLI, TTI, AC, ORE,
10197- EPI, &LVL, &CM, BFI, PSI, Checks);
10233+ EPI, &LVL, &CM, BFI, PSI, Checks,
10234+ *BestMainPlan);
1019810235
10199- std::unique_ptr<VPlan> BestMainPlan (BestPlan.duplicate ());
1020010236 auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
1020110237 *BestMainPlan, MainILV, DT, true );
1020210238 ++LoopsVectorized;
@@ -10205,11 +10241,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1020510241 // edges from the first pass.
1020610242 EPI.MainLoopVF = EPI.EpilogueVF ;
1020710243 EPI.MainLoopUF = EPI.EpilogueUF ;
10244+ VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
1020810245 EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TLI, TTI, AC,
1020910246 ORE, EPI, &LVL, &CM, BFI, PSI,
10210- Checks);
10247+ Checks, BestEpiPlan );
1021110248
10212- VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
1021310249 VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion ();
1021410250 VPBasicBlock *Header = VectorLoop->getEntryBasicBlock ();
1021510251 Header->setName (" vec.epilog.vector.body" );
@@ -10258,23 +10294,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1025810294 RdxDesc.getRecurrenceStartValue ());
1025910295 }
1026010296 } else {
10261- // Create induction resume values for both widened pointer and
10262- // integer/fp inductions and update the start value of the induction
10263- // recipes to use the resume value.
10297+ // Retrieve the induction resume values for wide inductions from
10298+ // their original phi nodes in the scalar loop.
1026410299 PHINode *IndPhi = nullptr ;
10265- const InductionDescriptor *ID;
1026610300 if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1026710301 IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
10268- ID = &Ind->getInductionDescriptor ();
1026910302 } else {
1027010303 auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1027110304 IndPhi = WidenInd->getPHINode ();
10272- ID = &WidenInd->getInductionDescriptor ();
1027310305 }
10274-
10275- ResumeV = MainILV.createInductionResumeValue (
10276- IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
10277- {EPI.MainLoopIterationCountCheck });
10306+ ResumeV = IndPhi->getIncomingValueForBlock (L->getLoopPreheader ());
1027810307 }
1027910308 assert (ResumeV && " Must have a resume value" );
1028010309 VPValue *StartVal = BestEpiPlan.getOrAddLiveIn (ResumeV);
@@ -10286,13 +10315,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1028610315 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
1028710316 DT, true , &ExpandedSCEVs);
1028810317 ++LoopsEpilogueVectorized;
10318+ BasicBlock *PH = L->getLoopPreheader ();
1028910319
10320+ for (const auto &[IVPhi, _] : LVL.getInductionVars ()) {
10321+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10322+ const auto &[BB, V] = EpilogILV.getInductionBypassValue (IVPhi);
10323+ Inc->setIncomingValueForBlock (BB, V);
10324+ }
1029010325 if (!MainILV.areSafetyChecksAdded ())
1029110326 DisableRuntimeUnroll = true ;
1029210327 } else {
1029310328 InnerLoopVectorizer LB (L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width ,
1029410329 VF.MinProfitableTripCount , IC, &LVL, &CM, BFI,
10295- PSI, Checks);
10330+ PSI, Checks, BestPlan );
1029610331 LVP.executePlan (VF.Width , IC, BestPlan, LB, DT, false );
1029710332 ++LoopsVectorized;
1029810333
0 commit comments