@@ -519,9 +519,9 @@ class InnerLoopVectorizer {
519519 /// and the resume values can come from an additional bypass block, the \p
520520 /// AdditionalBypass pair provides information about the bypass block and the
521521 /// end value on the edge from bypass to this loop.
522- PHINode * createInductionResumeValue (
522+ void createInductionResumeValue (
523523 PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
524- ArrayRef<BasicBlock *> BypassBlocks,
524+ ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
525525 std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
526526
527527 /// Returns the original loop trip count.
@@ -532,6 +532,11 @@ class InnerLoopVectorizer {
532532 /// count of the original loop for both main loop and epilogue vectorization.
533533 void setTripCount (Value *TC) { TripCount = TC; }
534534
535+ std::pair<BasicBlock *, Value *>
536+ getInductionBypassValue (PHINode *OrigPhi) const {
537+ return InductionBypassValues.find (OrigPhi)->second ;
538+ }
539+
535540protected:
536541 friend class LoopVectorizationPlanner ;
537542
@@ -667,6 +672,9 @@ class InnerLoopVectorizer {
667672 /// for cleaning the checks, if vectorization turns out unprofitable.
668673 GeneratedRTChecks &RTChecks;
669674
675+ /// Mapping of induction phis to their bypass values and bypass blocks.
676+ DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
677+
670678 VPlan &Plan;
671679};
672680
@@ -2591,9 +2599,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25912599 nullptr , Twine (Prefix) + " scalar.ph" );
25922600}
25932601
2594- PHINode * InnerLoopVectorizer::createInductionResumeValue (
2602+ void InnerLoopVectorizer::createInductionResumeValue (
25952603 PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2596- ArrayRef<BasicBlock *> BypassBlocks,
2604+ ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
25972605 std::pair<BasicBlock *, Value *> AdditionalBypass) {
25982606 Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
25992607 assert (VectorTripCount && " Expected valid arguments" );
@@ -2626,27 +2634,21 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26262634 }
26272635 }
26282636
2629- // Create phi nodes to merge from the backedge-taken check block.
2630- PHINode *BCResumeVal =
2631- PHINode::Create (OrigPhi->getType (), 3 , " bc.resume.val" ,
2632- LoopScalarPreHeader->getFirstNonPHIIt ());
2633- // Copy original phi DL over to the new one.
2634- BCResumeVal->setDebugLoc (OrigPhi->getDebugLoc ());
2635-
2636- // The new PHI merges the original incoming value, in case of a bypass,
2637- // or the value at the end of the vectorized loop.
2638- BCResumeVal->addIncoming (EndValue, LoopMiddleBlock);
2639-
2640- // Fix the scalar body counter (PHI node).
2641- // The old induction's phi node in the scalar body needs the truncated
2642- // value.
2643- for (BasicBlock *BB : BypassBlocks)
2644- BCResumeVal->addIncoming (II.getStartValue (), BB);
2637+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
2638+ VPInstruction::ResumePhi,
2639+ {Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2640+ OrigPhi->getDebugLoc (), " bc.resume.val" );
2641+ auto *ScalarLoopHeader = Plan.getScalarHeader ();
2642+ for (VPRecipeBase &R : *ScalarLoopHeader) {
2643+ auto *IRI = cast<VPIRInstruction>(&R);
2644+ if (&IRI->getInstruction () == OrigPhi) {
2645+ IRI->addOperand (ResumePhiRecipe);
2646+ break ;
2647+ }
2648+ }
26452649
2646- if (AdditionalBypass.first )
2647- BCResumeVal->setIncomingValueForBlock (AdditionalBypass.first ,
2648- EndValueFromAdditionalBypass);
2649- return BCResumeVal;
2650+ InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
2651+ EndValueFromAdditionalBypass};
26502652}
26512653
26522654/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2676,13 +2678,14 @@ void InnerLoopVectorizer::createInductionResumeValues(
26762678 // iteration in the vectorized loop.
26772679 // If we come from a bypass edge then we need to start from the original
26782680 // start value.
2681+ VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
2682+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
26792683 for (const auto &InductionEntry : Legal->getInductionVars ()) {
26802684 PHINode *OrigPhi = InductionEntry.first ;
26812685 const InductionDescriptor &II = InductionEntry.second ;
2682- PHINode *BCResumeVal = createInductionResumeValue (
2683- OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
2684- AdditionalBypass);
2685- OrigPhi->setIncomingValueForBlock (LoopScalarPreHeader, BCResumeVal);
2686+ createInductionResumeValue (OrigPhi, II, getExpandedStep (II, ExpandedSCEVs),
2687+ LoopBypassBlocks, ScalarPHBuilder,
2688+ AdditionalBypass);
26862689 }
26872690}
26882691
@@ -7808,6 +7811,27 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78087811 // the second pass for the scalar loop. The induction resume values for the
78097812 // inductions in the epilogue loop are created before executing the plan for
78107813 // the epilogue loop.
7814+ VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
7815+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
7816+ for (VPRecipeBase &R :
7817+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
7818+ // Create induction resume values for both widened pointer and
7819+ // integer/fp inductions. Updating the start values of the induction
7820+ // recipes to use the resume values is not done in this loop.
7821+ PHINode *IndPhi = nullptr ;
7822+ const InductionDescriptor *ID;
7823+ if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
7824+ IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
7825+ ID = &Ind->getInductionDescriptor ();
7826+ } else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
7827+ IndPhi = WidenInd->getPHINode ();
7828+ ID = &WidenInd->getInductionDescriptor ();
7829+ } else
7830+ continue ;
7831+
7832+ createInductionResumeValue (IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
7833+ LoopBypassBlocks, ScalarPHBuilder);
7834+ }
78117835
78127836 return {LoopVectorPreHeader, nullptr };
78137837}
@@ -10296,23 +10320,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1029610320 RdxDesc.getRecurrenceStartValue ());
1029710321 }
1029810322 } else {
10299- // Create induction resume values for both widened pointer and
10300- // integer/fp inductions and update the start value of the induction
10301- // recipes to use the resume value.
10323+ // Retrieve the induction resume values for wide inductions from
10324+ // their original phi nodes in the scalar loop.
1030210325 PHINode *IndPhi = nullptr ;
10303- const InductionDescriptor *ID;
1030410326 if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1030510327 IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
10306- ID = &Ind->getInductionDescriptor ();
1030710328 } else {
1030810329 auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1030910330 IndPhi = WidenInd->getPHINode ();
10310- ID = &WidenInd->getInductionDescriptor ();
1031110331 }
10312-
10313- ResumeV = MainILV.createInductionResumeValue (
10314- IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
10315- {EPI.MainLoopIterationCountCheck });
10332+ ResumeV = IndPhi->getIncomingValueForBlock (L->getLoopPreheader ());
1031610333 }
1031710334 assert (ResumeV && " Must have a resume value" );
1031810335 VPValue *StartVal = BestEpiPlan.getOrAddLiveIn (ResumeV);
@@ -10324,7 +10341,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1032410341 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
1032510342 DT, true , &ExpandedSCEVs);
1032610343 ++LoopsEpilogueVectorized;
10344+ BasicBlock *PH = L->getLoopPreheader ();
1032710345
10346+ for (const auto &[IVPhi, _] : LVL.getInductionVars ()) {
10347+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10348+ const auto &[BB, V] = EpilogILV.getInductionBypassValue (IVPhi);
10349+ Inc->setIncomingValueForBlock (BB, V);
10350+ }
1032810351 if (!MainILV.areSafetyChecksAdded ())
1032910352 DisableRuntimeUnroll = true ;
1033010353 } else {
0 commit comments