@@ -519,9 +519,9 @@ class InnerLoopVectorizer {
519
519
/// and the resume values can come from an additional bypass block, the \p
520
520
/// AdditionalBypass pair provides information about the bypass block and the
521
521
/// end value on the edge from bypass to this loop.
522
- PHINode * createInductionResumeValue (
522
+ void createInductionResumeValue (
523
523
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
524
- ArrayRef<BasicBlock *> BypassBlocks,
524
+ ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
525
525
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
526
526
527
527
/// Returns the original loop trip count.
@@ -532,6 +532,11 @@ class InnerLoopVectorizer {
532
532
/// count of the original loop for both main loop and epilogue vectorization.
533
533
void setTripCount (Value *TC) { TripCount = TC; }
534
534
535
+ std::pair<BasicBlock *, Value *>
536
+ getInductionBypassValue (PHINode *OrigPhi) const {
537
+ return InductionBypassValues.find (OrigPhi)->second ;
538
+ }
539
+
535
540
protected:
536
541
friend class LoopVectorizationPlanner ;
537
542
@@ -667,6 +672,9 @@ class InnerLoopVectorizer {
667
672
/// for cleaning the checks, if vectorization turns out unprofitable.
668
673
GeneratedRTChecks &RTChecks;
669
674
675
+ /// Mapping of induction phis to their bypass values and bypass blocks.
676
+ DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
677
+
670
678
VPlan &Plan;
671
679
};
672
680
@@ -2591,9 +2599,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2591
2599
nullptr , Twine (Prefix) + " scalar.ph" );
2592
2600
}
2593
2601
2594
- PHINode * InnerLoopVectorizer::createInductionResumeValue (
2602
+ void InnerLoopVectorizer::createInductionResumeValue (
2595
2603
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2596
- ArrayRef<BasicBlock *> BypassBlocks,
2604
+ ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
2597
2605
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598
2606
Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2599
2607
assert (VectorTripCount && " Expected valid arguments" );
@@ -2626,27 +2634,21 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
2626
2634
}
2627
2635
}
2628
2636
2629
- // Create phi nodes to merge from the backedge-taken check block.
2630
- PHINode *BCResumeVal =
2631
- PHINode::Create (OrigPhi->getType (), 3 , " bc.resume.val" ,
2632
- LoopScalarPreHeader->getFirstNonPHIIt ());
2633
- // Copy original phi DL over to the new one.
2634
- BCResumeVal->setDebugLoc (OrigPhi->getDebugLoc ());
2635
-
2636
- // The new PHI merges the original incoming value, in case of a bypass,
2637
- // or the value at the end of the vectorized loop.
2638
- BCResumeVal->addIncoming (EndValue, LoopMiddleBlock);
2639
-
2640
- // Fix the scalar body counter (PHI node).
2641
- // The old induction's phi node in the scalar body needs the truncated
2642
- // value.
2643
- for (BasicBlock *BB : BypassBlocks)
2644
- BCResumeVal->addIncoming (II.getStartValue (), BB);
2637
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
2638
+ VPInstruction::ResumePhi,
2639
+ {Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2640
+ OrigPhi->getDebugLoc (), " bc.resume.val" );
2641
+ auto *ScalarLoopHeader = Plan.getScalarHeader ();
2642
+ for (VPRecipeBase &R : *ScalarLoopHeader) {
2643
+ auto *IRI = cast<VPIRInstruction>(&R);
2644
+ if (&IRI->getInstruction () == OrigPhi) {
2645
+ IRI->addOperand (ResumePhiRecipe);
2646
+ break ;
2647
+ }
2648
+ }
2645
2649
2646
- if (AdditionalBypass.first )
2647
- BCResumeVal->setIncomingValueForBlock (AdditionalBypass.first ,
2648
- EndValueFromAdditionalBypass);
2649
- return BCResumeVal;
2650
+ InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
2651
+ EndValueFromAdditionalBypass};
2650
2652
}
2651
2653
2652
2654
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2676,13 +2678,14 @@ void InnerLoopVectorizer::createInductionResumeValues(
2676
2678
// iteration in the vectorized loop.
2677
2679
// If we come from a bypass edge then we need to start from the original
2678
2680
// start value.
2681
+ VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
2682
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
2679
2683
for (const auto &InductionEntry : Legal->getInductionVars ()) {
2680
2684
PHINode *OrigPhi = InductionEntry.first ;
2681
2685
const InductionDescriptor &II = InductionEntry.second ;
2682
- PHINode *BCResumeVal = createInductionResumeValue (
2683
- OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
2684
- AdditionalBypass);
2685
- OrigPhi->setIncomingValueForBlock (LoopScalarPreHeader, BCResumeVal);
2686
+ createInductionResumeValue (OrigPhi, II, getExpandedStep (II, ExpandedSCEVs),
2687
+ LoopBypassBlocks, ScalarPHBuilder,
2688
+ AdditionalBypass);
2686
2689
}
2687
2690
}
2688
2691
@@ -7808,6 +7811,27 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7808
7811
// the second pass for the scalar loop. The induction resume values for the
7809
7812
// inductions in the epilogue loop are created before executing the plan for
7810
7813
// the epilogue loop.
7814
+ VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
7815
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
7816
+ for (VPRecipeBase &R :
7817
+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
7818
+ // Create induction resume values for both widened pointer and
7819
+ // integer/fp inductions and update the start value of the induction
7820
+ // recipes to use the resume value.
7821
+ PHINode *IndPhi = nullptr ;
7822
+ const InductionDescriptor *ID;
7823
+ if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
7824
+ IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
7825
+ ID = &Ind->getInductionDescriptor ();
7826
+ } else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
7827
+ IndPhi = WidenInd->getPHINode ();
7828
+ ID = &WidenInd->getInductionDescriptor ();
7829
+ } else
7830
+ continue ;
7831
+
7832
+ createInductionResumeValue (IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
7833
+ LoopBypassBlocks, ScalarPHBuilder);
7834
+ }
7811
7835
7812
7836
return {LoopVectorPreHeader, nullptr };
7813
7837
}
@@ -10296,23 +10320,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10296
10320
RdxDesc.getRecurrenceStartValue ());
10297
10321
}
10298
10322
} else {
10299
- // Create induction resume values for both widened pointer and
10300
- // integer/fp inductions and update the start value of the induction
10301
- // recipes to use the resume value.
10323
+ // Retrieve the induction resume values for wide inductions from
10324
+ // their original phi nodes in the scalar loop.
10302
10325
PHINode *IndPhi = nullptr ;
10303
- const InductionDescriptor *ID;
10304
10326
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
10305
10327
IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
10306
- ID = &Ind->getInductionDescriptor ();
10307
10328
} else {
10308
10329
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
10309
10330
IndPhi = WidenInd->getPHINode ();
10310
- ID = &WidenInd->getInductionDescriptor ();
10311
10331
}
10312
-
10313
- ResumeV = MainILV.createInductionResumeValue (
10314
- IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
10315
- {EPI.MainLoopIterationCountCheck });
10332
+ ResumeV = IndPhi->getIncomingValueForBlock (L->getLoopPreheader ());
10316
10333
}
10317
10334
assert (ResumeV && " Must have a resume value" );
10318
10335
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn (ResumeV);
@@ -10324,7 +10341,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10324
10341
LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10325
10342
DT, true , &ExpandedSCEVs);
10326
10343
++LoopsEpilogueVectorized;
10344
+ BasicBlock *PH = L->getLoopPreheader ();
10327
10345
10346
+ for (const auto &[IVPhi, _] : LVL.getInductionVars ()) {
10347
+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10348
+ const auto &[BB, V] = EpilogILV.getInductionBypassValue (IVPhi);
10349
+ Inc->setIncomingValueForBlock (BB, V);
10350
+ }
10328
10351
if (!MainILV.areSafetyChecksAdded ())
10329
10352
DisableRuntimeUnroll = true ;
10330
10353
} else {
0 commit comments