@@ -513,16 +513,17 @@ class InnerLoopVectorizer {
513
513
void fixNonInductionPHIs (VPTransformState &State);
514
514
515
515
// / Create a ResumePHI VPInstruction for the induction variable \p OrigPhi to
516
- // / resume iteration count in the scalar epilogue, from where the vectorized
517
- // / loop left off and add it the scalar preheader of the VPlan. \p Step is the
516
+ // / resume iteration count in the scalar epilogue from where the vectorized
517
+ // / loop left off, and add it to the scalar preheader of VPlan. \p Step is the
518
518
// / SCEV-expanded induction step to use. In cases where the loop skeleton is
519
519
// / more complicated (i.e., epilogue vectorization) and the resume values can
520
520
// / come from an additional bypass block, the \p AdditionalBypass pair
521
- // / provides information about the bypass block and the end value on the edge
522
- // / from bypass to this loop .
521
+ // / provides this additional bypass block along with the resume value coming
522
+ // / from it .
523
523
void createInductionResumeValue (
524
- PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
525
- ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
524
+ VPIRInstruction *PhiIRI, PHINode *OrigPhi, const InductionDescriptor &ID,
525
+ Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
526
+ VPBuilder &ScalarPHBuilder,
526
527
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
527
528
528
529
// / Returns the original loop trip count.
@@ -535,7 +536,7 @@ class InnerLoopVectorizer {
535
536
536
537
std::pair<BasicBlock *, Value *>
537
538
getInductionBypassValue (PHINode *OrigPhi) const {
538
- return InductionBypassValues.find (OrigPhi)-> second ;
539
+ return InductionBypassValues.at (OrigPhi);
539
540
}
540
541
541
542
protected:
@@ -669,7 +670,9 @@ class InnerLoopVectorizer {
669
670
// / for cleaning the checks, if vectorization turns out unprofitable.
670
671
GeneratedRTChecks &RTChecks;
671
672
672
- // / Mapping of induction phis to their bypass values and bypass blocks.
673
+ // / Mapping of induction phis to their bypass values and bypass blocks. They
674
+ // / need to be added to their phi nodes after the epilogue skeleton has been
675
+ // / created.
673
676
DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
674
677
675
678
VPlan &Plan;
@@ -2586,8 +2589,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2586
2589
}
2587
2590
2588
2591
void InnerLoopVectorizer::createInductionResumeValue (
2589
- PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2590
- ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
2592
+ VPIRInstruction *PhiR, PHINode *OrigPhi, const InductionDescriptor &II,
2593
+ Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
2594
+ VPBuilder &ScalarPHBuilder,
2591
2595
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2592
2596
Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2593
2597
assert (VectorTripCount && " Expected valid arguments" );
@@ -2624,18 +2628,17 @@ void InnerLoopVectorizer::createInductionResumeValue(
2624
2628
VPInstruction::ResumePhi,
2625
2629
{Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2626
2630
OrigPhi->getDebugLoc (), " bc.resume.val" );
2627
- auto *ScalarLoopHeader = Plan.getScalarHeader ();
2628
- for (VPRecipeBase &R : *ScalarLoopHeader) {
2629
- auto *IRI = cast<VPIRInstruction>(&R);
2630
- if (&IRI->getInstruction () == OrigPhi) {
2631
- IRI->addOperand (ResumePhiRecipe);
2632
- break ;
2633
- }
2634
- }
2635
-
2636
- if (AdditionalBypass.first )
2631
+ assert (PhiR->getNumOperands () == 0 && " PhiR should not have any operands" );
2632
+ PhiR->addOperand (ResumePhiRecipe);
2633
+
2634
+ if (AdditionalBypass.first ) {
2635
+ // Store the bypass values here, as they need to be added to their phi nodes
2636
+ // after the epilogue skeleton has been created.
2637
+ assert (!InductionBypassValues.contains (OrigPhi) &&
2638
+ " entry for OrigPhi already exits" );
2637
2639
InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
2638
2640
EndValueFromAdditionalBypass};
2641
+ }
2639
2642
}
2640
2643
2641
2644
// / Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2659,20 +2662,24 @@ void InnerLoopVectorizer::createInductionResumeValues(
2659
2662
(!AdditionalBypass.first && !AdditionalBypass.second )) &&
2660
2663
" Inconsistent information about additional bypass." );
2661
2664
// We are going to resume the execution of the scalar loop.
2662
- // Go over all of the induction variables that we found and fix the
2663
- // PHIs that are left in the scalar version of the loop.
2664
- // The starting values of PHI nodes depend on the counter of the last
2665
- // iteration in the vectorized loop.
2666
- // If we come from a bypass edge then we need to start from the original
2665
+ // Go over all of the induction variables in the scalar header and fix the
2666
+ // PHIs that are left in the scalar version of the loop. The starting values
2667
+ // of PHI nodes depend on the counter of the last iteration in the vectorized
2668
+ // loop. If we come from a bypass edge then we need to start from the original
2667
2669
// start value.
2668
2670
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
2669
2671
VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
2670
- for (const auto &InductionEntry : Legal->getInductionVars ()) {
2671
- PHINode *OrigPhi = InductionEntry.first ;
2672
- const InductionDescriptor &II = InductionEntry.second ;
2673
- createInductionResumeValue (OrigPhi, II, getExpandedStep (II, ExpandedSCEVs),
2674
- LoopBypassBlocks, ScalarPHBuilder,
2675
- AdditionalBypass);
2672
+ for (VPRecipeBase &R : *Plan.getScalarHeader ()) {
2673
+ auto *PhiR = cast<VPIRInstruction>(&R);
2674
+ auto *Phi = dyn_cast<PHINode>(&PhiR->getInstruction ());
2675
+ if (!Phi)
2676
+ break ;
2677
+ if (!Legal->getInductionVars ().contains (Phi))
2678
+ continue ;
2679
+ const InductionDescriptor &II = Legal->getInductionVars ().find (Phi)->second ;
2680
+ createInductionResumeValue (
2681
+ PhiR, Phi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
2682
+ ScalarPHBuilder, AdditionalBypass);
2676
2683
}
2677
2684
}
2678
2685
@@ -7713,13 +7720,21 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7713
7720
7714
7721
BestVPlan.execute (&State);
7715
7722
7716
- // 2.5 Collect reduction resume values.
7717
7723
auto *ExitVPBB = BestVPlan.getMiddleBlock ();
7718
- if (VectorizingEpilogue)
7724
+ // 2.5 When vectorizing the epilogue, fix reduction resume values and
7725
+ // induction resume values from the bypass blocks.
7726
+ if (VectorizingEpilogue) {
7719
7727
for (VPRecipeBase &R : *ExitVPBB) {
7720
7728
fixReductionScalarResumeWhenVectorizingEpilog (
7721
7729
&R, State, State.CFG .VPBB2IRBB [ExitVPBB]);
7722
7730
}
7731
+ BasicBlock *PH = OrigLoop->getLoopPreheader ();
7732
+ for (const auto &[IVPhi, _] : Legal->getInductionVars ()) {
7733
+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
7734
+ const auto &[BB, V] = ILV.getInductionBypassValue (IVPhi);
7735
+ Inc->setIncomingValueForBlock (BB, V);
7736
+ }
7737
+ }
7723
7738
7724
7739
// 2.6. Maintain Loop Hints
7725
7740
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -7808,10 +7823,8 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7808
7823
// Generate the induction variable.
7809
7824
EPI.VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
7810
7825
7811
- // Skip induction resume value creation here because they will be created in
7812
- // the second pass for the scalar loop. The induction resume values for the
7813
- // inductions in the epilogue loop are created before executing the plan for
7814
- // the epilogue loop.
7826
+ // Create induction resume values and ResumePhis for the inductions in the
7827
+ // epilogue loop in the VPlan for the epilogue vector loop.
7815
7828
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
7816
7829
VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
7817
7830
for (VPRecipeBase &R :
@@ -7827,11 +7840,19 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7827
7840
} else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
7828
7841
IndPhi = WidenInd->getPHINode ();
7829
7842
ID = &WidenInd->getInductionDescriptor ();
7830
- } else
7843
+ } else {
7831
7844
continue ;
7845
+ }
7832
7846
7833
- createInductionResumeValue (IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
7834
- LoopBypassBlocks, ScalarPHBuilder);
7847
+ auto *ScalarLoopHeader = Plan.getScalarHeader ();
7848
+ for (VPRecipeBase &R : *ScalarLoopHeader) {
7849
+ auto *PhiR = cast<VPIRInstruction>(&R);
7850
+ if (&PhiR->getInstruction () != IndPhi)
7851
+ continue ;
7852
+ createInductionResumeValue (PhiR, IndPhi, *ID,
7853
+ getExpandedStep (*ID, ExpandedSCEVs),
7854
+ LoopBypassBlocks, ScalarPHBuilder);
7855
+ }
7835
7856
}
7836
7857
7837
7858
return {LoopVectorPreHeader, nullptr };
@@ -10321,8 +10342,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10321
10342
RdxDesc.getRecurrenceStartValue ());
10322
10343
}
10323
10344
} else {
10324
- // Retrive the induction resume values for wide inductions from
10325
- // their original phi nodes in the scalar loop
10345
+ // Retrieve the induction resume values for wide inductions from
10346
+ // their original phi nodes in the scalar loop.
10326
10347
PHINode *IndPhi = nullptr ;
10327
10348
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
10328
10349
IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
@@ -10342,13 +10363,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10342
10363
LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10343
10364
DT, true , &ExpandedSCEVs);
10344
10365
++LoopsEpilogueVectorized;
10345
- BasicBlock *PH = L->getLoopPreheader ();
10346
-
10347
- for (const auto &[IVPhi, _] : LVL.getInductionVars ()) {
10348
- auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10349
- const auto &[BB, V] = EpilogILV.getInductionBypassValue (IVPhi);
10350
- Inc->setIncomingValueForBlock (BB, V);
10351
- }
10352
10366
if (!MainILV.areSafetyChecksAdded ())
10353
10367
DisableRuntimeUnroll = true ;
10354
10368
} else {
0 commit comments