@@ -513,17 +513,19 @@ class InnerLoopVectorizer {
513
513
// / Fix the non-induction PHIs in \p Plan.
514
514
void fixNonInductionPHIs (VPTransformState &State);
515
515
516
- // / Create a ResumePHI VPInstruction for the induction \p PhiRIR to resume
517
- // / iteration count in the scalar epilogue from where the vectorized loop
518
- // / left off, and add it to the scalar preheader of VPlan. \p Step is the
519
- // / SCEV-expanded induction step to use. In cases where the loop skeleton is
520
- // / more complicated (i.e., epilogue vectorization) and the resume values can
521
- // / come from an additional bypass block, the \p AdditionalBypass pair
522
- // / provides this additional bypass block along with the resume value coming
523
- // / from it.
516
+ // / Create a ResumePHI VPInstruction for the induction \p InductionPhiIRI to
517
+ // / resume iteration count in the scalar epilogue from where the vectorized
518
+ // / loop left off, and add it to the scalar preheader of VPlan. Also creates
519
+ // / the induction resume value, and the value for the bypass block, if needed.
520
+ // / \p Step is the SCEV-expanded induction step to use. In cases where the
521
+ // / loop skeleton is more complicated (i.e., epilogue vectorization) and the
522
+ // / resume values can come from an additional bypass block, the \p
523
+ // / AdditionalBypass pair provides this additional bypass block along with the
524
+ // / resume value coming from it.
524
525
void createInductionResumeVPValue (
525
- VPIRInstruction *PhiIRI, const InductionDescriptor &ID, Value *Step,
526
- ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
526
+ VPIRInstruction *InductionPhiIRI, const InductionDescriptor &ID,
527
+ Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
528
+ VPBuilder &ScalarPHBuilder,
527
529
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
528
530
529
531
// / Returns the original loop trip count.
@@ -534,9 +536,15 @@ class InnerLoopVectorizer {
534
536
// / count of the original loop for both main loop and epilogue vectorization.
535
537
void setTripCount (Value *TC) { TripCount = TC; }
536
538
537
- std::pair<BasicBlock *, Value *>
538
- getInductionBypassValue (PHINode *OrigPhi) const {
539
- return InductionBypassValues.at (OrigPhi);
539
+ // / Retrieve the bypass value associated with an original induction header
540
+ // / phi.
541
+ Value *getInductionAdditionalBypassValue (PHINode *OrigPhi) const {
542
+ return Induction2AdditionalBypass.at (OrigPhi).second ;
543
+ }
544
+
545
+ // / Return the additional bypass block; at least one entry must be recorded.
546
+ BasicBlock *getInductionAdditionalBypassBlock () const {
547
+ return Induction2AdditionalBypass.begin ()->second .first ;
540
548
}
541
549
542
550
protected:
@@ -671,9 +679,10 @@ class InnerLoopVectorizer {
671
679
GeneratedRTChecks &RTChecks;
672
680
673
681
// / Mapping of induction phis to their bypass values and bypass blocks. They
674
- // / need to be added to their phi nodes after the epilogue skeleton has been
675
- // / created.
676
- DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
682
+ // / need to be added as operands to phi nodes in the scalar loop preheader
683
+ // / after the epilogue skeleton has been created.
684
+ DenseMap<PHINode *, std::pair<BasicBlock *, Value *>>
685
+ Induction2AdditionalBypass;
677
686
678
687
VPlan &Plan;
679
688
};
@@ -2592,10 +2601,10 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2592
2601
}
2593
2602
2594
2603
void InnerLoopVectorizer::createInductionResumeVPValue (
2595
- VPIRInstruction *PhiR , const InductionDescriptor &II, Value *Step,
2604
+ VPIRInstruction *InductionPhiRI , const InductionDescriptor &II, Value *Step,
2596
2605
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
2597
2606
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598
- auto *OrigPhi = cast<PHINode>(&PhiR ->getInstruction ());
2607
+ auto *OrigPhi = cast<PHINode>(&InductionPhiRI ->getInstruction ());
2599
2608
Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2600
2609
assert (VectorTripCount && " Expected valid arguments" );
2601
2610
@@ -2627,20 +2636,25 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
2627
2636
}
2628
2637
}
2629
2638
2630
- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
2631
- VPInstruction::ResumePhi,
2632
- {Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2633
- OrigPhi->getDebugLoc (), " bc.resume.val" );
2634
- assert (PhiR->getNumOperands () == 0 && " PhiR should not have any operands" );
2635
- PhiR->addOperand (ResumePhiRecipe);
2639
+ if (!AdditionalBypass.first && OrigPhi != OldInduction) {
2640
+ auto *ResumePhiRecipe =
2641
+ ScalarPHBuilder.createNaryOp (VPInstruction::ResumePhi,
2642
+ {Plan.getOrAddLiveIn (EndValue),
2643
+ Plan.getOrAddLiveIn (II.getStartValue ())},
2644
+ OrigPhi->getDebugLoc (), " bc.resume.val" );
2645
+ assert (InductionPhiRI->getNumOperands () == 0 &&
2646
+ " InductionPhiRI should not have any operands" );
2647
+ InductionPhiRI->addOperand (ResumePhiRecipe);
2648
+ }
2636
2649
2637
2650
if (AdditionalBypass.first ) {
2638
- // Store the bypass values here, as they need to be added to their phi nodes
2639
- // after the epilogue skeleton has been created.
2640
- assert (!InductionBypassValues.contains (OrigPhi) &&
2651
+ // Store the bypass value here, as it needs to be added as an operand to
2652
+ // its scalar preheader phi node after the epilogue skeleton has been created.
2653
+ // TODO: Directly add as extra operand to the VPResumePHI recipe.
2654
+ assert (!Induction2AdditionalBypass.contains (OrigPhi) &&
2641
2655
" entry for OrigPhi already exits" );
2642
- InductionBypassValues [OrigPhi] = {AdditionalBypass.first ,
2643
- EndValueFromAdditionalBypass};
2656
+ Induction2AdditionalBypass [OrigPhi] = {AdditionalBypass.first ,
2657
+ EndValueFromAdditionalBypass};
2644
2658
}
2645
2659
}
2646
2660
@@ -2665,11 +2679,13 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
2665
2679
(!AdditionalBypass.first && !AdditionalBypass.second )) &&
2666
2680
" Inconsistent information about additional bypass." );
2667
2681
// We are going to resume the execution of the scalar loop.
2668
- // Go over all of the induction variables in the scalar header and fix the
2669
- // PHIs that are left in the scalar version of the loop. The starting values
2670
- // of PHI nodes depend on the counter of the last iteration in the vectorized
2671
- // loop. If we come from a bypass edge then we need to start from the original
2672
- // start value.
2682
+ // Go over all of the induction variable PHIs of the scalar loop header and
2683
+ // fix their starting values, which depend on the counter of the last
2684
+ // iteration of the vectorized loop. If we come from one of the
2685
+ // LoopBypassBlocks then we need to start from the original start value.
2686
+ // If we come from the AdditionalBypass block then we need to start from
2687
+ // the resume value provided for that block rather than the original
2688
+ // start value.
2673
2689
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
2674
2690
VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
2675
2691
for (VPRecipeBase &R : *Plan.getScalarHeader ()) {
@@ -7595,7 +7611,8 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
7595
7611
// fix the reduction's scalar PHI node by adding the incoming value from the
7596
7612
// main vector loop.
7597
7613
static void fixReductionScalarResumeWhenVectorizingEpilog (
7598
- VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock) {
7614
+ VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock,
7615
+ BasicBlock *BypassBlock) {
7599
7616
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
7600
7617
if (!EpiRedResult ||
7601
7618
EpiRedResult->getOpcode () != VPInstruction::ComputeReductionResult)
@@ -7632,21 +7649,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
7632
7649
auto *EpiResumePhiVPI =
7633
7650
cast<VPInstruction>(*find_if (EpiRedResult->users (), IsResumePhi));
7634
7651
auto *EpiResumePhi = cast<PHINode>(State.get (EpiResumePhiVPI, true ));
7635
- BasicBlock *LoopScalarPreHeader = EpiResumePhi->getParent ();
7636
- bool Updated = false ;
7637
- for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7638
- if (is_contained (MainResumePhi->blocks (), Incoming)) {
7639
- assert (EpiResumePhi->getIncomingValueForBlock (Incoming) ==
7640
- RdxDesc.getRecurrenceStartValue () &&
7641
- " Trying to reset unexpected value" );
7642
- assert (!Updated && " Should update at most 1 incoming value" );
7643
- EpiResumePhi->setIncomingValueForBlock (
7644
- Incoming, MainResumePhi->getIncomingValueForBlock (Incoming));
7645
- Updated = true ;
7646
- }
7647
- }
7648
- assert (Updated && " Must update EpiResumePhi." );
7649
- (void )Updated;
7652
+ EpiResumePhi->setIncomingValueForBlock (
7653
+ BypassBlock, MainResumePhi->getIncomingValueForBlock (BypassBlock));
7650
7654
}
7651
7655
7652
7656
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
@@ -7696,6 +7700,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7696
7700
std::tie (State.CFG .PrevBB , CanonicalIVStartValue) =
7697
7701
ILV.createVectorizedLoopSkeleton (ExpandedSCEVs ? *ExpandedSCEVs
7698
7702
: State.ExpandedSCEVs );
7703
+ if (VectorizingEpilogue)
7704
+ VPlanTransforms::removeDeadRecipes (BestVPlan);
7705
+
7699
7706
#ifdef EXPENSIVE_CHECKS
7700
7707
assert (DT->verify (DominatorTree::VerificationLevel::Fast));
7701
7708
#endif
@@ -7736,18 +7743,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7736
7743
BestVPlan.execute (&State);
7737
7744
7738
7745
auto *ExitVPBB = BestVPlan.getMiddleBlock ();
7739
- // 2.5 When vectorizing the epilogue, fix reduction resume values and
7740
- // induction resume values from the bypass blocks .
7746
+ // 2.5 When vectorizing the epilogue, fix reduction and induction resume
7747
+ // values from the additional bypass block.
7741
7748
if (VectorizingEpilogue) {
7749
+ BasicBlock *BypassBlock = ILV.getInductionAdditionalBypassBlock ();
7742
7750
for (VPRecipeBase &R : *ExitVPBB) {
7743
7751
fixReductionScalarResumeWhenVectorizingEpilog (
7744
- &R, State, State.CFG .VPBB2IRBB [ExitVPBB]);
7752
+ &R, State, State.CFG .VPBB2IRBB [ExitVPBB], BypassBlock );
7745
7753
}
7746
7754
BasicBlock *PH = OrigLoop->getLoopPreheader ();
7747
7755
for (const auto &[IVPhi, _] : Legal->getInductionVars ()) {
7748
7756
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
7749
- const auto &[BB, V] = ILV.getInductionBypassValue (IVPhi);
7750
- Inc->setIncomingValueForBlock (BB , V);
7757
+ Value *V = ILV.getInductionAdditionalBypassValue (IVPhi);
7758
+ Inc->setIncomingValueForBlock (BypassBlock , V);
7751
7759
}
7752
7760
}
7753
7761
@@ -7838,8 +7846,8 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7838
7846
// Generate the induction variable.
7839
7847
EPI.VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
7840
7848
7841
- // Create induction resume values and ResumePhis for the inductions in the
7842
- // epilogue loop in the VPlan for the epilogue vector loop .
7849
+ // Generate VPValues and ResumePhi recipes for inductions in the epilog loop
7850
+ // to resume from the main loop or bypass it.
7843
7851
createInductionResumeVPValues (ExpandedSCEVs);
7844
7852
7845
7853
return {LoopVectorPreHeader, nullptr };
@@ -10347,6 +10355,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10347
10355
LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10348
10356
DT, true , &ExpandedSCEVs);
10349
10357
++LoopsEpilogueVectorized;
10358
+
10350
10359
if (!MainILV.areSafetyChecksAdded ())
10351
10360
DisableRuntimeUnroll = true ;
10352
10361
} else {
0 commit comments