@@ -513,17 +513,19 @@ class InnerLoopVectorizer {
513513 // / Fix the non-induction PHIs in \p Plan.
514514 void fixNonInductionPHIs (VPTransformState &State);
515515
516- // / Create a ResumePHI VPInstruction for the induction \p PhiRIR to resume
517- // / iteration count in the scalar epilogue from where the vectorized loop
518- // / left off, and add it to the scalar preheader of VPlan. \p Step is the
519- // / SCEV-expanded induction step to use. In cases where the loop skeleton is
520- // / more complicated (i.e., epilogue vectorization) and the resume values can
521- // / come from an additional bypass block, the \p AdditionalBypass pair
522- // / provides this additional bypass block along with the resume value coming
523- // / from it.
516+ // / Create a ResumePHI VPInstruction for the induction \p InductionPhiIRI to
517+ // / resume iteration count in the scalar epilogue from where the vectorized
518+ // / loop left off, and add it to the scalar preheader of VPlan. Also creates
519+ // / the induction resume value, and the value for the bypass block, if needed.
520+ // / \p Step is the SCEV-expanded induction step to use. In cases where the
521+ // / loop skeleton is more complicated (i.e., epilogue vectorization) and the
522+ // / resume values can come from an additional bypass block, the \p
523+ // / AdditionalBypass pair provides this additional bypass block along with the
524+ // / resume value coming from it.
524525 void createInductionResumeVPValue (
525- VPIRInstruction *PhiIRI, const InductionDescriptor &ID, Value *Step,
526- ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
526+ VPIRInstruction *InductionPhiIRI, const InductionDescriptor &ID,
527+ Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
528+ VPBuilder &ScalarPHBuilder,
527529 std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
528530
529531 // / Returns the original loop trip count.
@@ -534,9 +536,15 @@ class InnerLoopVectorizer {
534536 // / count of the original loop for both main loop and epilogue vectorization.
535537 void setTripCount (Value *TC) { TripCount = TC; }
536538
537- std::pair<BasicBlock *, Value *>
538- getInductionBypassValue (PHINode *OrigPhi) const {
539- return InductionBypassValues.at (OrigPhi);
539+ // / Retrieve the bypass value associated with an original induction header
540+ // / phi.
541+ Value *getInductionAdditionalBypassValue (PHINode *OrigPhi) const {
542+ return Induction2AdditionalBypass.at (OrigPhi).second ;
543+ }
544+
545+ // / Return the additional bypass block (taken from the first map entry, so at least one induction must be recorded).
546+ BasicBlock *getInductionAdditionalBypassBlock () const {
547+ return Induction2AdditionalBypass.begin ()->second .first ;
540548 }
541549
542550protected:
@@ -671,9 +679,10 @@ class InnerLoopVectorizer {
671679 GeneratedRTChecks &RTChecks;
672680
673681 // / Mapping of induction phis to their bypass values and bypass blocks. They
674- // / need to be added to their phi nodes after the epilogue skeleton has been
675- // / created.
676- DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
682+ // / need to be added as operands to phi nodes in the scalar loop preheader
683+ // / after the epilogue skeleton has been created.
684+ DenseMap<PHINode *, std::pair<BasicBlock *, Value *>>
685+ Induction2AdditionalBypass;
677686
678687 VPlan &Plan;
679688};
@@ -2592,10 +2601,10 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25922601}
25932602
25942603void InnerLoopVectorizer::createInductionResumeVPValue (
2595- VPIRInstruction *PhiR , const InductionDescriptor &II, Value *Step,
2604+ VPIRInstruction *InductionPhiRI , const InductionDescriptor &II, Value *Step,
25962605 ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
25972606 std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598- auto *OrigPhi = cast<PHINode>(&PhiR ->getInstruction ());
2607+ auto *OrigPhi = cast<PHINode>(&InductionPhiRI ->getInstruction ());
25992608 Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
26002609 assert (VectorTripCount && " Expected valid arguments" );
26012610
@@ -2627,20 +2636,25 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
26272636 }
26282637 }
26292638
2630- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
2631- VPInstruction::ResumePhi,
2632- {Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2633- OrigPhi->getDebugLoc (), " bc.resume.val" );
2634- assert (PhiR->getNumOperands () == 0 && " PhiR should not have any operands" );
2635- PhiR->addOperand (ResumePhiRecipe);
2639+ if (!AdditionalBypass.first && OrigPhi != OldInduction) {
2640+ auto *ResumePhiRecipe =
2641+ ScalarPHBuilder.createNaryOp (VPInstruction::ResumePhi,
2642+ {Plan.getOrAddLiveIn (EndValue),
2643+ Plan.getOrAddLiveIn (II.getStartValue ())},
2644+ OrigPhi->getDebugLoc (), " bc.resume.val" );
2645+ assert (InductionPhiRI->getNumOperands () == 0 &&
2646+ " InductionPhiRI should not have any operands" );
2647+ InductionPhiRI->addOperand (ResumePhiRecipe);
2648+ }
26362649
26372650 if (AdditionalBypass.first ) {
2638- // Store the bypass values here, as they need to be added to their phi nodes
2639- // after the epilogue skeleton has been created.
2640- assert (!InductionBypassValues.contains (OrigPhi) &&
2651+ // Store the bypass value here, as it needs to be added as an operand to its
2652+ // scalar preheader phi node after the epilogue skeleton has been created.
2653+ // TODO: Directly add as extra operand to the VPResumePHI recipe.
2654+ assert (!Induction2AdditionalBypass.contains (OrigPhi) &&
26412655 " entry for OrigPhi already exits" );
2642- InductionBypassValues [OrigPhi] = {AdditionalBypass.first ,
2643- EndValueFromAdditionalBypass};
2656+ Induction2AdditionalBypass [OrigPhi] = {AdditionalBypass.first ,
2657+ EndValueFromAdditionalBypass};
26442658 }
26452659}
26462660
@@ -2665,11 +2679,13 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
26652679 (!AdditionalBypass.first && !AdditionalBypass.second )) &&
26662680 " Inconsistent information about additional bypass." );
26672681 // We are going to resume the execution of the scalar loop.
2668- // Go over all of the induction variables in the scalar header and fix the
2669- // PHIs that are left in the scalar version of the loop. The starting values
2670- // of PHI nodes depend on the counter of the last iteration in the vectorized
2671- // loop. If we come from a bypass edge then we need to start from the original
2672- // start value.
2682+ // Go over all of the induction variable PHIs of the scalar loop header and
2683+ // fix their starting values, which depend on the counter of the last
2684+ // iteration of the vectorized loop. Which value is used depends on the
2685+ // incoming edge: if we come from one of the LoopBypassBlocks then we need
2686+ // to start from the original start value. If we come from the
2687+ // AdditionalBypass then we need to start from the resume value coming
2688+ // from that block.
26732689 VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
26742690 VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
26752691 for (VPRecipeBase &R : *Plan.getScalarHeader ()) {
@@ -7595,7 +7611,8 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
75957611// fix the reduction's scalar PHI node by adding the incoming value from the
75967612// main vector loop.
75977613static void fixReductionScalarResumeWhenVectorizingEpilog (
7598- VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock) {
7614+ VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock,
7615+ BasicBlock *BypassBlock) {
75997616 auto *EpiRedResult = dyn_cast<VPInstruction>(R);
76007617 if (!EpiRedResult ||
76017618 EpiRedResult->getOpcode () != VPInstruction::ComputeReductionResult)
@@ -7632,21 +7649,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
76327649 auto *EpiResumePhiVPI =
76337650 cast<VPInstruction>(*find_if (EpiRedResult->users (), IsResumePhi));
76347651 auto *EpiResumePhi = cast<PHINode>(State.get (EpiResumePhiVPI, true ));
7635- BasicBlock *LoopScalarPreHeader = EpiResumePhi->getParent ();
7636- bool Updated = false ;
7637- for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7638- if (is_contained (MainResumePhi->blocks (), Incoming)) {
7639- assert (EpiResumePhi->getIncomingValueForBlock (Incoming) ==
7640- RdxDesc.getRecurrenceStartValue () &&
7641- " Trying to reset unexpected value" );
7642- assert (!Updated && " Should update at most 1 incoming value" );
7643- EpiResumePhi->setIncomingValueForBlock (
7644- Incoming, MainResumePhi->getIncomingValueForBlock (Incoming));
7645- Updated = true ;
7646- }
7647- }
7648- assert (Updated && " Must update EpiResumePhi." );
7649- (void )Updated;
7652+ EpiResumePhi->setIncomingValueForBlock (
7653+ BypassBlock, MainResumePhi->getIncomingValueForBlock (BypassBlock));
76507654}
76517655
76527656DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
@@ -7696,6 +7700,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76967700 std::tie (State.CFG .PrevBB , CanonicalIVStartValue) =
76977701 ILV.createVectorizedLoopSkeleton (ExpandedSCEVs ? *ExpandedSCEVs
76987702 : State.ExpandedSCEVs );
7703+ if (VectorizingEpilogue)
7704+ VPlanTransforms::removeDeadRecipes (BestVPlan);
7705+
76997706#ifdef EXPENSIVE_CHECKS
77007707 assert (DT->verify (DominatorTree::VerificationLevel::Fast));
77017708#endif
@@ -7736,18 +7743,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77367743 BestVPlan.execute (&State);
77377744
77387745 auto *ExitVPBB = BestVPlan.getMiddleBlock ();
7739- // 2.5 When vectorizing the epilogue, fix reduction resume values and
7740- // induction resume values from the bypass blocks .
7746+ // 2.5 When vectorizing the epilogue, fix reduction and induction resume
7747+ // values from the additional bypass block.
77417748 if (VectorizingEpilogue) {
7749+ BasicBlock *BypassBlock = ILV.getInductionAdditionalBypassBlock ();
77427750 for (VPRecipeBase &R : *ExitVPBB) {
77437751 fixReductionScalarResumeWhenVectorizingEpilog (
7744- &R, State, State.CFG .VPBB2IRBB [ExitVPBB]);
7752+ &R, State, State.CFG .VPBB2IRBB [ExitVPBB], BypassBlock );
77457753 }
77467754 BasicBlock *PH = OrigLoop->getLoopPreheader ();
77477755 for (const auto &[IVPhi, _] : Legal->getInductionVars ()) {
77487756 auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
7749- const auto &[BB, V] = ILV.getInductionBypassValue (IVPhi);
7750- Inc->setIncomingValueForBlock (BB , V);
7757+ Value *V = ILV.getInductionAdditionalBypassValue (IVPhi);
7758+ Inc->setIncomingValueForBlock (BypassBlock , V);
77517759 }
77527760 }
77537761
@@ -7838,8 +7846,8 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78387846 // Generate the induction variable.
78397847 EPI.VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
78407848
7841- // Create induction resume values and ResumePhis for the inductions in the
7842- // epilogue loop in the VPlan for the epilogue vector loop .
7849+ // Generate VPValues and ResumePhi recipes for inductions in the epilogue
7850+ // loop to resume from the main loop or bypass it.
78437851 createInductionResumeVPValues (ExpandedSCEVs);
78447852
78457853 return {LoopVectorPreHeader, nullptr };
@@ -10347,6 +10355,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1034710355 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
1034810356 DT, true , &ExpandedSCEVs);
1034910357 ++LoopsEpilogueVectorized;
10358+
1035010359 if (!MainILV.areSafetyChecksAdded ())
1035110360 DisableRuntimeUnroll = true ;
1035210361 } else {
0 commit comments