@@ -532,14 +532,6 @@ class InnerLoopVectorizer {
532532 // / count of the original loop for both main loop and epilogue vectorization.
533533 void setTripCount (Value *TC) { TripCount = TC; }
534534
535- // / Return the additional bypass block which targets the scalar loop by
536- // / skipping the epilogue loop after completing the main loop.
537- BasicBlock *getAdditionalBypassBlock () const {
538- assert (AdditionalBypassBlock &&
539- " Trying to access AdditionalBypassBlock but it has not been set" );
540- return AdditionalBypassBlock;
541- }
542-
543535protected:
544536 friend class LoopVectorizationPlanner ;
545537
@@ -602,11 +594,6 @@ class InnerLoopVectorizer {
602594 // / for cleaning the checks, if vectorization turns out unprofitable.
603595 GeneratedRTChecks &RTChecks;
604596
605- // / The additional bypass block which conditionally skips over the epilogue
606- // / loop after executing the main loop. Needed to resume inductions and
607- // / reductions during epilogue vectorization.
608- BasicBlock *AdditionalBypassBlock = nullptr ;
609-
610597 VPlan &Plan;
611598
612599 // / The vector preheader block of \p Plan, used as target for check blocks
@@ -711,6 +698,11 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
711698// vectorization of *epilogue* loops in the process of vectorizing loops and
712699// their epilogues.
713700class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
701+ // / The additional bypass block which conditionally skips over the epilogue
702+ // / loop after executing the main loop. Needed to resume inductions and
703+ // / reductions during epilogue vectorization.
704+ BasicBlock *AdditionalBypassBlock = nullptr ;
705+
714706public:
715707 EpilogueVectorizerEpilogueLoop (
716708 Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
@@ -727,6 +719,14 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
727719 // / *epilogue loop* strategy (i.e., the second pass of VPlan execution).
728720 BasicBlock *createVectorizedLoopSkeleton () final ;
729721
722+ /// Return the additional bypass block which targets the scalar loop by
723+ /// skipping the epilogue loop after completing the main loop.
/// Precondition: AdditionalBypassBlock must already have been set. The
/// assertion below fires while the pointer is still null, so callers never
/// receive a null block from this accessor in assertion-enabled builds.
724+ BasicBlock *getAdditionalBypassBlock () const {
725+ assert (AdditionalBypassBlock &&
726+ " Trying to access AdditionalBypassBlock but it has not been set" );
727+ return AdditionalBypassBlock;
728+ }
729+
730730protected:
731731 // / Emits an iteration count bypass check after the main vector loop has
732732 // / finished to see if there are any iterations left to execute by either
@@ -7139,7 +7139,7 @@ static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) {
71397139// epilog loop, fix the reduction's scalar PHI node by adding the incoming value
71407140// from the main vector loop.
71417141static void fixReductionScalarResumeWhenVectorizingEpilog (
7142- VPPhi *EpiResumePhiR, VPTransformState &State , BasicBlock *BypassBlock) {
7142+ VPPhi *EpiResumePhiR, PHINode &EpiResumePhi , BasicBlock *BypassBlock) {
71437143 // Get the VPInstruction computing the reduction result in the middle block.
71447144 // The first operand may not be from the middle block if it is not connected
71457145 // to the scalar preheader. In that case, there's nothing to fix.
@@ -7194,8 +7194,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
71947194 // When fixing reductions in the epilogue loop we should already have
71957195 // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
71967196 // over the incoming values correctly.
7197- auto *EpiResumePhi = cast<PHINode>(State.get (EpiResumePhiR, true ));
7198- EpiResumePhi->setIncomingValueForBlock (
7197+ EpiResumePhi.setIncomingValueForBlock (
71997198 BypassBlock, MainResumePhi->getIncomingValueForBlock (BypassBlock));
72007199}
72017200
@@ -7306,31 +7305,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73067305
73077306 BestVPlan.execute (&State);
73087307
7309- // 2.5 When vectorizing the epilogue, fix reduction resume values from the
7310- // additional bypass block.
7311- if (VectorizingEpilogue) {
7312- assert (!BestVPlan.hasEarlyExit () &&
7313- " Epilogue vectorisation not yet supported with early exits" );
7314- BasicBlock *PH = OrigLoop->getLoopPreheader ();
7315- BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock ();
7316- for (auto *Pred : predecessors (PH)) {
7317- for (PHINode &Phi : PH->phis ()) {
7318- if (Phi.getBasicBlockIndex (Pred) != -1 )
7319- continue ;
7320- Phi.addIncoming (Phi.getIncomingValueForBlock (BypassBlock), Pred);
7321- }
7322- }
7323- VPBasicBlock *ScalarPH = BestVPlan.getScalarPreheader ();
7324- if (ScalarPH->hasPredecessors ()) {
7325- // If ScalarPH has predecessors, we may need to update its reduction
7326- // resume values.
7327- for (VPRecipeBase &R : ScalarPH->phis ()) {
7328- fixReductionScalarResumeWhenVectorizingEpilog (cast<VPPhi>(&R), State,
7329- BypassBlock);
7330- }
7331- }
7332- }
7333-
73347308 // 2.6. Maintain Loop Hints
73357309 // Keep all loop hints from the original loop on the vector loop (we'll
73367310 // replace the vectorizer-specific hints below).
@@ -9851,6 +9825,43 @@ static Value *createInductionAdditionalBypassValues(
98519825 return EndValueFromAdditionalBypass;
98529826}
98539827
/// Patch the resume values that reach the scalar loop through \p BypassBlock.
///
/// Three steps are performed, in this order:
///  1. Every phi in the preheader of \p L gets an incoming entry for each
///     predecessor it does not yet list, reusing the value the phi already
///     has for \p BypassBlock.
///  2. If the scalar preheader of \p BestEpiPlan has predecessors, each of its
///     phi recipes is handed, together with an IR phi of the underlying IR
///     block, to fixReductionScalarResumeWhenVectorizingEpilog.
///  3. For every induction variable known to \p LVL, a value is created in
///     \p BypassBlock via createInductionAdditionalBypassValues and installed
///     as the \p BypassBlock incoming value of the induction's resume phi.
///
/// \param BypassBlock          Block whose incoming phi values are fixed up.
/// \param L                    Loop whose preheader phis are updated.
/// \param BestEpiPlan          VPlan whose scalar preheader is inspected.
/// \param LVL                  Source of the induction variables and of the
///                             primary induction.
/// \param ExpandedSCEVs        Forwarded unchanged to
///                             createInductionAdditionalBypassValues.
/// \param MainVectorTripCount  Forwarded unchanged to
///                             createInductionAdditionalBypassValues.
9828+ static void fixScalarResumeValuesFromBypass (BasicBlock *BypassBlock, Loop *L,
9829+ VPlan &BestEpiPlan,
9830+ LoopVectorizationLegality &LVL,
9831+ const SCEV2ValueTy &ExpandedSCEVs,
9832+ Value *MainVectorTripCount) {
9833+ // Fix reduction resume values from the additional bypass block.
9834+ BasicBlock *PH = L->getLoopPreheader ();
// For each (predecessor, phi) pair of the preheader, fill in a missing
// incoming entry by copying the phi's existing value for BypassBlock.
9835+ for (auto *Pred : predecessors (PH)) {
9836+ for (PHINode &Phi : PH->phis ()) {
// An index other than -1 means the phi already has an entry for Pred.
9837+ if (Phi.getBasicBlockIndex (Pred) != -1 )
9838+ continue ;
9839+ Phi.addIncoming (Phi.getIncomingValueForBlock (BypassBlock), Pred);
9840+ }
9841+ }
9842+ auto *ScalarPH = cast<VPIRBasicBlock>(BestEpiPlan.getScalarPreheader ());
9843+ if (ScalarPH->hasPredecessors ()) {
9844+ // If ScalarPH has predecessors, we may need to update its reduction
9845+ // resume values.
// NOTE(review): the recipe phis and the IR phis are paired positionally by
// zip, so this presumably relies on both ranges being in the same order and
// of the same length - confirm.
9846+ for (const auto &[R, IRPhi] :
9847+ zip (ScalarPH->phis (), ScalarPH->getIRBasicBlock ()->phis ())) {
9848+ fixReductionScalarResumeWhenVectorizingEpilog (cast<VPPhi>(&R), IRPhi,
9849+ BypassBlock);
9850+ }
9851+ }
9852+
9853+ // Fix induction resume values from the additional bypass block.
// New instructions are emitted at the first insertion point of BypassBlock.
9854+ IRBuilder<> BypassBuilder (BypassBlock, BypassBlock->getFirstInsertionPt ());
9855+ for (const auto &[IVPhi, II] : LVL.getInductionVars ()) {
// The value IVPhi receives from the preheader is expected to be a phi
// itself; the cast enforces that expectation.
9856+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
9857+ Value *V = createInductionAdditionalBypassValues (
9858+ IVPhi, II, BypassBuilder, ExpandedSCEVs, MainVectorTripCount,
9859+ LVL.getPrimaryInduction ());
9860+ // TODO: Directly add as extra operand to the VPResumePHI recipe.
9861+ Inc->setIncomingValueForBlock (BypassBlock, V);
9862+ }
9863+ }
9864+
98549865bool LoopVectorizePass::processLoop (Loop *L) {
98559866 assert ((EnableVPlanNativePath || L->isInnermost ()) &&
98569867 " VPlan-native path is not enabled. Only process inner loops." );
@@ -10227,18 +10238,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1022710238 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV, DT,
1022810239 true );
1022910240
10230- // Fix induction resume values from the additional bypass block.
10231- BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock ();
10232- IRBuilder<> BypassBuilder (BypassBlock, BypassBlock->getFirstInsertionPt ());
10233- BasicBlock *PH = L->getLoopPreheader ();
10234- for (const auto &[IVPhi, II] : LVL.getInductionVars ()) {
10235- auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10236- Value *V = createInductionAdditionalBypassValues (
10237- IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount ,
10238- LVL.getPrimaryInduction ());
10239- // TODO: Directly add as extra operand to the VPResumePHI recipe.
10240- Inc->setIncomingValueForBlock (BypassBlock, V);
10241- }
10241+ fixScalarResumeValuesFromBypass (EpilogILV.getAdditionalBypassBlock (), L,
10242+ BestEpiPlan, LVL, ExpandedSCEVs,
10243+ EPI.VectorTripCount );
1024210244 ++LoopsEpilogueVectorized;
1024310245
1024410246 if (!Checks.hasChecks ())
0 commit comments