@@ -503,11 +503,8 @@ class InnerLoopVectorizer {
503503 // / is generated around the vectorized (and scalar epilogue) loops consisting
504504 // / of various checks and bypasses. Return the pre-header block of the new
505505 // / loop. In the case of epilogue vectorization, this function is overridden to
506- // / handle the more complex control flow around the loops. \p ExpandedSCEVs is
507- // / used to look up SCEV expansions for expressions needed during skeleton
508- // / creation.
509- virtual BasicBlock *
510- createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs);
506+ // / handle the more complex control flow around the loops.
507+ virtual BasicBlock *createVectorizedLoopSkeleton ();
511508
512509 // / Fix the vectorized code, taking care of header phi's, and more.
513510 void fixVectorizedLoop (VPTransformState &State);
@@ -535,12 +532,6 @@ class InnerLoopVectorizer {
535532 // / count of the original loop for both main loop and epilogue vectorization.
536533 void setTripCount (Value *TC) { TripCount = TC; }
537534
538- // Retrieve the additional bypass value associated with an original
539- // / induction header phi.
540- Value *getInductionAdditionalBypassValue (PHINode *OrigPhi) const {
541- return Induction2AdditionalBypassValue.at (OrigPhi);
542- }
543-
544535 // / Return the additional bypass block which targets the scalar loop by
545536 // / skipping the epilogue loop after completing the main loop.
546537 BasicBlock *getAdditionalBypassBlock () const {
@@ -577,11 +568,6 @@ class InnerLoopVectorizer {
577568 // / vector loop preheader, middle block and scalar preheader.
578569 void createVectorLoopSkeleton (StringRef Prefix);
579570
580- // / Create and record the values for induction variables to resume coming from
581- // / the additional bypass block.
582- void createInductionAdditionalBypassValues (const SCEV2ValueTy &ExpandedSCEVs,
583- Value *MainVectorTripCount);
584-
585571 // / Allow subclasses to override and print debug traces before/after vplan
586572 // / execution, when trace information is requested.
587573 virtual void printDebugTracesAtStart () {}
@@ -671,11 +657,6 @@ class InnerLoopVectorizer {
671657 // / for cleaning the checks, if vectorization turns out unprofitable.
672658 GeneratedRTChecks &RTChecks;
673659
674- // / Mapping of induction phis to their additional bypass values. They
675- // / need to be added as operands to phi nodes in the scalar loop preheader
676- // / after the epilogue skeleton has been created.
677- DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;
678-
679660 // / The additional bypass block which conditionally skips over the epilogue
680661 // / loop after executing the main loop. Needed to resume inductions and
681662 // / reductions during epilogue vectorization.
@@ -738,16 +719,14 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
738719
739720 // Override this function to handle the more complex control flow around the
740721 // three loops.
741- BasicBlock *
742- createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final {
743- return createEpilogueVectorizedLoopSkeleton (ExpandedSCEVs);
722+ BasicBlock *createVectorizedLoopSkeleton () final {
723+ return createEpilogueVectorizedLoopSkeleton ();
744724 }
745725
746726 // / The interface for creating a vectorized skeleton using one of two
747727 // / different strategies, each corresponding to one execution of the vplan
748728 // / as described above.
749- virtual BasicBlock *
750- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) = 0;
729+ virtual BasicBlock *createEpilogueVectorizedLoopSkeleton () = 0;
751730
752731 // / Holds and updates state information required to vectorize the main loop
753732 // / and its epilogue in two separate passes. This setup helps us avoid
@@ -775,8 +754,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
775754 EPI, LVL, CM, BFI, PSI, Check, Plan) {}
776755 // / Implements the interface for creating a vectorized skeleton using the
777756 // / *main loop* strategy (ie the first pass of vplan execution).
778- BasicBlock *
779- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final ;
757+ BasicBlock *createEpilogueVectorizedLoopSkeleton () final ;
780758
781759protected:
782760 // / Emits an iteration count bypass check once for the main loop (when \p
@@ -806,8 +784,7 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
806784 }
807785 // / Implements the interface for creating a vectorized skeleton using the
808786 // / *epilogue loop* strategy (ie the second pass of vplan execution).
809- BasicBlock *
810- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final ;
787+ BasicBlock *createEpilogueVectorizedLoopSkeleton () final ;
811788
812789protected:
813790 // / Emits an iteration count bypass check after the main vector loop has
@@ -2722,44 +2699,7 @@ static void addFullyUnrolledInstructionsToIgnore(
27222699 }
27232700}
27242701
2725- void InnerLoopVectorizer::createInductionAdditionalBypassValues (
2726- const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
2727- assert (MainVectorTripCount && " Must have bypass information" );
2728-
2729- Instruction *OldInduction = Legal->getPrimaryInduction ();
2730- IRBuilder<> BypassBuilder (getAdditionalBypassBlock (),
2731- getAdditionalBypassBlock ()->getFirstInsertionPt ());
2732- for (const auto &InductionEntry : Legal->getInductionVars ()) {
2733- PHINode *OrigPhi = InductionEntry.first ;
2734- const InductionDescriptor &II = InductionEntry.second ;
2735- Value *Step = getExpandedStep (II, ExpandedSCEVs);
2736- // For the primary induction the additional bypass end value is known.
2737- // Otherwise it is computed.
2738- Value *EndValueFromAdditionalBypass = MainVectorTripCount;
2739- if (OrigPhi != OldInduction) {
2740- auto *BinOp = II.getInductionBinOp ();
2741- // Fast-math-flags propagate from the original induction instruction.
2742- if (isa_and_nonnull<FPMathOperator>(BinOp))
2743- BypassBuilder.setFastMathFlags (BinOp->getFastMathFlags ());
2744-
2745- // Compute the end value for the additional bypass.
2746- EndValueFromAdditionalBypass =
2747- emitTransformedIndex (BypassBuilder, MainVectorTripCount,
2748- II.getStartValue (), Step, II.getKind (), BinOp);
2749- EndValueFromAdditionalBypass->setName (" ind.end" );
2750- }
2751-
2752- // Store the bypass value here, as it needs to be added as operand to its
2753- // scalar preheader phi node after the epilogue skeleton has been created.
2754- // TODO: Directly add as extra operand to the VPResumePHI recipe.
2755- assert (!Induction2AdditionalBypassValue.contains (OrigPhi) &&
2756- " entry for OrigPhi already exits" );
2757- Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
2758- }
2759- }
2760-
2761- BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton (
2762- const SCEV2ValueTy &ExpandedSCEVs) {
2702+ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton () {
27632703 /*
27642704 In this function we generate a new loop. The new loop will contain
27652705 the vectorized instructions while the old loop will continue to run the
@@ -7726,16 +7666,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
77267666
77277667DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
77287668 ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
7729- InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue,
7730- const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
7669+ InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) {
77317670 assert (BestVPlan.hasVF (BestVF) &&
77327671 " Trying to execute plan with unsupported VF" );
77337672 assert (BestVPlan.hasUF (BestUF) &&
77347673 " Trying to execute plan with unsupported UF" );
7735- assert (
7736- ((VectorizingEpilogue && ExpandedSCEVs) ||
7737- (!VectorizingEpilogue && !ExpandedSCEVs)) &&
7738- " expanded SCEVs to reuse can only be used during epilogue vectorization" );
77397674 // TODO: Move to VPlan transform stage once the transition to the VPlan-based
77407675 // cost model is complete for better cost estimates.
77417676 VPlanTransforms::runPass (VPlanTransforms::unrollByUF, BestVPlan, BestUF,
@@ -7773,8 +7708,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77737708 // middle block. The vector loop is created during VPlan execution.
77747709 VPBasicBlock *VectorPH =
77757710 cast<VPBasicBlock>(BestVPlan.getEntry ()->getSingleSuccessor ());
7776- State. CFG . PrevBB = ILV. createVectorizedLoopSkeleton (
7777- ExpandedSCEVs ? *ExpandedSCEVs : State. ExpandedSCEVs );
7711+
7712+ State. CFG . PrevBB = ILV. createVectorizedLoopSkeleton ( );
77787713 if (VectorizingEpilogue)
77797714 VPlanTransforms::removeDeadRecipes (BestVPlan);
77807715
@@ -7815,8 +7750,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78157750 BestVPlan.execute (&State);
78167751
78177752 auto *MiddleVPBB = BestVPlan.getMiddleBlock ();
7818- // 2.5 When vectorizing the epilogue, fix reduction and induction resume
7819- // values from the additional bypass block.
7753+ // 2.5 When vectorizing the epilogue, fix reduction resume values from the
7754+ // additional bypass block.
78207755 if (VectorizingEpilogue) {
78217756 assert (!ILV.Legal ->hasUncountableEarlyExit () &&
78227757 " Epilogue vectorisation not yet supported with early exits" );
@@ -7834,11 +7769,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78347769 fixReductionScalarResumeWhenVectorizingEpilog (
78357770 &R, State, State.CFG .VPBB2IRBB [MiddleVPBB], BypassBlock);
78367771 }
7837- for (const auto &[IVPhi, _] : Legal->getInductionVars ()) {
7838- auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
7839- Value *V = ILV.getInductionAdditionalBypassValue (IVPhi);
7840- Inc->setIncomingValueForBlock (BypassBlock, V);
7841- }
78427772 }
78437773
78447774 // 2.6. Maintain Loop Hints
@@ -7900,8 +7830,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
79007830
79017831// / This function is partially responsible for generating the control flow
79027832// / depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7903- BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton (
7904- const SCEV2ValueTy &ExpandedSCEVs) {
7833+ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton () {
79057834 createVectorLoopSkeleton (" " );
79067835
79077836 // Generate the code to check the minimum iteration count of the vector
@@ -8011,8 +7940,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
80117940// / This function is partially responsible for generating the control flow
80127941// / depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
80137942BasicBlock *
8014- EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton (
8015- const SCEV2ValueTy &ExpandedSCEVs) {
7943+ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton () {
80167944 createVectorLoopSkeleton (" vec.epilog." );
80177945
80187946 // Now, compare the remaining count and if there aren't enough iterations to
@@ -8080,11 +8008,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80808008 Phi->removeIncomingValue (EPI.MemSafetyCheck );
80818009 }
80828010
8083- // Generate bypass values from the additional bypass block. Note that when the
8084- // vectorized epilogue is skipped due to iteration count check, then the
8085- // resume value for the induction variable comes from the trip count of the
8086- // main vector loop, passed as the second argument.
8087- createInductionAdditionalBypassValues (ExpandedSCEVs, EPI.VectorTripCount );
80888011 return LoopVectorPreHeader;
80898012}
80908013
@@ -10529,6 +10452,33 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1052910452 }
1053010453}
1053110454
10455+ // Generate the bypass value of one induction (\p OrigPhi, described by \p II)
10456+ // for the additional bypass block. Note that when the vectorized epilogue is
10457+ // skipped due to iteration count check, then the resume value for the
10458+ // induction variable comes from \p MainVectorTripCount (main vector loop TC).
10459+ static Value *createInductionAdditionalBypassValues (
10460+ PHINode *OrigPhi, const InductionDescriptor &II, IRBuilder<> &BypassBuilder,
10461+ const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount,
10462+ Instruction *OldInduction) {
10463+ Value *Step = getExpandedStep (II, ExpandedSCEVs);
10464+ // For the primary induction (\p OldInduction) the end value is known: it is
10465+ // \p MainVectorTripCount itself. Otherwise it is computed from it below.
10466+ Value *EndValueFromAdditionalBypass = MainVectorTripCount;
10467+ if (OrigPhi != OldInduction) {
10468+ auto *BinOp = II.getInductionBinOp ();
10469+ // Fast-math-flags propagate from the original induction instruction.
10470+ if (isa_and_nonnull<FPMathOperator>(BinOp))
10471+ BypassBuilder.setFastMathFlags (BinOp->getFastMathFlags ());
10472+
10473+ // Compute the end value for the additional bypass.
10474+ EndValueFromAdditionalBypass =
10475+ emitTransformedIndex (BypassBuilder, MainVectorTripCount,
10476+ II.getStartValue (), Step, II.getKind (), BinOp);
10477+ EndValueFromAdditionalBypass->setName (" ind.end" );
10478+ }
10479+ return EndValueFromAdditionalBypass;
10480+ }
10481+
1053210482bool LoopVectorizePass::processLoop (Loop *L) {
1053310483 assert ((EnableVPlanNativePath || L->isInnermost ()) &&
1053410484 " VPlan-native path is not enabled. Only process inner loops." );
@@ -10912,7 +10862,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1091210862 preparePlanForEpilogueVectorLoop (BestEpiPlan, L, ExpandedSCEVs, EPI);
1091310863
1091410864 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10915- DT, true , &ExpandedSCEVs);
10865+ DT, true );
10866+
10867+ // Fix induction resume values from the additional bypass block.
10868+ BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock ();
10869+ IRBuilder<> BypassBuilder (BypassBlock,
10870+ BypassBlock->getFirstInsertionPt ());
10871+ BasicBlock *PH = L->getLoopPreheader ();
10872+ for (const auto &[IVPhi, II] : LVL.getInductionVars ()) {
10873+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10874+ Value *V = createInductionAdditionalBypassValues (
10875+ IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount ,
10876+ LVL.getPrimaryInduction ());
10877+ // TODO: Directly add as extra operand to the VPResumePHI recipe.
10878+ Inc->setIncomingValueForBlock (BypassBlock, V);
10879+ }
1091610880 ++LoopsEpilogueVectorized;
1091710881
1091810882 if (!MainILV.areSafetyChecksAdded ())
0 commit comments