@@ -494,11 +494,8 @@ class InnerLoopVectorizer {
494494 // / is generated around the vectorized (and scalar epilogue) loops consisting
495495 // / of various checks and bypasses. Return the pre-header block of the new
496496 // / loop. In the case of epilogue vectorization, this function is overriden to
497- // / handle the more complex control flow around the loops. \p ExpandedSCEVs is
498- // / used to look up SCEV expansions for expressions needed during skeleton
499- // / creation.
500- virtual BasicBlock *
501- createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs);
497+ // / handle the more complex control flow around the loops.
498+ virtual BasicBlock *createVectorizedLoopSkeleton ();
502499
503500 // / Fix the vectorized code, taking care of header phi's, and more.
504501 void fixVectorizedLoop (VPTransformState &State);
@@ -526,12 +523,6 @@ class InnerLoopVectorizer {
526523 // / count of the original loop for both main loop and epilogue vectorization.
527524 void setTripCount (Value *TC) { TripCount = TC; }
528525
529- // Retrieve the additional bypass value associated with an original
530- // / induction header phi.
531- Value *getInductionAdditionalBypassValue (PHINode *OrigPhi) const {
532- return Induction2AdditionalBypassValue.at (OrigPhi);
533- }
534-
535526 // / Return the additional bypass block which targets the scalar loop by
536527 // / skipping the epilogue loop after completing the main loop.
537528 BasicBlock *getAdditionalBypassBlock () const {
@@ -568,11 +559,6 @@ class InnerLoopVectorizer {
568559 // / vector loop preheader, middle block and scalar preheader.
569560 void createVectorLoopSkeleton (StringRef Prefix);
570561
571- // / Create and record the values for induction variables to resume coming from
572- // / the additional bypass block.
573- void createInductionAdditionalBypassValues (const SCEV2ValueTy &ExpandedSCEVs,
574- Value *MainVectorTripCount);
575-
576562 // / Allow subclasses to override and print debug traces before/after vplan
577563 // / execution, when trace information is requested.
578564 virtual void printDebugTracesAtStart () {}
@@ -666,11 +652,6 @@ class InnerLoopVectorizer {
666652 // / for cleaning the checks, if vectorization turns out unprofitable.
667653 GeneratedRTChecks &RTChecks;
668654
669- // / Mapping of induction phis to their additional bypass values. They
670- // / need to be added as operands to phi nodes in the scalar loop preheader
671- // / after the epilogue skeleton has been created.
672- DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;
673-
674655 // / The additional bypass block which conditionally skips over the epilogue
675656 // / loop after executing the main loop. Needed to resume inductions and
676657 // / reductions during epilogue vectorization.
@@ -733,16 +714,14 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
733714
734715 // Override this function to handle the more complex control flow around the
735716 // three loops.
736- BasicBlock *
737- createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final {
738- return createEpilogueVectorizedLoopSkeleton (ExpandedSCEVs);
717+ BasicBlock *createVectorizedLoopSkeleton () final {
718+ return createEpilogueVectorizedLoopSkeleton ();
739719 }
740720
741721 // / The interface for creating a vectorized skeleton using one of two
742722 // / different strategies, each corresponding to one execution of the vplan
743723 // / as described above.
744- virtual BasicBlock *
745- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) = 0;
724+ virtual BasicBlock *createEpilogueVectorizedLoopSkeleton () = 0;
746725
747726 // / Holds and updates state information required to vectorize the main loop
748727 // / and its epilogue in two separate passes. This setup helps us avoid
@@ -770,8 +749,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
770749 EPI, LVL, CM, BFI, PSI, Check, Plan) {}
771750 // / Implements the interface for creating a vectorized skeleton using the
772751 // / *main loop* strategy (ie the first pass of vplan execution).
773- BasicBlock *
774- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final ;
752+ BasicBlock *createEpilogueVectorizedLoopSkeleton () final ;
775753
776754protected:
777755 // / Emits an iteration count bypass check once for the main loop (when \p
@@ -801,8 +779,7 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
801779 }
802780 // / Implements the interface for creating a vectorized skeleton using the
803781 // / *epilogue loop* strategy (ie the second pass of vplan execution).
804- BasicBlock *
805- createEpilogueVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs) final ;
782+ BasicBlock *createEpilogueVectorizedLoopSkeleton () final ;
806783
807784protected:
808785 // / Emits an iteration count bypass check after the main vector loop has
@@ -2679,44 +2656,7 @@ static void addFullyUnrolledInstructionsToIgnore(
26792656 }
26802657}
26812658
2682- void InnerLoopVectorizer::createInductionAdditionalBypassValues (
2683- const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
2684- assert (MainVectorTripCount && " Must have bypass information" );
2685-
2686- Instruction *OldInduction = Legal->getPrimaryInduction ();
2687- IRBuilder<> BypassBuilder (getAdditionalBypassBlock (),
2688- getAdditionalBypassBlock ()->getFirstInsertionPt ());
2689- for (const auto &InductionEntry : Legal->getInductionVars ()) {
2690- PHINode *OrigPhi = InductionEntry.first ;
2691- const InductionDescriptor &II = InductionEntry.second ;
2692- Value *Step = getExpandedStep (II, ExpandedSCEVs);
2693- // For the primary induction the additional bypass end value is known.
2694- // Otherwise it is computed.
2695- Value *EndValueFromAdditionalBypass = MainVectorTripCount;
2696- if (OrigPhi != OldInduction) {
2697- auto *BinOp = II.getInductionBinOp ();
2698- // Fast-math-flags propagate from the original induction instruction.
2699- if (isa_and_nonnull<FPMathOperator>(BinOp))
2700- BypassBuilder.setFastMathFlags (BinOp->getFastMathFlags ());
2701-
2702- // Compute the end value for the additional bypass.
2703- EndValueFromAdditionalBypass =
2704- emitTransformedIndex (BypassBuilder, MainVectorTripCount,
2705- II.getStartValue (), Step, II.getKind (), BinOp);
2706- EndValueFromAdditionalBypass->setName (" ind.end" );
2707- }
2708-
2709- // Store the bypass value here, as it needs to be added as operand to its
2710- // scalar preheader phi node after the epilogue skeleton has been created.
2711- // TODO: Directly add as extra operand to the VPResumePHI recipe.
2712- assert (!Induction2AdditionalBypassValue.contains (OrigPhi) &&
2713- " entry for OrigPhi already exits" );
2714- Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
2715- }
2716- }
2717-
2718- BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton (
2719- const SCEV2ValueTy &ExpandedSCEVs) {
2659+ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton () {
27202660 /*
27212661 In this function we generate a new loop. The new loop will contain
27222662 the vectorized instructions while the old loop will continue to run the
@@ -7628,17 +7568,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
76287568
76297569DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
76307570 ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
7631- InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue,
7632- const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
7571+ InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) {
76337572 assert (BestVPlan.hasVF (BestVF) &&
76347573 " Trying to execute plan with unsupported VF" );
76357574 assert (BestVPlan.hasUF (BestUF) &&
76367575 " Trying to execute plan with unsupported UF" );
7637- assert (
7638- ((VectorizingEpilogue && ExpandedSCEVs) ||
7639- (!VectorizingEpilogue && !ExpandedSCEVs)) &&
7640- " expanded SCEVs to reuse can only be used during epilogue vectorization" );
7641-
76427576 // TODO: Move to VPlan transform stage once the transition to the VPlan-based
76437577 // cost model is complete for better cost estimates.
76447578 VPlanTransforms::unrollByUF (BestVPlan, BestUF,
@@ -7670,8 +7604,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76707604 // middle block. The vector loop is created during VPlan execution.
76717605 VPBasicBlock *VectorPH =
76727606 cast<VPBasicBlock>(BestVPlan.getEntry ()->getSingleSuccessor ());
7673- State. CFG . PrevBB = ILV. createVectorizedLoopSkeleton (
7674- ExpandedSCEVs ? *ExpandedSCEVs : State. ExpandedSCEVs );
7607+
7608+ State. CFG . PrevBB = ILV. createVectorizedLoopSkeleton ( );
76757609 if (VectorizingEpilogue)
76767610 VPlanTransforms::removeDeadRecipes (BestVPlan);
76777611
@@ -7712,8 +7646,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77127646 BestVPlan.execute (&State);
77137647
77147648 auto *MiddleVPBB = BestVPlan.getMiddleBlock ();
7715- // 2.5 When vectorizing the epilogue, fix reduction and induction resume
7716- // values from the additional bypass block.
7649+ // 2.5 When vectorizing the epilogue, fix reduction resume values from the
7650+ // additional bypass block.
77177651 if (VectorizingEpilogue) {
77187652 assert (!ILV.Legal ->hasUncountableEarlyExit () &&
77197653 " Epilogue vectorisation not yet supported with early exits" );
@@ -7722,12 +7656,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77227656 fixReductionScalarResumeWhenVectorizingEpilog (
77237657 &R, State, State.CFG .VPBB2IRBB [MiddleVPBB], BypassBlock);
77247658 }
7725- BasicBlock *PH = OrigLoop->getLoopPreheader ();
7726- for (const auto &[IVPhi, _] : Legal->getInductionVars ()) {
7727- auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
7728- Value *V = ILV.getInductionAdditionalBypassValue (IVPhi);
7729- Inc->setIncomingValueForBlock (BypassBlock, V);
7730- }
77317659 }
77327660
77337661 // 2.6. Maintain Loop Hints
@@ -7789,8 +7717,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77897717
77907718// / This function is partially responsible for generating the control flow
77917719// / depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7792- BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton (
7793- const SCEV2ValueTy &ExpandedSCEVs) {
7720+ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton () {
77947721 createVectorLoopSkeleton (" " );
77957722
77967723 // Generate the code to check the minimum iteration count of the vector
@@ -7900,8 +7827,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
79007827// / This function is partially responsible for generating the control flow
79017828// / depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
79027829BasicBlock *
7903- EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton (
7904- const SCEV2ValueTy &ExpandedSCEVs) {
7830+ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton () {
79057831 createVectorLoopSkeleton (" vec.epilog." );
79067832
79077833 // Now, compare the remaining count and if there aren't enough iterations to
@@ -7969,11 +7895,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79697895 Phi->removeIncomingValue (EPI.MemSafetyCheck );
79707896 }
79717897
7972- // Generate bypass values from the additional bypass block. Note that when the
7973- // vectorized epilogue is skipped due to iteration count check, then the
7974- // resume value for the induction variable comes from the trip count of the
7975- // main vector loop, passed as the second argument.
7976- createInductionAdditionalBypassValues (ExpandedSCEVs, EPI.VectorTripCount );
79777898 return LoopVectorPreHeader;
79787899}
79797900
@@ -10314,6 +10235,33 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1031410235 }
1031510236}
1031610237
10238+ // Return the resume value for induction \p OrigPhi coming from the additional
10239+ // bypass block. When the vectorized epilogue is skipped due to the iteration
10240+ // count check, that value is derived from the trip count of the main vector
10241+ // loop, passed as \p MainVectorTripCount.
10242+ static Value *createInductionAdditionalBypassValues (
10243+ PHINode *OrigPhi, const InductionDescriptor &II, IRBuilder<> &BypassBuilder,
10244+ const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount,
10245+ Instruction *OldInduction) {
10246+ Value *Step = getExpandedStep (II, ExpandedSCEVs);
10247+ // For the primary induction (\p OldInduction) the additional bypass end value
10248+ // is \p MainVectorTripCount itself. Otherwise it is computed below.
10249+ Value *EndValueFromAdditionalBypass = MainVectorTripCount;
10250+ if (OrigPhi != OldInduction) {
10251+ auto *BinOp = II.getInductionBinOp ();
10252+ // Fast-math flags propagate from the original induction binary operator.
10253+ if (isa_and_nonnull<FPMathOperator>(BinOp))
10254+ BypassBuilder.setFastMathFlags (BinOp->getFastMathFlags ());
10255+
10256+ // Compute the end value for the additional bypass using \p BypassBuilder.
10257+ EndValueFromAdditionalBypass =
10258+ emitTransformedIndex (BypassBuilder, MainVectorTripCount,
10259+ II.getStartValue (), Step, II.getKind (), BinOp);
10260+ EndValueFromAdditionalBypass->setName (" ind.end" );
10261+ }
10262+ return EndValueFromAdditionalBypass;
10263+ }
10264+
1031710265bool LoopVectorizePass::processLoop (Loop *L) {
1031810266 assert ((EnableVPlanNativePath || L->isInnermost ()) &&
1031910267 " VPlan-native path is not enabled. Only process inner loops." );
@@ -10699,7 +10647,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1069910647 preparePlanForEpilogueVectorLoop (BestEpiPlan, L, ExpandedSCEVs, EPI);
1070010648
1070110649 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10702- DT, true , &ExpandedSCEVs);
10650+ DT, true );
10651+
10652+ // Fix induction resume values from the additional bypass block.
10653+ BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock ();
10654+ IRBuilder<> BypassBuilder (BypassBlock,
10655+ BypassBlock->getFirstInsertionPt ());
10656+ BasicBlock *PH = L->getLoopPreheader ();
10657+ for (const auto &[IVPhi, II] : LVL.getInductionVars ()) {
10658+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10659+ Value *V = createInductionAdditionalBypassValues (
10660+ IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount ,
10661+ LVL.getPrimaryInduction ());
10662+ // TODO: Directly add as extra operand to the VPResumePHI recipe.
10663+ Inc->setIncomingValueForBlock (BypassBlock, V);
10664+ }
1070310665 ++LoopsEpilogueVectorized;
1070410666
1070510667 if (!MainILV.areSafetyChecksAdded ())
0 commit comments