@@ -519,14 +519,14 @@ class InnerLoopVectorizer {
519519 // / the induction resume value, and the value for the bypass block, if needed.
520520 // / \p Step is the SCEV-expanded induction step to use. In cases where the
521521 // / loop skeleton is more complicated (i.e., epilogue vectorization) and the
522- // / resume values can come from an additional bypass block, the \p
523- // / AdditionalBypass pair provides this additional bypass block along with the
524- // / resume value coming from it .
525- void createInductionResumeVPValue (
526- VPIRInstruction *InductionPhiIRI, const InductionDescriptor &ID,
527- Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
528- VPBuilder &ScalarPHBuilder,
529- std::pair<BasicBlock *, Value *> AdditionalBypass = { nullptr , nullptr } );
522+ // / resume values can come from an additional bypass block, \p
523+ // / AdditionalBypassValue provides the end value on the edge from bypass to
524+ // / this loop .
525+ void createInductionResumeVPValue (VPIRInstruction *InductionPhiIRI,
526+ const InductionDescriptor &ID, Value *Step ,
527+ ArrayRef<BasicBlock *> BypassBlocks,
528+ VPBuilder &ScalarPHBuilder,
529+ Value *AdditionalBypassValue = nullptr );
530530
531531 // / Returns the original loop trip count.
532532 Value *getTripCount () const { return TripCount; }
@@ -539,12 +539,14 @@ class InnerLoopVectorizer {
539539 // / Retrieve the bypass value associated with an original induction header
540540 // / phi.
541541 Value *getInductionAdditionalBypassValue (PHINode *OrigPhi) const {
542- return Induction2AdditionalBypass .at (OrigPhi). second ;
542+ return Induction2AdditionalBypassValue .at (OrigPhi);
543543 }
544544
545545 // / Return the additional bypass block.
546- BasicBlock *getInductionAdditionalBypassBlock () const {
547- return Induction2AdditionalBypass.begin ()->second .first ;
546+ BasicBlock *getAdditionalBypassBlock () const {
547+ assert (AdditionalBypassBlock &&
548+ " Trying to access AdditionalBypassBlock but it has not been set" );
549+ return AdditionalBypassBlock;
548550 }
549551
550552protected:
@@ -584,11 +586,10 @@ class InnerLoopVectorizer {
584586 // / in the scalar epilogue, from where the vectorized loop left off.
585587 // / In cases where the loop skeleton is more complicated (eg. epilogue
586588 // / vectorization) and the resume values can come from an additional bypass
587- // / block, the \p AdditionalBypass pair provides information about the bypass
588- // / block and the end value on the edge from bypass to this loop.
589- void createInductionResumeVPValues (
590- const SCEV2ValueTy &ExpandedSCEVs,
591- std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
589+ // / block, the \p AdditionalBypassValue provides the end value on the edge
590+ // / from bypass to this loop.
591+ void createInductionResumeVPValues (const SCEV2ValueTy &ExpandedSCEVs,
592+ Value *AdditionalBypassValue = nullptr );
592593
593594 // / Allow subclasses to override and print debug traces before/after vplan
594595 // / execution, when trace information is requested.
@@ -678,11 +679,15 @@ class InnerLoopVectorizer {
678679 // / for cleaning the checks, if vectorization turns out unprofitable.
679680 GeneratedRTChecks &RTChecks;
680681
681- // / Mapping of induction phis to their bypass values and bypass blocks. They
682+ // / The additional bypass block which conditionally skips over the epilogue
683+ // / loop after executing the main loop. Needed to resume inductions and
684+ // / reductions during epilogue vectorization.
685+ BasicBlock *AdditionalBypassBlock = nullptr ;
686+
687+ // / Mapping of induction phis to their additional bypass values. They
682688 // / need to be added as operands to phi nodes in the scalar loop preheader
683689 // / after the epilogue skeleton has been created.
684- DenseMap<PHINode *, std::pair<BasicBlock *, Value *>>
685- Induction2AdditionalBypass;
690+ DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;
686691
687692 VPlan &Plan;
688693};
@@ -2603,14 +2608,14 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
26032608void InnerLoopVectorizer::createInductionResumeVPValue (
26042609 VPIRInstruction *InductionPhiRI, const InductionDescriptor &II, Value *Step,
26052610 ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
2606- std::pair<BasicBlock *, Value *> AdditionalBypass ) {
2611+ Value *AdditionalBypassValue ) {
26072612 auto *OrigPhi = cast<PHINode>(&InductionPhiRI->getInstruction ());
26082613 Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
26092614 assert (VectorTripCount && " Expected valid arguments" );
26102615
26112616 Instruction *OldInduction = Legal->getPrimaryInduction ();
26122617 Value *EndValue = nullptr ;
2613- Value *EndValueFromAdditionalBypass = AdditionalBypass. second ;
2618+ Value *EndValueFromAdditionalBypass = AdditionalBypassValue ;
26142619 if (OrigPhi == OldInduction) {
26152620 // We know what the end value is.
26162621 EndValue = VectorTripCount;
@@ -2626,11 +2631,11 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
26262631 EndValue->setName (" ind.end" );
26272632
26282633 // Compute the end value for the additional bypass (if applicable).
2629- if (AdditionalBypass. first ) {
2630- B.SetInsertPoint (AdditionalBypass. first ,
2631- AdditionalBypass. first ->getFirstInsertionPt ());
2634+ if (AdditionalBypassValue ) {
2635+ B.SetInsertPoint (getAdditionalBypassBlock () ,
2636+ getAdditionalBypassBlock () ->getFirstInsertionPt ());
26322637 EndValueFromAdditionalBypass =
2633- emitTransformedIndex (B, AdditionalBypass. second , II.getStartValue (),
2638+ emitTransformedIndex (B, AdditionalBypassValue , II.getStartValue (),
26342639 Step, II.getKind (), II.getInductionBinOp ());
26352640 EndValueFromAdditionalBypass->setName (" ind.end" );
26362641 }
@@ -2644,14 +2649,13 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
26442649 " InductionPhiRI should not have any operands" );
26452650 InductionPhiRI->addOperand (ResumePhiRecipe);
26462651
2647- if (AdditionalBypass. first ) {
2652+ if (AdditionalBypassValue ) {
26482653 // Store the bypass value here, as it needs to be added as operand to its
26492654 // scalar preheader phi node after the epilogue skeleton has been created.
26502655 // TODO: Directly add as extra operand to the VPResumePHI recipe.
2651- assert (!Induction2AdditionalBypass .contains (OrigPhi) &&
2656+ assert (!Induction2AdditionalBypassValue .contains (OrigPhi) &&
26522657 " entry for OrigPhi already exits" );
2653- Induction2AdditionalBypass[OrigPhi] = {AdditionalBypass.first ,
2654- EndValueFromAdditionalBypass};
2658+ Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
26552659 }
26562660}
26572661
@@ -2670,19 +2674,13 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26702674}
26712675
26722676void InnerLoopVectorizer::createInductionResumeVPValues (
2673- const SCEV2ValueTy &ExpandedSCEVs,
2674- std::pair<BasicBlock *, Value *> AdditionalBypass) {
2675- assert (((AdditionalBypass.first && AdditionalBypass.second ) ||
2676- (!AdditionalBypass.first && !AdditionalBypass.second )) &&
2677- " Inconsistent information about additional bypass." );
2677+ const SCEV2ValueTy &ExpandedSCEVs, Value *AdditionalBypassValue) {
26782678 // We are going to resume the execution of the scalar loop.
26792679 // Go over all of the induction variable PHIs of the scalar loop header and
26802680 // fix their starting values, which depend on the counter of the last
2681- // iteration of the vectorized loop. The starting values of PHI nodes depend
2682- // on the counter of the last iteration in the vectorized loop. If we come
2683- // from one of the LoopBypassBlocks then we need to start from the original
2684- // start value. If we come from the AdditionalBypass then we need to start
2685- // from its value.
2681+ // iteration of the vectorized loop. If we come from one of the
2682+ // LoopBypassBlocks then we need to start from the original start value. If we
2683+ // come from the AdditionalBypass then we need to start from its value.
26862684 VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
26872685 VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
26882686 for (VPRecipeBase &R : *Plan.getScalarHeader ()) {
@@ -2695,7 +2693,7 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
26952693 const InductionDescriptor &II = Legal->getInductionVars ().find (Phi)->second ;
26962694 createInductionResumeVPValue (PhiR, II, getExpandedStep (II, ExpandedSCEVs),
26972695 LoopBypassBlocks, ScalarPHBuilder,
2698- AdditionalBypass );
2696+ AdditionalBypassValue );
26992697 }
27002698}
27012699
@@ -7744,7 +7742,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77447742 // 2.5 When vectorizing the epilogue, fix reduction and induction resume
77457743 // values from the additional bypass block.
77467744 if (VectorizingEpilogue) {
7747- BasicBlock *BypassBlock = ILV.getInductionAdditionalBypassBlock ();
7745+ BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock ();
77487746 for (VPRecipeBase &R : *ExitVPBB) {
77497747 fixReductionScalarResumeWhenVectorizingEpilog (
77507748 &R, State, State.CFG .VPBB2IRBB [ExitVPBB], BypassBlock);
@@ -7941,6 +7939,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79417939 nullptr , " vec.epilog.iter.check" , true );
79427940 emitMinimumVectorEpilogueIterCountCheck (LoopScalarPreHeader,
79437941 VecEpilogueIterationCountCheck);
7942+ AdditionalBypassBlock = VecEpilogueIterationCountCheck;
79447943
79457944 // Adjust the control flow taking the state info from the main loop
79467945 // vectorization into account.
@@ -8017,12 +8016,13 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80178016 // preheader.
80188017 PHINode *EPResumeVal = nullptr ;
80198018 Type *IdxTy = Legal->getWidestInductionType ();
8019+ Value *TC = EPI.VectorTripCount ;
8020+ Constant *Init = ConstantInt::get (IdxTy, 0 );
8021+
80208022 for (PHINode &P : LoopVectorPreHeader->phis ()) {
80218023 if (P.getType () == IdxTy &&
8022- P.getIncomingValueForBlock (VecEpilogueIterationCountCheck) ==
8023- EPI.VectorTripCount &&
8024- P.getIncomingValueForBlock (EPI.MainLoopIterationCountCheck ) ==
8025- ConstantInt::get (IdxTy, 0 )) {
8024+ P.getIncomingValueForBlock (VecEpilogueIterationCountCheck) == TC &&
8025+ P.getIncomingValueForBlock (EPI.MainLoopIterationCountCheck ) == Init) {
80268026 EPResumeVal = &P;
80278027 EPResumeVal->setName (" vec.epilog.resume.val" );
80288028 break ;
@@ -8031,22 +8031,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80318031 if (!EPResumeVal) {
80328032 EPResumeVal = PHINode::Create (IdxTy, 2 , " vec.epilog.resume.val" );
80338033 EPResumeVal->insertBefore (LoopVectorPreHeader->getFirstNonPHIIt ());
8034- EPResumeVal->addIncoming (EPI.VectorTripCount ,
8035- VecEpilogueIterationCountCheck);
8036- EPResumeVal->addIncoming (ConstantInt::get (IdxTy, 0 ),
8037- EPI.MainLoopIterationCountCheck );
8034+ EPResumeVal->addIncoming (TC, VecEpilogueIterationCountCheck);
8035+ EPResumeVal->addIncoming (Init, EPI.MainLoopIterationCountCheck );
80388036 }
80398037
80408038 // Generate induction resume values. These variables save the new starting
80418039 // indexes for the scalar loop. They are used to test if there are any tail
80428040 // iterations left once the vector loop has completed.
80438041 // Note that when the vectorized epilogue is skipped due to iteration count
80448042 // check, then the resume value for the induction variable comes from
8045- // the trip count of the main vector loop, hence passing the AdditionalBypass
8046- // argument.
8047- createInductionResumeVPValues (ExpandedSCEVs,
8048- {VecEpilogueIterationCountCheck,
8049- EPI.VectorTripCount } /* AdditionalBypass */ );
8043+ // the trip count of the main vector loop, hence passing the
8044+ // AdditionalBypassValue argument.
8045+ createInductionResumeVPValues (
8046+ ExpandedSCEVs, EPI.VectorTripCount /* AdditionalBypassValue */ );
80508047
80518048 return {LoopVectorPreHeader, EPResumeVal};
80528049}
@@ -10358,6 +10355,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1035810355 auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1035910356 IndPhi = WidenInd->getPHINode ();
1036010357 }
10358+ // Hook up to the PHINode generated by a ResumePhi recipe of main
10359+ // loop VPlan, which feeds the scalar loop.
1036110360 ResumeV = IndPhi->getIncomingValueForBlock (L->getLoopPreheader ());
1036210361 }
1036310362 assert (ResumeV && " Must have a resume value" );
0 commit comments