@@ -517,13 +517,15 @@ class InnerLoopVectorizer {
517517 // / iteration count in the scalar epilogue, from where the vectorized loop
518518 // / left off. \p Step is the SCEV-expanded induction step to use. In cases
519519 // / where the loop skeleton is more complicated (i.e., epilogue vectorization)
520- // / and the resume values can come from an additional bypass block, the \p
521- // / AdditionalBypass pair provides information about the bypass block and the
522- // / end value on the edge from bypass to this loop.
523- PHINode *createInductionResumeValue (
524- PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
525- ArrayRef<BasicBlock *> BypassBlocks,
526- std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
520+ // / and the resume values can come from an additional bypass block,
521+ // / \p MainVectorTripCount provides the trip count of the main vector loop,
522+ // / used to compute the resume value reaching the scalar loop preheader
523+ // / directly from this additional bypass block.
524+ PHINode *createInductionResumeValue (PHINode *OrigPhi,
525+ const InductionDescriptor &ID,
526+ Value *Step,
527+ ArrayRef<BasicBlock *> BypassBlocks,
528+ Value *MainVectorTripCount = nullptr );
527529
528530 // / Returns the original loop trip count.
529531 Value *getTripCount () const { return TripCount; }
@@ -533,6 +535,14 @@ class InnerLoopVectorizer {
533535 // / count of the original loop for both main loop and epilogue vectorization.
534536 void setTripCount (Value *TC) { TripCount = TC; }
535537
538+ // / Return the additional bypass block, i.e. the block that skips the epilogue
539+ // / loop and targets the scalar loop after the main loop. Asserts it is set.
540+ BasicBlock *getAdditionalBypassBlock () const {
541+ assert (AdditionalBypassBlock &&
542+ " Trying to access AdditionalBypassBlock but it has not been set" );
543+ return AdditionalBypassBlock;
544+ }
545+
536546protected:
537547 friend class LoopVectorizationPlanner ;
538548
@@ -568,13 +578,11 @@ class InnerLoopVectorizer {
568578
569579 // / Create new phi nodes for the induction variables to resume iteration count
570580 // / in the scalar epilogue, from where the vectorized loop left off.
571- // / In cases where the loop skeleton is more complicated (eg. epilogue
572- // / vectorization) and the resume values can come from an additional bypass
573- // / block, the \p AdditionalBypass pair provides information about the bypass
574- // / block and the end value on the edge from bypass to this loop.
575- void createInductionResumeValues (
576- const SCEV2ValueTy &ExpandedSCEVs,
577- std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
581+ // / In cases where the loop skeleton is more complicated (i.e., epilogue
582+ // / vectorization), \p MainVectorTripCount is the main vector loop trip count,
583+ // / used to compute the resume values from the additional bypass block.
584+ void createInductionResumeValues (const SCEV2ValueTy &ExpandedSCEVs,
585+ Value *MainVectorTripCount = nullptr );
578586
579587 // / Allow subclasses to override and print debug traces before/after vplan
580588 // / execution, when trace information is requested.
@@ -664,6 +672,11 @@ class InnerLoopVectorizer {
664672 // / for cleaning the checks, if vectorization turns out unprofitable.
665673 GeneratedRTChecks &RTChecks;
666674
675+ // / The additional bypass block which conditionally skips over the epilogue
676+ // / loop after executing the main loop. Needed to resume inductions and
677+ // / reductions during epilogue vectorization.
678+ BasicBlock *AdditionalBypassBlock = nullptr ;
679+
667680 VPlan &Plan;
668681};
669682
@@ -2582,18 +2595,16 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25822595
25832596PHINode *InnerLoopVectorizer::createInductionResumeValue (
25842597 PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2585- ArrayRef<BasicBlock *> BypassBlocks,
2586- std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598+ ArrayRef<BasicBlock *> BypassBlocks, Value *MainVectorTripCount) {
25872599 Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
25882600 assert (VectorTripCount && " Expected valid arguments" );
25892601
25902602 Instruction *OldInduction = Legal->getPrimaryInduction ();
2591- Value *EndValue = nullptr ;
2592- Value *EndValueFromAdditionalBypass = AdditionalBypass.second ;
2593- if (OrigPhi == OldInduction) {
2594- // We know what the end value is.
2595- EndValue = VectorTripCount;
2596- } else {
2603+ // For the primary induction the end values are known.
2604+ Value *EndValue = VectorTripCount;
2605+ Value *EndValueFromAdditionalBypass = MainVectorTripCount;
2606+ // Otherwise compute them accordingly.
2607+ if (OrigPhi != OldInduction) {
25972608 IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
25982609
25992610 // Fast-math-flags propagate from the original induction instruction.
@@ -2605,12 +2616,12 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26052616 EndValue->setName (" ind.end" );
26062617
26072618 // Compute the end value for the additional bypass (if applicable).
2608- if (AdditionalBypass. first ) {
2609- B.SetInsertPoint (AdditionalBypass. first ,
2610- AdditionalBypass. first ->getFirstInsertionPt ());
2619+ if (MainVectorTripCount ) {
2620+ B.SetInsertPoint (getAdditionalBypassBlock () ,
2621+ getAdditionalBypassBlock () ->getFirstInsertionPt ());
26112622 EndValueFromAdditionalBypass =
2612- emitTransformedIndex (B, AdditionalBypass. second , II.getStartValue (),
2613- Step, II.getKind (), II.getInductionBinOp ());
2623+ emitTransformedIndex (B, MainVectorTripCount , II.getStartValue (), Step ,
2624+ II.getKind (), II.getInductionBinOp ());
26142625 EndValueFromAdditionalBypass->setName (" ind.end" );
26152626 }
26162627 }
@@ -2632,8 +2643,8 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26322643 for (BasicBlock *BB : BypassBlocks)
26332644 BCResumeVal->addIncoming (II.getStartValue (), BB);
26342645
2635- if (AdditionalBypass. first )
2636- BCResumeVal->setIncomingValueForBlock (AdditionalBypass. first ,
2646+ if (MainVectorTripCount )
2647+ BCResumeVal->setIncomingValueForBlock (getAdditionalBypassBlock () ,
26372648 EndValueFromAdditionalBypass);
26382649 return BCResumeVal;
26392650}
@@ -2653,11 +2664,7 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26532664}
26542665
26552666void InnerLoopVectorizer::createInductionResumeValues (
2656- const SCEV2ValueTy &ExpandedSCEVs,
2657- std::pair<BasicBlock *, Value *> AdditionalBypass) {
2658- assert (((AdditionalBypass.first && AdditionalBypass.second ) ||
2659- (!AdditionalBypass.first && !AdditionalBypass.second )) &&
2660- " Inconsistent information about additional bypass." );
2667+ const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
26612668 // We are going to resume the execution of the scalar loop.
26622669 // Go over all of the induction variables that we found and fix the
26632670 // PHIs that are left in the scalar version of the loop.
@@ -2670,7 +2677,7 @@ void InnerLoopVectorizer::createInductionResumeValues(
26702677 const InductionDescriptor &II = InductionEntry.second ;
26712678 PHINode *BCResumeVal = createInductionResumeValue (
26722679 OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
2673- AdditionalBypass );
2680+ MainVectorTripCount );
26742681 OrigPhi->setIncomingValueForBlock (LoopScalarPreHeader, BCResumeVal);
26752682 }
26762683}
@@ -7918,6 +7925,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79187925 nullptr , " vec.epilog.iter.check" , true );
79197926 emitMinimumVectorEpilogueIterCountCheck (LoopScalarPreHeader,
79207927 VecEpilogueIterationCountCheck);
7928+ AdditionalBypassBlock = VecEpilogueIterationCountCheck;
79217929
79227930 // Adjust the control flow taking the state info from the main loop
79237931 // vectorization into account.
@@ -8002,11 +8010,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80028010 // iterations left once the vector loop has completed.
80038011 // Note that when the vectorized epilogue is skipped due to iteration count
80048012 // check, then the resume value for the induction variable comes from
8005- // the trip count of the main vector loop, hence passing the AdditionalBypass
8006- // argument.
8007- createInductionResumeValues (ExpandedSCEVs,
8008- {VecEpilogueIterationCountCheck,
8009- EPI.VectorTripCount } /* AdditionalBypass */ );
8013+ // the trip count of the main vector loop, passed as the second argument.
8014+ createInductionResumeValues (ExpandedSCEVs, EPI.VectorTripCount );
80108015
80118016 return {LoopVectorPreHeader, EPResumeVal};
80128017}
@@ -10325,7 +10330,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1032510330
1032610331 ResumeV = MainILV.createInductionResumeValue (
1032710332 IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
10328- { EPI.MainLoopIterationCountCheck } );
10333+ EPI.MainLoopIterationCountCheck );
1032910334 }
1033010335 assert (ResumeV && " Must have a resume value" );
1033110336 VPValue *StartVal = BestEpiPlan.getOrAddLiveIn (ResumeV);
0 commit comments