@@ -523,7 +523,7 @@ class InnerLoopVectorizer {
523523 // / and the resume values can come from an additional bypass block, the \p
524524 // / AdditionalBypass pair provides information about the bypass block and the
525525 // / end value on the edge from bypass to this loop.
526- void createInductionResumeValue (
526+ void createInductionBypassValue (
527527 PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
528528 ArrayRef<BasicBlock *> BypassBlocks,
529529 std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
@@ -574,15 +574,11 @@ class InnerLoopVectorizer {
574574 // / vector loop preheader, middle block and scalar preheader.
575575 void createVectorLoopSkeleton (StringRef Prefix);
576576
577- // / Create new phi nodes for the induction variables to resume iteration count
578- // / in the scalar epilogue, from where the vectorized loop left off.
579- // / In cases where the loop skeleton is more complicated (eg. epilogue
580- // / vectorization) and the resume values can come from an additional bypass
581- // / block, the \p AdditionalBypass pair provides information about the bypass
582- // / block and the end value on the edge from bypass to this loop.
583- void createInductionResumeValues (
577+ // / Create, for every induction variable, the value from which iteration
578+ // / resumes on the edge from the \p AdditionalBypass block.
579+ void createInductionBypassValues (
584580 const SCEV2ValueTy &ExpandedSCEVs,
585- std::pair<BasicBlock *, Value *> AdditionalBypass = { nullptr , nullptr } );
581+ std::pair<BasicBlock *, Value *> AdditionalBypass);
586582
587583 // / Allow subclasses to override and print debug traces before/after vplan
588584 // / execution, when trace information is requested.
@@ -2602,30 +2598,19 @@ static void addOperandToPhiInVPIRBasicBlock(VPIRBasicBlock *VPBB, PHINode *P,
26022598 }
26032599}
26042600
2605- void InnerLoopVectorizer::createInductionResumeValue (
2601+ void InnerLoopVectorizer::createInductionBypassValue (
26062602 PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
26072603 ArrayRef<BasicBlock *> BypassBlocks,
26082604 std::pair<BasicBlock *, Value *> AdditionalBypass) {
2609- Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2610- assert (VectorTripCount && " Expected valid arguments" );
2611-
26122605 Instruction *OldInduction = Legal->getPrimaryInduction ();
2613- Value *EndValue = nullptr ;
26142606 Value *EndValueFromAdditionalBypass = AdditionalBypass.second ;
2615- if (OrigPhi == OldInduction) {
2616- // We know what the end value is.
2617- EndValue = VectorTripCount;
2618- } else {
2607+ if (OrigPhi != OldInduction) {
26192608 IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
26202609
26212610 // Fast-math-flags propagate from the original induction instruction.
26222611 if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
26232612 B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
26242613
2625- EndValue = emitTransformedIndex (B, VectorTripCount, II.getStartValue (),
2626- Step, II.getKind (), II.getInductionBinOp ());
2627- EndValue->setName (" ind.end" );
2628-
26292614 // Compute the end value for the additional bypass (if applicable).
26302615 if (AdditionalBypass.first ) {
26312616 B.SetInsertPoint (AdditionalBypass.first ,
@@ -2637,26 +2622,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
26372622 }
26382623 }
26392624
2640- VPBasicBlock *MiddleVPBB =
2641- cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
2642-
2643- VPBasicBlock *ScalarPHVPBB = nullptr ;
2644- if (MiddleVPBB->getNumSuccessors () == 2 ) {
2645- // Order is strict: first is the exit block, second is the scalar preheader.
2646- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
2647- } else {
2648- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
2649- }
2650-
2651- VPBuilder ScalarPHBuilder (ScalarPHVPBB);
2652- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
2653- VPInstruction::ResumePhi,
2654- {Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2655- OrigPhi->getDebugLoc (), " bc.resume.val" );
2656-
2657- auto *ScalarLoopHeader =
2658- cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
2659- addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
26602625 InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
26612626 EndValueFromAdditionalBypass};
26622627}
@@ -2675,23 +2640,16 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26752640 return I->second ;
26762641}
26772642
2678- void InnerLoopVectorizer::createInductionResumeValues (
2643+ void InnerLoopVectorizer::createInductionBypassValues (
26792644 const SCEV2ValueTy &ExpandedSCEVs,
26802645 std::pair<BasicBlock *, Value *> AdditionalBypass) {
2681- assert (((AdditionalBypass.first && AdditionalBypass.second ) ||
2682- (!AdditionalBypass.first && !AdditionalBypass.second )) &&
2683- " Inconsistent information about additional bypass." );
2684- // We are going to resume the execution of the scalar loop.
2685- // Go over all of the induction variables that we found and fix the
2686- // PHIs that are left in the scalar version of the loop.
2687- // The starting values of PHI nodes depend on the counter of the last
2688- // iteration in the vectorized loop.
2689- // If we come from a bypass edge then we need to start from the original
2690- // start value.
2646+ assert (AdditionalBypass.first && AdditionalBypass.second &&
2647+ " Must have bypass information" );
2648+
26912649 for (const auto &InductionEntry : Legal->getInductionVars ()) {
26922650 PHINode *OrigPhi = InductionEntry.first ;
26932651 const InductionDescriptor &II = InductionEntry.second ;
2694- createInductionResumeValue (OrigPhi, II, getExpandedStep (II, ExpandedSCEVs),
2652+ createInductionBypassValue (OrigPhi, II, getExpandedStep (II, ExpandedSCEVs),
26952653 LoopBypassBlocks, AdditionalBypass);
26962654 }
26972655}
@@ -2754,8 +2712,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
27542712 // faster.
27552713 emitMemRuntimeChecks (LoopScalarPreHeader);
27562714
2757- // Emit phis for the new starting index of the scalar loop.
2758- createInductionResumeValues (ExpandedSCEVs );
2715+ Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2716+ assert (VectorTripCount && " Expected valid arguments " );
27592717
27602718 return {LoopVectorPreHeader, nullptr };
27612719}
@@ -7719,6 +7677,18 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77197677 ILV.getOrCreateVectorTripCount (nullptr ),
77207678 CanonicalIVStartValue, State);
77217679
7680+ VPBasicBlock *MiddleVPBB =
7681+ cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7682+
7683+ VPBasicBlock *ScalarPHVPBB = nullptr ;
7684+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
7685+ // Order is strict: first is the exit block, second is the scalar
7686+ // preheader.
7687+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
7688+ } else {
7689+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
7690+ }
7691+
77227692 BestVPlan.execute (&State);
77237693
77247694 // 2.5 Collect reduction resume values.
@@ -7836,7 +7806,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78367806 } else
78377807 continue ;
78387808
7839- createInductionResumeValue (IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
7809+ createInductionBypassValue (IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
78407810 LoopBypassBlocks);
78417811 }
78427812
@@ -8006,20 +7976,22 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80067976 // Generate a resume induction for the vector epilogue and put it in the
80077977 // vector epilogue preheader
80087978 Type *IdxTy = Legal->getWidestInductionType ();
7979+
80097980 PHINode *EPResumeVal = PHINode::Create (IdxTy, 2 , " vec.epilog.resume.val" );
80107981 EPResumeVal->insertBefore (LoopVectorPreHeader->getFirstNonPHIIt ());
80117982 EPResumeVal->addIncoming (EPI.VectorTripCount , VecEpilogueIterationCountCheck);
80127983 EPResumeVal->addIncoming (ConstantInt::get (IdxTy, 0 ),
80137984 EPI.MainLoopIterationCountCheck );
80147985
8015- // Generate induction resume values. These variables save the new starting
8016- // indexes for the scalar loop. They are used to test if there are any tail
8017- // iterations left once the vector loop has completed.
7986+ Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
7987+ assert (VectorTripCount && " Expected valid arguments" );
7988+
7989+ // Generate induction resume values for the bypass blocks.
80187990 // Note that when the vectorized epilogue is skipped due to iteration count
80197991 // check, then the resume value for the induction variable comes from
80207992 // the trip count of the main vector loop, hence passing the AdditionalBypass
80217993 // argument.
8022- createInductionResumeValues (ExpandedSCEVs,
7994+ createInductionBypassValues (ExpandedSCEVs,
80237995 {VecEpilogueIterationCountCheck,
80247996 EPI.VectorTripCount } /* AdditionalBypass */ );
80257997
@@ -8932,6 +8904,74 @@ addUsersInExitBlock(VPlan &Plan,
89328904 }
89338905}
89348906
8907+ static void addResumeValuesForInductions (VPlan &Plan) {
8908+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8909+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8910+
8911+ VPBuilder Builder (
8912+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSinglePredecessor ()));
8913+ for (VPRecipeBase &R : Header->phis ()) {
8914+ PHINode *OrigPhi;
8915+ const InductionDescriptor *ID;
8916+ VPValue *Start;
8917+ VPValue *Step;
8918+ Type *ScalarTy;
8919+ bool IsCanonical = false ;
8920+ if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
8921+ if (WideIV->getTruncInst ())
8922+ continue ;
8923+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8924+ ID = &WideIV->getInductionDescriptor ();
8925+ Start = WideIV->getStartValue ();
8926+ Step = WideIV->getStepValue ();
8927+ ScalarTy = WideIV->getScalarType ();
8928+ IsCanonical = WideIV->isCanonical ();
8929+ } else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
8930+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8931+ ID = &WideIV->getInductionDescriptor ();
8932+ Start = WideIV->getStartValue ();
8933+ Step = WideIV->getOperand (1 );
8934+ ScalarTy = Start->getLiveInIRValue ()->getType ();
8935+ } else {
8936+ continue ;
8937+ }
8938+
8939+ VPValue *EndValue = &Plan.getVectorTripCount ();
8940+ if (!IsCanonical) {
8941+ EndValue = Builder.createDerivedIV (
8942+ ID->getKind (),
8943+ dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp ()), Start,
8944+ &Plan.getVectorTripCount (), Step);
8945+ }
8946+
8947+ if (ScalarTy != TypeInfo.inferScalarType (EndValue)) {
8948+ EndValue =
8949+ Builder.createScalarCast (Instruction::Trunc, EndValue, ScalarTy);
8950+ }
8951+
8952+ VPBasicBlock *MiddleVPBB =
8953+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8954+
8955+ VPBasicBlock *ScalarPHVPBB = nullptr ;
8956+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
8957+ // Order is strict: first is the exit block, second is the scalar
8958+ // preheader.
8959+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
8960+ } else {
8961+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
8962+ }
8963+
8964+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
8965+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
8966+ VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc (),
8967+ " bc.resume.val" );
8968+
8969+ auto *ScalarLoopHeader =
8970+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
8971+ addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8972+ }
8973+ }
8974+
89358975// / Handle users in the exit block for first order reductions in the original
89368976// / exit block. The penultimate value of recurrences is fed to their LCSSA phi
89378977// / users in the original exit block using the VPIRInstruction wrapping to the
@@ -9205,6 +9245,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92059245 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
92069246 addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
92079247 addUsersInExitBlock (*Plan, ExitUsersToFix);
9248+ addResumeValuesForInductions (*Plan);
9249+
92089250 // ---------------------------------------------------------------------------
92099251 // Transform initial VPlan: Apply previously taken decisions, in order, to
92109252 // bring the VPlan to its final state.
@@ -9315,6 +9357,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
93159357 bool HasNUW = true ;
93169358 addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
93179359 DebugLoc ());
9360+ addResumeValuesForInductions (*Plan);
93189361 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
93199362 return Plan;
93209363}
@@ -9599,7 +9642,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
95999642 State.Builder , CanonicalIV, getStartValue ()->getLiveInIRValue (), Step,
96009643 Kind, cast_if_present<BinaryOperator>(FPBinOp));
96019644 DerivedIV->setName (Name);
9602- assert (DerivedIV != CanonicalIV && " IV didn't need transforming?" );
9645+ /* FIXME(review): assertion disabled, not fixed - restore or delete:
9646+    assert((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) && "IV didn't need transforming?"); */
96039647
96049648 State.set (this , DerivedIV, VPLane (0 ));
96059649}
@@ -10268,6 +10312,52 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1026810312 EPI, &LVL, &CM, BFI, PSI, Checks,
1026910313 *BestMainPlan);
1027010314
10315+ VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
10316+ // Collect the PHI nodes of wide inductions in the VPlan for the epilogue.
10317+ // Those need their resume values computed from the main vector loop; the
10318+ // resume phis of all other inductions are removed from the main VPlan below.
10319+ SmallPtrSet<PHINode *, 2 > WidenedPhis;
10320+ for (VPRecipeBase &R :
10321+ BestEpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10322+ if (!isa<VPWidenIntOrFpInductionRecipe,
10323+ VPWidenPointerInductionRecipe>(&R))
10324+ continue ;
10325+ if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10326+ WidenedPhis.insert (
10327+ cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10328+ else
10329+ WidenedPhis.insert (
10330+ cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10331+ }
10332+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10333+ BestMainPlan->getVectorLoopRegion ()->getSingleSuccessor ());
10334+
10335+ VPBasicBlock *ScalarPHVPBB = nullptr ;
10336+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
10337+ // Order is strict: first is the exit block, second is the scalar
10338+ // preheader.
10339+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
10340+ } else {
10341+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
10342+ }
10343+
10344+ for (VPRecipeBase &R :
10345+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ())) {
10346+ auto *VPIRInst = cast<VPIRInstruction>(&R);
10347+ auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10348+ if (!IRI)
10349+ break ;
10350+ if (WidenedPhis.contains (IRI) ||
10351+ !LVL.getInductionVars ().contains (IRI))
10352+ continue ;
10353+ VPRecipeBase *ResumePhi =
10354+ VPIRInst->getOperand (0 )->getDefiningRecipe ();
10355+ VPIRInst->setOperand (0 , BestMainPlan->getOrAddLiveIn (
10356+ Constant::getNullValue (IRI->getType ())));
10357+ ResumePhi->eraseFromParent ();
10358+ }
10359+ VPlanTransforms::removeDeadRecipes (*BestMainPlan);
10360+
1027110361 auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
1027210362 *BestMainPlan, MainILV, DT, false );
1027310363 ++LoopsVectorized;
@@ -10276,7 +10366,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1027610366 // edges from the first pass.
1027710367 EPI.MainLoopVF = EPI.EpilogueVF ;
1027810368 EPI.MainLoopUF = EPI.EpilogueUF ;
10279- VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
1028010369 EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TLI, TTI, AC,
1028110370 ORE, EPI, &LVL, &CM, BFI, PSI,
1028210371 Checks, BestEpiPlan);
0 commit comments