@@ -2575,22 +2575,15 @@ void InnerLoopVectorizer::createInductionResumeValue(
25752575 assert (VectorTripCount && " Expected valid arguments" );
25762576
25772577 Instruction *OldInduction = Legal->getPrimaryInduction ();
2578- Value *EndValue = nullptr ;
25792578 Value *EndValueFromAdditionalBypass = AdditionalBypass.second ;
25802579 if (OrigPhi == OldInduction) {
2581- // We know what the end value is.
2582- EndValue = VectorTripCount;
25832580 } else {
25842581 IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
25852582
25862583 // Fast-math-flags propagate from the original induction instruction.
25872584 if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
25882585 B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
25892586
2590- EndValue = emitTransformedIndex (B, VectorTripCount, II.getStartValue (),
2591- Step, II.getKind (), II.getInductionBinOp ());
2592- EndValue->setName (" ind.end" );
2593-
25942587 // Compute the end value for the additional bypass (if applicable).
25952588 if (AdditionalBypass.first ) {
25962589 B.SetInsertPoint (AdditionalBypass.first ,
@@ -2602,26 +2595,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
26022595 }
26032596 }
26042597
2605- VPBasicBlock *MiddleVPBB =
2606- cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
2607-
2608- VPBasicBlock *ScalarPHVPBB = nullptr ;
2609- if (MiddleVPBB->getNumSuccessors () == 2 ) {
2610- // Order is strict: first is the exit block, second is the scalar preheader.
2611- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
2612- } else {
2613- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
2614- }
2615-
2616- VPBuilder ScalarPHBuilder (ScalarPHVPBB);
2617- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
2618- VPInstruction::ResumePhi,
2619- {Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2620- OrigPhi->getDebugLoc (), " bc.resume.val" );
2621-
2622- auto *ScalarLoopHeader =
2623- cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
2624- addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
26252598 InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
26262599 EndValueFromAdditionalBypass};
26272600}
@@ -7660,10 +7633,22 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76607633 ILV.getOrCreateVectorTripCount (nullptr ),
76617634 CanonicalIVStartValue, State);
76627635
7636+ VPBasicBlock *MiddleVPBB =
7637+ cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7638+
7639+ VPBasicBlock *ScalarPHVPBB = nullptr ;
7640+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
7641+ // Order is strict: first is the exit block, second is the scalar
7642+ // preheader.
7643+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
7644+ } else {
7645+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
7646+ }
7647+
76637648 BestVPlan.execute (&State);
76647649
76657650 // 2.5 Collect reduction resume values.
7666- auto *ExitVPBB =
7651+ VPBasicBlock *ExitVPBB =
76677652 cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
76687653 for (VPRecipeBase &R : *ExitVPBB) {
76697654 createAndCollectMergePhiForReduction (
@@ -7948,6 +7933,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79487933 // Generate a resume induction for the vector epilogue and put it in the
79497934 // vector epilogue preheader
79507935 Type *IdxTy = Legal->getWidestInductionType ();
7936+
79517937 PHINode *EPResumeVal = PHINode::Create (IdxTy, 2 , " vec.epilog.resume.val" );
79527938 EPResumeVal->insertBefore (LoopVectorPreHeader->getFirstNonPHIIt ());
79537939 EPResumeVal->addIncoming (EPI.VectorTripCount , VecEpilogueIterationCountCheck);
@@ -8835,6 +8821,74 @@ addUsersInExitBlock(VPlan &Plan,
88358821 }
88368822}
88378823
8824+ static void addResumeValuesForInductions (VPlan &Plan) {
8825+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8826+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8827+
8828+ VPBuilder Builder (
8829+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSinglePredecessor ()));
8830+ for (VPRecipeBase &R : Header->phis ()) {
8831+ PHINode *OrigPhi;
8832+ const InductionDescriptor *ID;
8833+ VPValue *Start;
8834+ VPValue *Step;
8835+ Type *ScalarTy;
8836+ bool IsCanonical = false ;
8837+ if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
8838+ if (WideIV->getTruncInst ())
8839+ continue ;
8840+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8841+ ID = &WideIV->getInductionDescriptor ();
8842+ Start = WideIV->getStartValue ();
8843+ Step = WideIV->getStepValue ();
8844+ ScalarTy = WideIV->getScalarType ();
8845+ IsCanonical = WideIV->isCanonical ();
8846+ } else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
8847+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8848+ ID = &WideIV->getInductionDescriptor ();
8849+ Start = WideIV->getStartValue ();
8850+ Step = WideIV->getOperand (1 );
8851+ ScalarTy = Start->getLiveInIRValue ()->getType ();
8852+ } else {
8853+ continue ;
8854+ }
8855+
8856+ VPValue *EndValue = &Plan.getVectorTripCount ();
8857+ if (!IsCanonical) {
8858+ EndValue = Builder.createDerivedIV (
8859+ ID->getKind (),
8860+ dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp ()), Start,
8861+ &Plan.getVectorTripCount (), Step);
8862+ }
8863+
8864+ if (ScalarTy != TypeInfo.inferScalarType (EndValue)) {
8865+ EndValue =
8866+ Builder.createScalarCast (Instruction::Trunc, EndValue, ScalarTy);
8867+ }
8868+
8869+ VPBasicBlock *MiddleVPBB =
8870+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8871+
8872+ VPBasicBlock *ScalarPHVPBB = nullptr ;
8873+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
8874+ // Order is strict: first is the exit block, second is the scalar
8875+ // preheader.
8876+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
8877+ } else {
8878+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
8879+ }
8880+
8881+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
8882+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
8883+ VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc (),
8884+ " bc.resume.val" );
8885+
8886+ auto *ScalarLoopHeader =
8887+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
8888+ addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8889+ }
8890+ }
8891+
88388892// / Handle live-outs for first order reductions, both in the scalar preheader
88398893// / and the original exit block:
88408894// / 1. Feed a resume value for every FOR from the vector loop to the scalar
@@ -9145,6 +9199,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91459199 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
91469200 addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
91479201 addUsersInExitBlock (*Plan, ExitUsersToFix);
9202+ addResumeValuesForInductions (*Plan);
91489203
91499204 // ---------------------------------------------------------------------------
91509205 // Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9250,6 +9305,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
92509305 bool HasNUW = true ;
92519306 addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
92529307 DebugLoc ());
9308+ addResumeValuesForInductions (*Plan);
92539309 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
92549310 return Plan;
92559311}
@@ -9533,7 +9589,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
95339589 State.Builder , CanonicalIV, getStartValue ()->getLiveInIRValue (), Step,
95349590 Kind, cast_if_present<BinaryOperator>(FPBinOp));
95359591 DerivedIV->setName (" offset.idx" );
9536- assert (DerivedIV != CanonicalIV && " IV didn't need transforming?" );
9592+ assert ((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
9593+ " IV didn't need transforming?" );
95379594
95389595 State.set (this , DerivedIV, VPLane (0 ));
95399596}
@@ -10202,6 +10259,50 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1020210259 EPI, &LVL, &CM, BFI, PSI, Checks,
1020310260 *BestMainPlan);
1020410261
10262+ VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
10263+ // Collect PHI nodes of wide inductions in the VPlan for the epilogue. Those will need their resume-values computed from the main vector loop. Others can be removed in the main VPlan.
10264+ SmallPtrSet<PHINode *, 2 > WidenedPhis;
10265+ for (VPRecipeBase &R :
10266+ BestEpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10267+ if (!isa<VPWidenIntOrFpInductionRecipe,
10268+ VPWidenPointerInductionRecipe>(&R))
10269+ continue ;
10270+ if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10271+ WidenedPhis.insert (
10272+ cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10273+ else
10274+ WidenedPhis.insert (
10275+ cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10276+ }
10277+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10278+ BestMainPlan->getVectorLoopRegion ()->getSingleSuccessor ());
10279+
10280+ VPBasicBlock *ScalarPHVPBB = nullptr ;
10281+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
10282+ // Order is strict: first is the exit block, second is the scalar
10283+ // preheader.
10284+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
10285+ } else {
10286+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
10287+ }
10288+
10289+ for (VPRecipeBase &R :
10290+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ())) {
10291+ auto *VPIRInst = cast<VPIRInstruction>(&R);
10292+ auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10293+ if (!IRI)
10294+ break ;
10295+ if (WidenedPhis.contains (IRI) ||
10296+ !LVL.getInductionVars ().contains (IRI))
10297+ continue ;
10298+ VPRecipeBase *ResumePhi =
10299+ VPIRInst->getOperand (0 )->getDefiningRecipe ();
10300+ VPIRInst->setOperand (0 , BestMainPlan->getOrAddLiveIn (
10301+ Constant::getNullValue (IRI->getType ())));
10302+ ResumePhi->eraseFromParent ();
10303+ }
10304+ VPlanTransforms::removeDeadRecipes (*BestMainPlan);
10305+
1020510306 auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
1020610307 *BestMainPlan, MainILV, DT, true );
1020710308 ++LoopsVectorized;
@@ -10210,7 +10311,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1021010311 // edges from the first pass.
1021110312 EPI.MainLoopVF = EPI.EpilogueVF ;
1021210313 EPI.MainLoopUF = EPI.EpilogueUF ;
10213- VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
1021410314 EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TLI, TTI, AC,
1021510315 ORE, EPI, &LVL, &CM, BFI, PSI,
1021610316 Checks, BestEpiPlan);
0 commit comments