@@ -8918,8 +8918,12 @@ static void addScalarResumePhis(
89188918 ScalarPhiIRI->addOperand (ResumePhi);
89198919 continue ;
89208920 }
8921- if (!isa<VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe>(VectorPhiR))
8921+ if (!isa<VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe>(
8922+ VectorPhiR)) {
8923+ assert (cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst () &&
8924+ " should only skip truncated wide inductions" );
89228925 continue ;
8926+ }
89238927 // The backedge value provides the value to resume coming out of a loop,
89248928 // which for FORs is a vector whose last element needs to be extracted. The
89258929 // start value provides the value if the loop is bypassed.
@@ -10031,6 +10035,63 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
1003110035 VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced ||
1003210036 !EnableLoopVectorization) {}
1003310037
10038+ /// Prepare \p MainPlan for vectorizing the main vector loop during epilogue
10039+ /// vectorization. Remove ResumePhis from \p MainPlan for phis in \p Inductions if they don't have a corresponding wide induction in
10040+ /// \p EpiPlan, then make sure the scalar preheader of \p MainPlan holds a ResumePhi of (vector trip count, 0), creating vec.epilog.resume.val if none matches.
10041+ static void preparePlanForMainVectorLoop (
10042+ VPlan &MainPlan, VPlan &EpiPlan,
10043+ const MapVector<PHINode *, InductionDescriptor> &Inductions) {
10044+ // Collect the IR PHI nodes behind the wide int/FP and pointer induction recipes
10045+ // in the header of the epilogue plan's vector loop region. Those will need their
10046+ // resume-values computed from the main vector loop. Others can be removed in the main VPlan.
10047+ SmallPtrSet<PHINode *, 2 > WidenedPhis;
10048+ for (VPRecipeBase &R :
10049+ EpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10050+ if (!isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(&R))
10051+ continue ; // Not a wide induction recipe; nothing to record.
10052+ if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10053+ WidenedPhis.insert (cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10054+ else // Pointer induction: the phi is taken from the recipe's underlying IR value.
10055+ WidenedPhis.insert (
10056+ cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10057+ }
10058+ for (VPRecipeBase &R : *cast<VPIRBasicBlock>(MainPlan.getScalarHeader ())) {
10059+ auto *VPIRInst = cast<VPIRInstruction>(&R);
10060+ auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10061+ if (!IRI)
10062+ break ; // Stop at the first wrapped instruction that is not a PHI.
10063+ if (WidenedPhis.contains (IRI) || !Inductions.contains (IRI))
10064+ continue ; // Keep: widened in EpiPlan, or not one of the given inductions.
10065+ // There is no corresponding wide induction in the epilogue plan that would
10066+ // need a resume value. Replace operand 0 of VPIRInst with a null-value live-in
10067+ // of the phi's type, so ResumePhi can be removed. The resume values for the
10068+ // scalar loop will be created during execution of EpiPlan.
10069+ VPRecipeBase *ResumePhi = VPIRInst->getOperand (0 )->getDefiningRecipe (); // NOTE(review): dereferenced unchecked below; assumes operand 0 is recipe-defined - confirm.
10070+ VPIRInst->setOperand (
10071+ 0 , MainPlan.getOrAddLiveIn (Constant::getNullValue (IRI->getType ())));
10072+ ResumePhi->eraseFromParent (); // Erased only after VPIRInst stopped referencing it.
10073+ }
10074+
10075+ using namespace VPlanPatternMatch ;
10076+ VPBasicBlock *ScalarPHVPBB = MainPlan.getScalarPreheader ();
10077+ VPValue *VectorTC = &MainPlan.getVectorTripCount ();
10078+ // If the scalar preheader has no ResumePhi whose operands are exactly the
10079+ // vector trip count and the constant 0, create one for the canonical induction.
10080+ if (none_of (*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) {
10081+ return match (&R, m_VPInstruction<VPInstruction::ResumePhi>(
10082+ m_Specific (VectorTC), m_SpecificInt (0 )));
10083+ })) {
10084+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
10085+ // The builder inserts at the beginning of the scalar preheader. The zero
10086+ // operand is a live-in constant of the canonical IV's scalar type.
10087+ ScalarPHBuilder.createNaryOp (
10088+ VPInstruction::ResumePhi,
10089+ {VectorTC, MainPlan.getOrAddLiveIn (ConstantInt::get (
10090+ MainPlan.getCanonicalIV ()->getScalarType (), 0 ))},
10091+ {}, " vec.epilog.resume.val" );
10092+ }
10093+ }
10094+
1003410095// / Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded
1003510096// / SCEVs from \p ExpandedSCEVs and set resume values for header recipes.
1003610097static void
@@ -10491,62 +10552,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1049110552 // to be vectorized by executing the plan (potentially with a different
1049210553 // factor) again shortly afterwards.
1049310554 VPlan &BestEpiPlan = LVP.getPlanFor (EpilogueVF.Width );
10555+ preparePlanForMainVectorLoop (*BestMainPlan, BestEpiPlan,
10556+ LVL.getInductionVars ());
1049410557 EpilogueLoopVectorizationInfo EPI (VF.Width , IC, EpilogueVF.Width , 1 ,
1049510558 BestEpiPlan);
1049610559 EpilogueVectorizerMainLoop MainILV (L, PSE, LI, DT, TLI, TTI, AC, ORE,
1049710560 EPI, &LVL, &CM, BFI, PSI, Checks,
1049810561 *BestMainPlan);
10499-
10500- // Collect PHI nodes of wide inductions in the VPlan for the epilogue.
10501- // Those will need their resume-values computed from the main vector
10502- // loop. Others can be removed in the main VPlan.
10503- SmallPtrSet<PHINode *, 2 > WidenedPhis;
10504- for (VPRecipeBase &R :
10505- BestEpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10506- if (!isa<VPWidenIntOrFpInductionRecipe,
10507- VPWidenPointerInductionRecipe>(&R))
10508- continue ;
10509- if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10510- WidenedPhis.insert (
10511- cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10512- else
10513- WidenedPhis.insert (
10514- cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10515- }
10516- for (VPRecipeBase &R :
10517- *cast<VPIRBasicBlock>(BestMainPlan->getScalarHeader ())) {
10518- auto *VPIRInst = cast<VPIRInstruction>(&R);
10519- auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10520- if (!IRI)
10521- break ;
10522- if (WidenedPhis.contains (IRI) ||
10523- !LVL.getInductionVars ().contains (IRI))
10524- continue ;
10525- VPRecipeBase *ResumePhi =
10526- VPIRInst->getOperand (0 )->getDefiningRecipe ();
10527- VPIRInst->setOperand (0 , BestMainPlan->getOrAddLiveIn (
10528- Constant::getNullValue (IRI->getType ())));
10529- ResumePhi->eraseFromParent ();
10530- }
10531- // VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10532-
10533- using namespace VPlanPatternMatch ;
10534- VPBasicBlock *ScalarPHVPBB = BestMainPlan->getScalarPreheader ();
10535- VPValue *VectorTC = &BestMainPlan->getVectorTripCount ();
10536- if (none_of (*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) {
10537- return match (&R, m_VPInstruction<VPInstruction::ResumePhi>(
10538- m_Specific (VectorTC), m_SpecificInt (0 )));
10539- })) {
10540- VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
10541- // When vectorizing the epilogue, create a resume phi for the
10542- // canonical IV if no suitable resume phi was already created.
10543- ScalarPHBuilder.createNaryOp (
10544- VPInstruction::ResumePhi,
10545- {VectorTC, BestMainPlan->getOrAddLiveIn (ConstantInt::get (
10546- LVL.getWidestInductionType (), 0 ))},
10547- {}, " vec.epilog.resume.val" );
10548- }
10549-
1055010562 auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
1055110563 *BestMainPlan, MainILV, DT, false );
1055210564 ++LoopsVectorized;
0 commit comments