@@ -8918,8 +8918,12 @@ static void addScalarResumePhis(
8918
8918
ScalarPhiIRI->addOperand (ResumePhi);
8919
8919
continue ;
8920
8920
}
8921
- if (!isa<VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe>(VectorPhiR))
8921
+ if (!isa<VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe>(
8922
+ VectorPhiR)) {
8923
+ assert (cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst () &&
8924
+ " should only skip truncated wide inductions" );
8922
8925
continue ;
8926
+ }
8923
8927
// The backedge value provides the value to resume coming out of a loop,
8924
8928
// which for FORs is a vector whose last element needs to be extracted. The
8925
8929
// start value provides the value if the loop is bypassed.
@@ -10031,6 +10035,63 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
10031
10035
VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced ||
10032
10036
!EnableLoopVectorization) {}
10033
10037
10038
+ /// Prepare \p MainPlan for vectorizing the main vector loop during epilogue
10039
+ /// vectorization. Remove ResumePhis from \p MainPlan for inductions if they
10040
+ /// don't have a corresponding wide induction in \p EpiPlan. Afterwards, make sure the scalar preheader of \p MainPlan contains a ResumePhi with operands (vector trip count, 0), creating one if none matches. \p Inductions is only queried for membership of the scalar-header phis.
10041
+ static void preparePlanForMainVectorLoop (
10042
+ VPlan &MainPlan, VPlan &EpiPlan,
10043
+ const MapVector<PHINode *, InductionDescriptor> &Inductions) {
10044
+ // Collect PHI nodes of wide inductions in the VPlan for the epilogue. Those
10045
+ // will need their resume-values computed from the main vector loop. Others
10046
+ // can be removed in the main VPlan. Only the phis of the entry block of EpiPlan's vector loop region are scanned.
10047
+ SmallPtrSet<PHINode *, 2 > WidenedPhis;
10048
+ for (VPRecipeBase &R :
10049
+ EpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10050
+ if (!isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(&R))
10051
+ continue ;
10052
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10053
+ WidenedPhis.insert (cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10054
+ else
10055
+ WidenedPhis.insert (
10056
+ cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10057
+ }
10058
+ for (VPRecipeBase &R : *cast<VPIRBasicBlock>(MainPlan.getScalarHeader ())) {
10059
+ auto *VPIRInst = cast<VPIRInstruction>(&R);
10060
+ auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10061
+ if (!IRI)
10062
+ break ; // Stop at the first wrapped instruction that is not a PHINode (phis are presumably grouped at the top of the header).
10063
+ if (WidenedPhis.contains (IRI) || !Inductions.contains (IRI))
10064
+ continue ;
10065
+ // There is no corresponding wide induction in the epilogue plan that would
10066
+ // need a resume value. Set the operand in VPIRInst to zero, so ResumePhi
10067
+ // can be removed. The resume values for the scalar loop will be created
10068
+ // during execution of EpiPlan. NOTE(review): assumes operand 0 is defined by a recipe (getDefiningRecipe() is non-null) that has no other users -- confirm.
10069
+ VPRecipeBase *ResumePhi = VPIRInst->getOperand (0 )->getDefiningRecipe ();
10070
+ VPIRInst->setOperand (
10071
+ 0 , MainPlan.getOrAddLiveIn (Constant::getNullValue (IRI->getType ())));
10072
+ ResumePhi->eraseFromParent ();
10073
+ }
10074
+
10075
+ using namespace VPlanPatternMatch ;
10076
+ VPBasicBlock *ScalarPHVPBB = MainPlan.getScalarPreheader ();
10077
+ VPValue *VectorTC = &MainPlan.getVectorTripCount ();
10078
+ // If there is no suitable resume value for the canonical induction in the
10079
+ // epilogue loop, create it. "Suitable" here means a ResumePhi in the scalar preheader whose operands match (VectorTC, 0).
10080
+ if (none_of (*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) {
10081
+ return match (&R, m_VPInstruction<VPInstruction::ResumePhi>(
10082
+ m_Specific (VectorTC), m_SpecificInt (0 )));
10083
+ })) {
10084
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
10085
+ // When vectorizing the epilogue, create a resume phi for the
10086
+ // canonical IV if no suitable resume phi was already created. The builder above is constructed with the scalar preheader's begin() as its position; the zero operand uses the canonical IV's scalar type.
10087
+ ScalarPHBuilder.createNaryOp (
10088
+ VPInstruction::ResumePhi,
10089
+ {VectorTC, MainPlan.getOrAddLiveIn (ConstantInt::get (
10090
+ MainPlan.getCanonicalIV ()->getScalarType (), 0 ))},
10091
+ {}, " vec.epilog.resume.val" );
10092
+ }
10093
+ }
10094
+
10034
10095
/// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded
10035
10096
/// SCEVs from \p ExpandedSCEVs and set resume values for header recipes.
10036
10097
static void
@@ -10491,62 +10552,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10491
10552
// to be vectorized by executing the plan (potentially with a different
10492
10553
// factor) again shortly afterwards.
10493
10554
VPlan &BestEpiPlan = LVP.getPlanFor (EpilogueVF.Width );
10555
+ preparePlanForMainVectorLoop (*BestMainPlan, BestEpiPlan,
10556
+ LVL.getInductionVars ());
10494
10557
EpilogueLoopVectorizationInfo EPI (VF.Width , IC, EpilogueVF.Width , 1 ,
10495
10558
BestEpiPlan);
10496
10559
EpilogueVectorizerMainLoop MainILV (L, PSE, LI, DT, TLI, TTI, AC, ORE,
10497
10560
EPI, &LVL, &CM, BFI, PSI, Checks,
10498
10561
*BestMainPlan);
10499
-
10500
- // Collect PHI nodes of wide inductions in the VPlan for the epilogue.
10501
- // Those will need their resume-values computed from the main vector
10502
- // loop. Others can be removed in the main VPlan.
10503
- SmallPtrSet<PHINode *, 2 > WidenedPhis;
10504
- for (VPRecipeBase &R :
10505
- BestEpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10506
- if (!isa<VPWidenIntOrFpInductionRecipe,
10507
- VPWidenPointerInductionRecipe>(&R))
10508
- continue ;
10509
- if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10510
- WidenedPhis.insert (
10511
- cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10512
- else
10513
- WidenedPhis.insert (
10514
- cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10515
- }
10516
- for (VPRecipeBase &R :
10517
- *cast<VPIRBasicBlock>(BestMainPlan->getScalarHeader ())) {
10518
- auto *VPIRInst = cast<VPIRInstruction>(&R);
10519
- auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10520
- if (!IRI)
10521
- break ;
10522
- if (WidenedPhis.contains (IRI) ||
10523
- !LVL.getInductionVars ().contains (IRI))
10524
- continue ;
10525
- VPRecipeBase *ResumePhi =
10526
- VPIRInst->getOperand (0 )->getDefiningRecipe ();
10527
- VPIRInst->setOperand (0 , BestMainPlan->getOrAddLiveIn (
10528
- Constant::getNullValue (IRI->getType ())));
10529
- ResumePhi->eraseFromParent ();
10530
- }
10531
- // VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10532
-
10533
- using namespace VPlanPatternMatch ;
10534
- VPBasicBlock *ScalarPHVPBB = BestMainPlan->getScalarPreheader ();
10535
- VPValue *VectorTC = &BestMainPlan->getVectorTripCount ();
10536
- if (none_of (*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) {
10537
- return match (&R, m_VPInstruction<VPInstruction::ResumePhi>(
10538
- m_Specific (VectorTC), m_SpecificInt (0 )));
10539
- })) {
10540
- VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
10541
- // When vectorizing the epilogue, create a resume phi for the
10542
- // canonical IV if no suitable resume phi was already created.
10543
- ScalarPHBuilder.createNaryOp (
10544
- VPInstruction::ResumePhi,
10545
- {VectorTC, BestMainPlan->getOrAddLiveIn (ConstantInt::get (
10546
- LVL.getWidestInductionType (), 0 ))},
10547
- {}, " vec.epilog.resume.val" );
10548
- }
10549
-
10550
10562
auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10551
10563
*BestMainPlan, MainILV, DT, false );
10552
10564
++LoopsVectorized;
0 commit comments