@@ -517,14 +517,15 @@ class InnerLoopVectorizer {
517
517
// / Fix the non-induction PHIs in \p Plan.
518
518
void fixNonInductionPHIs (VPTransformState &State);
519
519
520
- // / Create the bypass resume value coming from the additional bypass block. \p
521
- // / Step is the SCEV-expanded induction step to use. \p MainVectorTripCount
522
- // / provides the trip count of the main vector loop, used to compute the
523
- // / resume value reaching the scalar loop preheader directly from this
524
- // / additional bypass block.
525
- void createInductionBypassValue (PHINode *OrigPhi,
526
- const InductionDescriptor &ID, Value *Step,
527
- Value *MainVectorTripCount);
520
+ // / Create and record the bypass resume value for an induction Phi coming from
521
+ // / the additional bypass block. \p Step is the SCEV-expanded induction step
522
+ // / to use. \p MainVectorTripCount provides the trip count of the main vector
523
+ // / loop, used to compute the resume value reaching the scalar loop preheader
524
+ // / directly from this additional bypass block.
525
+ void createInductionAdditionalBypassValue (PHINode *OrigPhi,
526
+ const InductionDescriptor &ID,
527
+ Value *Step,
528
+ Value *MainVectorTripCount);
528
529
529
530
// / Returns the original loop trip count.
530
531
Value *getTripCount () const { return TripCount; }
@@ -581,10 +582,10 @@ class InnerLoopVectorizer {
581
582
// / vector loop preheader, middle block and scalar preheader.
582
583
void createVectorLoopSkeleton (StringRef Prefix);
583
584
584
- // / Create values for the induction variables to resume iteration count
585
- // / in the bypass block.
586
- void createInductionBypassValues (const SCEV2ValueTy &ExpandedSCEVs,
587
- Value *MainVectorTripCount);
585
+ // / Create and record the resume values for induction variables coming from
586
+ // / the additional bypass block.
587
+ void createInductionAdditionalBypassValues (const SCEV2ValueTy &ExpandedSCEVs,
588
+ Value *MainVectorTripCount);
588
589
589
590
// / Allow subclasses to override and print debug traces before/after vplan
590
591
// / execution, when trace information is requested.
@@ -2604,10 +2605,12 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2604
2605
nullptr , Twine (Prefix) + " scalar.ph" );
2605
2606
}
2606
2607
2607
- void InnerLoopVectorizer::createInductionBypassValue (
2608
+ void InnerLoopVectorizer::createInductionAdditionalBypassValue (
2608
2609
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2609
2610
Value *MainVectorTripCount) {
2610
2611
Instruction *OldInduction = Legal->getPrimaryInduction ();
2612
+ // For the primary induction the additional bypass end value is known.
2613
+ // Otherwise it is computed.
2611
2614
Value *EndValueFromAdditionalBypass = MainVectorTripCount;
2612
2615
if (OrigPhi != OldInduction) {
2613
2616
IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
@@ -2616,7 +2619,7 @@ void InnerLoopVectorizer::createInductionBypassValue(
2616
2619
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2617
2620
B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2618
2621
2619
- // Compute the end value for the additional bypass (if applicable) .
2622
+ // Compute the end value for the additional bypass.
2620
2623
if (MainVectorTripCount) {
2621
2624
B.SetInsertPoint (getAdditionalBypassBlock (),
2622
2625
getAdditionalBypassBlock ()->getFirstInsertionPt ());
@@ -2672,15 +2675,15 @@ static void addFullyUnrolledInstructionsToIgnore(
2672
2675
}
2673
2676
}
2674
2677
2675
- void InnerLoopVectorizer::createInductionBypassValues (
2678
+ void InnerLoopVectorizer::createInductionAdditionalBypassValues (
2676
2679
const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
2677
2680
assert (MainVectorTripCount && " Must have bypass information" );
2678
2681
2679
2682
for (const auto &InductionEntry : Legal->getInductionVars ()) {
2680
2683
PHINode *OrigPhi = InductionEntry.first ;
2681
2684
const InductionDescriptor &II = InductionEntry.second ;
2682
- createInductionBypassValue (OrigPhi, II, getExpandedStep (II, ExpandedSCEVs),
2683
- MainVectorTripCount);
2685
+ createInductionAdditionalBypassValue (
2686
+ OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), MainVectorTripCount);
2684
2687
}
2685
2688
}
2686
2689
@@ -2741,9 +2744,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2741
2744
// faster.
2742
2745
emitMemRuntimeChecks (LoopScalarPreHeader);
2743
2746
2744
- Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2745
- assert (VectorTripCount && " Expected valid arguments" );
2746
-
2747
2747
return LoopVectorPreHeader;
2748
2748
}
2749
2749
@@ -7736,8 +7736,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7736
7736
// ===------------------------------------------------===//
7737
7737
7738
7738
// 2. Copy and widen instructions from the old loop into the new loop.
7739
- BestVPlan.prepareToExecute (ILV.getTripCount (),
7740
- ILV.getOrCreateVectorTripCount (nullptr ), State);
7739
+ BestVPlan.prepareToExecute (
7740
+ ILV.getTripCount (),
7741
+ ILV.getOrCreateVectorTripCount (ILV.LoopVectorPreHeader ), State);
7741
7742
7742
7743
BestVPlan.execute (&State);
7743
7744
@@ -7844,8 +7845,6 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7844
7845
// Generate the induction variable.
7845
7846
EPI.VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
7846
7847
7847
- createInductionResumeVPValues (ExpandedSCEVs, nullptr , &WideIVs);
7848
-
7849
7848
return LoopVectorPreHeader;
7850
7849
}
7851
7850
@@ -8010,14 +8009,11 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8010
8009
Phi->removeIncomingValue (EPI.MemSafetyCheck );
8011
8010
}
8012
8011
8013
- // Generate induction resume values. These variables save the new starting
8014
- // indexes for the scalar loop. They are used to test if there are any tail
8015
- // iterations left once the vector loop has completed.
8016
- // Note that when the vectorized epilogue is skipped due to iteration count
8017
- // check, then the resume value for the induction variable comes from
8018
- // the trip count of the main vector loop, passed as the second argument.
8019
- createInductionResumeVPValues (ExpandedSCEVs, EPI.VectorTripCount );
8020
-
8012
+ // Generate bypass values from the additional bypass block. Note that when the
8013
+ // vectorized epilogue is skipped due to the iteration count check, then the
8014
+ // resume value for the induction variable comes from the trip count of the
8015
+ // main vector loop, passed as the second argument.
8016
+ createInductionAdditionalBypassValues (ExpandedSCEVs, EPI.VectorTripCount );
8021
8017
return LoopVectorPreHeader;
8022
8018
}
8023
8019
@@ -8822,30 +8818,33 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8822
8818
{CanonicalIVIncrement, &Plan.getVectorTripCount ()}, DL);
8823
8819
}
8824
8820
8821
+ // / Create a ResumePhi for \p PhiR, if it is a wide induction recipe. If the
8822
+ // / induction recipe is not canonical, create a VPDerivedIVRecipe to compute
8823
+ // / the end value of the induction.
8825
8824
static VPValue *addResumeValuesForInduction (VPHeaderPHIRecipe *PhiR,
8826
- VPBuilder &Builder ,
8825
+ VPBuilder &VectorPHBuilder ,
8827
8826
VPBuilder &ScalarPHBuilder,
8828
8827
VPTypeAnalysis &TypeInfo,
8829
8828
VPValue *VectorTC) {
8830
8829
PHINode *OrigPhi;
8831
8830
const InductionDescriptor *ID;
8832
- VPValue *Start;
8831
+ VPValue *Start = PhiR-> getStartValue () ;
8833
8832
VPValue *Step;
8834
8833
Type *ScalarTy;
8835
8834
bool IsCanonical = false ;
8836
8835
if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(PhiR)) {
8836
+ // Truncated wide inductions resume from the last lane of their vector value
8837
+ // in the last vector iteration.
8837
8838
if (WideIV->getTruncInst ())
8838
8839
return nullptr ;
8839
8840
OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8840
8841
ID = &WideIV->getInductionDescriptor ();
8841
- Start = WideIV->getStartValue ();
8842
8842
Step = WideIV->getStepValue ();
8843
8843
ScalarTy = WideIV->getScalarType ();
8844
8844
IsCanonical = WideIV->isCanonical ();
8845
8845
} else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(PhiR)) {
8846
8846
OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8847
8847
ID = &WideIV->getInductionDescriptor ();
8848
- Start = WideIV->getStartValue ();
8849
8848
Step = WideIV->getOperand (1 );
8850
8849
ScalarTy = Start->getLiveInIRValue ()->getType ();
8851
8850
} else {
@@ -8854,14 +8853,17 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8854
8853
8855
8854
VPValue *EndValue = VectorTC;
8856
8855
if (!IsCanonical) {
8857
- EndValue = Builder .createDerivedIV (
8856
+ EndValue = VectorPHBuilder .createDerivedIV (
8858
8857
ID->getKind (),
8859
8858
dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp ()), Start,
8860
8859
VectorTC, Step);
8861
8860
}
8862
8861
8862
+ // EndValue is based on the vector trip count (which has the same type as the
8863
+ // widest induction) and thus may be wider than the induction here.
8863
8864
if (ScalarTy != TypeInfo.inferScalarType (EndValue)) {
8864
- EndValue = Builder.createScalarCast (Instruction::Trunc, EndValue, ScalarTy);
8865
+ EndValue = VectorPHBuilder.createScalarCast (Instruction::Trunc, EndValue,
8866
+ ScalarTy);
8865
8867
}
8866
8868
8867
8869
auto *ResumePhiRecipe =
@@ -8870,10 +8872,12 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8870
8872
return ResumePhiRecipe;
8871
8873
}
8872
8874
8873
- // / Create resume phis in the scalar preheader for first-order recurrences and
8874
- // / reductions and update the VPIRInstructions wrapping the original phis in the
8875
- // / scalar header.
8876
- static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan) {
8875
+ // / Create resume phis in the scalar preheader for first-order recurrences,
8876
+ // / reductions and inductions, and update the VPIRInstructions wrapping the
8877
+ // / original phis in the scalar header.
8878
+ static void addScalarResumePhis (
8879
+ VPlan &Plan,
8880
+ function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe) {
8877
8881
VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8878
8882
auto *ScalarPH = Plan.getScalarPreheader ();
8879
8883
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
@@ -8888,7 +8892,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8888
8892
auto *ScalarPhiI = dyn_cast<PHINode>(&ScalarPhiIRI->getInstruction ());
8889
8893
if (!ScalarPhiI)
8890
8894
break ;
8891
- auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder. getRecipe ( ScalarPhiI) );
8895
+ auto *VectorPhiR = GetHeaderPhiRecipe ( ScalarPhiI);
8892
8896
8893
8897
if (VPValue *ResumePhi = addResumeValuesForInduction (
8894
8898
VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -9277,7 +9281,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9277
9281
VPlanTransforms::handleUncountableEarlyExit (
9278
9282
*Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9279
9283
}
9280
- addScalarResumePhis (RecipeBuilder, *Plan);
9284
+ addScalarResumePhis (*Plan, [&RecipeBuilder](PHINode *P) {
9285
+ return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe (P));
9286
+ });
9281
9287
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
9282
9288
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9283
9289
addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
@@ -9399,6 +9405,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9399
9405
bool HasNUW = true ;
9400
9406
addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
9401
9407
DebugLoc ());
9408
+
9409
+ addScalarResumePhis (
9410
+ *Plan,
9411
+ [&Plan](PHINode *P) {
9412
+ return find_singleton<VPHeaderPHIRecipe>(
9413
+ Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis (),
9414
+ [P](VPRecipeBase &R, bool ) -> VPHeaderPHIRecipe * {
9415
+ auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9416
+ return HeaderR->getUnderlyingValue () == P ? HeaderR : nullptr ;
9417
+ });
9418
+ }
9419
+
9420
+ );
9421
+
9402
9422
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
9403
9423
return Plan;
9404
9424
}
@@ -10490,7 +10510,24 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10490
10510
Constant::getNullValue (IRI->getType ())));
10491
10511
ResumePhi->eraseFromParent ();
10492
10512
}
10493
- VPlanTransforms::removeDeadRecipes (*BestMainPlan);
10513
+ // VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10514
+
10515
+ using namespace VPlanPatternMatch ;
10516
+ VPBasicBlock *ScalarPHVPBB = BestMainPlan->getScalarPreheader ();
10517
+ VPValue *VectorTC = &BestMainPlan->getVectorTripCount ();
10518
+ if (none_of (*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) {
10519
+ return match (&R, m_VPInstruction<VPInstruction::ResumePhi>(
10520
+ m_Specific (VectorTC), m_SpecificInt (0 )));
10521
+ })) {
10522
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
10523
+ // When vectorizing the epilogue, create a resume phi for the
10524
+ // canonical IV if no suitable resume phi was already created.
10525
+ ScalarPHBuilder.createNaryOp (
10526
+ VPInstruction::ResumePhi,
10527
+ {VectorTC, BestMainPlan->getOrAddLiveIn (ConstantInt::get (
10528
+ LVL.getWidestInductionType (), 0 ))},
10529
+ {}, " vec.epilog.resume.val" );
10530
+ }
10494
10531
10495
10532
auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10496
10533
*BestMainPlan, MainILV, DT, false );
0 commit comments