@@ -2660,6 +2660,8 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues(
26602660 assert (MainVectorTripCount && " Must have bypass information" );
26612661
26622662 Instruction *OldInduction = Legal->getPrimaryInduction ();
2663+ IRBuilder<> BypassBuilder (getAdditionalBypassBlock (),
2664+ getAdditionalBypassBlock ()->getFirstInsertionPt ());
26632665 for (const auto &InductionEntry : Legal->getInductionVars ()) {
26642666 PHINode *OrigPhi = InductionEntry.first ;
26652667 const InductionDescriptor &II = InductionEntry.second ;
@@ -2668,18 +2670,15 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues(
26682670 // Otherwise it is computed.
26692671 Value *EndValueFromAdditionalBypass = MainVectorTripCount;
26702672 if (OrigPhi != OldInduction) {
2671- IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
2672-
26732673 // Fast-math-flags propagate from the original induction instruction.
26742674 if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2675- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2675+ BypassBuilder.setFastMathFlags (
2676+ II.getInductionBinOp ()->getFastMathFlags ());
26762677
26772678 // Compute the end value for the additional bypass.
2678- B.SetInsertPoint (getAdditionalBypassBlock (),
2679- getAdditionalBypassBlock ()->getFirstInsertionPt ());
2680- EndValueFromAdditionalBypass =
2681- emitTransformedIndex (B, MainVectorTripCount, II.getStartValue (), Step,
2682- II.getKind (), II.getInductionBinOp ());
2679+ EndValueFromAdditionalBypass = emitTransformedIndex (
2680+ BypassBuilder, MainVectorTripCount, II.getStartValue (), Step,
2681+ II.getKind (), II.getInductionBinOp ());
26832682 EndValueFromAdditionalBypass->setName (" ind.end" );
26842683 }
26852684
@@ -8867,28 +8866,25 @@ static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR,
88678866 if (!WideIV)
88688867 return nullptr ;
88698868
8869+ auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8870+ // Truncated wide inductions resume from the last lane of their vector value
8871+ // in the last vector iteration which is handled elsewhere.
8872+ if (WideIntOrFp && WideIntOrFp->getTruncInst ())
8873+ return nullptr ;
8874+
88708875 VPValue *Start = WideIV->getStartValue ();
88718876 VPValue *Step = WideIV->getStepValue ();
88728877 const InductionDescriptor &ID = WideIV->getInductionDescriptor ();
8873- Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType (WideIV);
8874- bool IsCanonical = false ;
8875- if (auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(PhiR)) {
8876- // Truncated wide inductions resume from the last lane of their vector value
8877- // in the last vector iteration which is handled elsewhere.
8878- if (WideIntOrFp->getTruncInst ())
8879- return nullptr ;
8880- IsCanonical = WideIntOrFp->isCanonical ();
8881- }
8882-
88838878 VPValue *EndValue = VectorTC;
8884- if (!IsCanonical ) {
8879+ if (!WideIntOrFp || !WideIntOrFp-> isCanonical () ) {
88858880 EndValue = VectorPHBuilder.createDerivedIV (
88868881 ID.getKind (), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp ()),
88878882 Start, VectorTC, Step);
88888883 }
88898884
88908885 // EndValue is derived from the vector trip count (which has the same type as
88918886 // the widest induction) and thus may be wider than the induction here.
8887+ Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType (WideIV);
88928888 if (ScalarTypeOfWideIV != TypeInfo.inferScalarType (EndValue)) {
88938889 EndValue = VectorPHBuilder.createScalarCast (Instruction::Trunc, EndValue,
88948890 ScalarTypeOfWideIV);
@@ -8903,9 +8899,7 @@ static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR,
89038899// / Create resume phis in the scalar preheader for first-order recurrences,
89048900// / reductions and inductions, and update the VPIRInstructions wrapping the
89058901// / original phis in the scalar header.
8906- static void addScalarResumePhis (
8907- VPlan &Plan,
8908- function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe) {
8902+ static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan) {
89098903 VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
89108904 auto *ScalarPH = Plan.getScalarPreheader ();
89118905 auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
@@ -8921,7 +8915,7 @@ static void addScalarResumePhis(
89218915 if (!ScalarPhiI)
89228916 break ;
89238917
8924- auto *VectorPhiR = GetHeaderPhiRecipe ( ScalarPhiI);
8918+ auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder. getRecipe ( ScalarPhiI) );
89258919 if (isa<VPWidenInductionRecipe>(VectorPhiR)) {
89268920 if (VPValue *ResumePhi = addResumePhiRecipeForInduction (
89278921 VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -9049,9 +9043,9 @@ addUsersInExitBlocks(VPlan &Plan,
90499043static void addExitUsersForFirstOrderRecurrences (
90509044 VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) {
90519045 VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
9052- auto *MainScalarPH = Plan.getScalarPreheader ();
9046+ auto *ScalarPHVPBB = Plan.getScalarPreheader ();
90539047 auto *MiddleVPBB = Plan.getMiddleBlock ();
9054- VPBuilder ScalarPHBuilder (MainScalarPH );
9048+ VPBuilder ScalarPHBuilder (ScalarPHVPBB );
90559049 VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
90569050 VPValue *TwoVPV = Plan.getOrAddLiveIn (
90579051 ConstantInt::get (Plan.getCanonicalIV ()->getScalarType (), 2 ));
@@ -9317,9 +9311,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93179311 VPlanTransforms::handleUncountableEarlyExit (
93189312 *Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
93199313 }
9320- addScalarResumePhis (*Plan, [&RecipeBuilder](PHINode *P) {
9321- return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe (P));
9322- });
9314+ addScalarResumePhis (RecipeBuilder, *Plan);
93239315 SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
93249316 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
93259317 addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
@@ -9441,18 +9433,16 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
94419433 addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
94429434 DebugLoc ());
94439435
9444- addScalarResumePhis (
9445- *Plan,
9446- [&Plan](PHINode *P) {
9447- return find_singleton<VPHeaderPHIRecipe>(
9448- Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis (),
9449- [P](VPRecipeBase &R, bool ) -> VPHeaderPHIRecipe * {
9450- auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9451- return HeaderR->getUnderlyingValue () == P ? HeaderR : nullptr ;
9452- });
9453- }
9454-
9455- );
9436+ // Collect mapping of IR header phis to header phi recipes, to be used in
9437+ // addScalarResumePhis.
9438+ VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
9439+ for (auto &R : Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
9440+ if (isa<VPCanonicalIVPHIRecipe>(&R))
9441+ continue ;
9442+ auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9443+ RecipeBuilder.setRecipe (HeaderR->getUnderlyingInstr (), HeaderR);
9444+ }
9445+ addScalarResumePhis (RecipeBuilder, *Plan);
94569446
94579447 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
94589448 return Plan;
@@ -9747,8 +9737,12 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
97479737 State.Builder , Index, getStartValue ()->getLiveInIRValue (), Step, Kind,
97489738 cast_if_present<BinaryOperator>(FPBinOp));
97499739 DerivedIV->setName (Name);
9750- // Index may only be set to constant 0 in prepareToExecute.
9751- assert ((DerivedIV != Index || cast<ConstantInt>(Index)->isNullValue ()) &&
9740+ // If index is the vector trip count, the concrete value will only be set in
9741+ // prepareToExecute, leading to missed simplifications, e.g. if it is 0.
9742+ // TODO: Remove the special case for the vector trip count once it is computed
9743+ // in VPlan and can be used during VPlan simplification.
9744+ assert ((DerivedIV != Index ||
9745+ getOperand (1 ) == &getParent ()->getPlan ()->getVectorTripCount ()) &&
97529746 " IV didn't need transforming?" );
97539747 State.set (this , DerivedIV, VPLane (0 ));
97549748}
@@ -10074,8 +10068,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
1007410068 EpiWidenedPhis.insert (
1007510069 cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
1007610070 }
10077- for (VPRecipeBase &R : make_early_inc_range (
10078- *cast<VPIRBasicBlock>(MainPlan.getScalarHeader ()))) {
10071+ for (VPRecipeBase &R : *cast<VPIRBasicBlock>(MainPlan.getScalarHeader ())) {
1007910072 auto *VPIRInst = cast<VPIRInstruction>(&R);
1008010073 auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
1008110074 if (!IRI)
@@ -10095,19 +10088,19 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
1009510088 using namespace VPlanPatternMatch ;
1009610089 VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader ();
1009710090 VPValue *VectorTC = &MainPlan.getVectorTripCount ();
10098- // If there is no suitable resume value for the canonical induction in the
10099- // scalar (which will become vector) epilogue loop, create it.
10100- if (none_of (*MainScalarPH, [VectorTC](VPRecipeBase &R) {
10091+ // If there is a suitable resume value for the canonical induction in the
10092+ // scalar (which will become vector) epilogue loop we are done. Otherwise
10093+ // create it below.
10094+ if (any_of (*MainScalarPH, [VectorTC](VPRecipeBase &R) {
1010110095 return match (&R, m_VPInstruction<VPInstruction::ResumePhi>(
1010210096 m_Specific (VectorTC), m_SpecificInt (0 )));
10103- })) {
10104- VPBuilder ScalarPHBuilder (MainScalarPH, MainScalarPH->begin ());
10105- ScalarPHBuilder.createNaryOp (
10106- VPInstruction::ResumePhi,
10107- {VectorTC, MainPlan.getOrAddLiveIn (ConstantInt::get (
10108- MainPlan.getCanonicalIV ()->getScalarType (), 0 ))},
10109- {}, " vec.epilog.resume.val" );
10110- }
10097+ }))
10098+ return ;
10099+ VPBuilder ScalarPHBuilder (MainScalarPH, MainScalarPH->begin ());
10100+ ScalarPHBuilder.createNaryOp (
10101+ VPInstruction::ResumePhi,
10102+ {VectorTC, MainPlan.getCanonicalIV ()->getStartValue ()}, {},
10103+ " vec.epilog.resume.val" );
1011110104}
1011210105
1011310106// / Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded
0 commit comments