@@ -10368,22 +10368,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
1036810368 VPlanTransforms::runPass (VPlanTransforms::removeDeadRecipes, MainPlan);
1036910369
1037010370 using namespace VPlanPatternMatch ;
10371- VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader ();
10372- VPValue *VectorTC = &MainPlan.getVectorTripCount ();
10373- // If there is a suitable resume value for the canonical induction in the
10374- // scalar (which will become vector) epilogue loop we are done. Otherwise
10375- // create it below.
10376- if (any_of (*MainScalarPH, [VectorTC](VPRecipeBase &R) {
10377- return match (&R, m_VPInstruction<VPInstruction::ResumePhi>(
10378- m_Specific (VectorTC), m_SpecificInt (0 )));
10379- }))
10380- return ;
10381- VPBuilder ScalarPHBuilder (MainScalarPH, MainScalarPH->begin ());
10382- ScalarPHBuilder.createNaryOp (
10383- VPInstruction::ResumePhi,
10384- {VectorTC, MainPlan.getCanonicalIV ()->getStartValue ()}, {},
10385- " vec.epilog.resume.val" );
10386-
1038710371 // When vectorizing the epilogue, FindLastIV reductions can introduce multiple
1038810372 // uses of undef/poison. If the reduction start value may be undef or poison
1038910373 // it needs to be frozen and the frozen start has to be used when computing
@@ -10413,6 +10397,22 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
1041310397 };
1041410398 AddFreezeForFindLastIVReductions (MainPlan, true );
1041510399 AddFreezeForFindLastIVReductions (EpiPlan, false );
10400+
10401+ VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader ();
10402+ VPValue *VectorTC = &MainPlan.getVectorTripCount ();
10403+ // If there is a suitable resume value for the canonical induction in the
10404+ // scalar (which will become vector) epilogue loop we are done. Otherwise
10405+ // create it below.
10406+ if (any_of (*MainScalarPH, [VectorTC](VPRecipeBase &R) {
10407+ return match (&R, m_VPInstruction<VPInstruction::ResumePhi>(
10408+ m_Specific (VectorTC), m_SpecificInt (0 )));
10409+ }))
10410+ return ;
10411+ VPBuilder ScalarPHBuilder (MainScalarPH, MainScalarPH->begin ());
10412+ ScalarPHBuilder.createNaryOp (
10413+ VPInstruction::ResumePhi,
10414+ {VectorTC, MainPlan.getCanonicalIV ()->getStartValue ()}, {},
10415+ " vec.epilog.resume.val" );
1041610416}
1041710417
1041810418/// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded
@@ -10521,20 +10521,24 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1052110521 cast<VPHeaderPHIRecipe>(&R)->setStartValue (StartVal);
1052210522 }
1052310523
10524- // Re-use the trip count and steps expanded for the main loop, as
10525- // skeleton creation needs it as a value that dominates both the scalar
10526- // and vector epilogue loops
10524+ // For some VPValues in the epilogue plan we must re-use the generated IR
10525+ // values from the main plan. Replace them with live-in VPValues.
1052710526 // TODO: This is a workaround needed for epilogue vectorization and it
1052810527 // should be removed once induction resume value creation is done
1052910528 // directly in VPlan.
1053010529 for (auto &R : make_early_inc_range (*Plan.getEntry ())) {
10530+ // Re-use frozen values from the main plan for Freeze VPInstructions in the
10531+ // epilogue plan. This ensures all users use the same frozen value.
1053110532 auto *VPI = dyn_cast<VPInstruction>(&R);
1053210533 if (VPI && VPI->getOpcode () == Instruction::Freeze) {
1053310534 VPI->replaceAllUsesWith (Plan.getOrAddLiveIn (
1053410535 ToFrozen.lookup (VPI->getOperand (0 )->getLiveInIRValue ())));
1053510536 continue ;
1053610537 }
1053710538
10539+ // Re-use the trip count and steps expanded for the main loop, as
10540+ // skeleton creation needs them as values that dominate both the scalar
10541+ // and vector epilogue loops.
1053810542 auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
1053910543 if (!ExpandR)
1054010544 continue ;
0 commit comments