@@ -7659,14 +7659,17 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
76597659 } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (
76607660 RdxDesc.getRecurrenceKind ())) {
76617661 using namespace llvm ::PatternMatch;
7662- Value *Cmp, *OrigResumeV;
7662+ Value *Cmp, *OrigResumeV, *CmpOp ;
76637663 bool IsExpectedPattern =
76647664 match (MainResumeValue, m_Select (m_OneUse (m_Value (Cmp)),
76657665 m_Specific (RdxDesc.getSentinelValue ()),
76667666 m_Value (OrigResumeV))) &&
7667- match (Cmp,
7668- m_SpecificICmp (ICmpInst::ICMP_EQ, m_Specific (OrigResumeV),
7669- m_Specific (RdxDesc.getRecurrenceStartValue ())));
7667+ (match (Cmp, m_SpecificICmp (ICmpInst::ICMP_EQ, m_Specific (OrigResumeV),
7668+ m_Value (CmpOp))) &&
7669+ (match (CmpOp,
7670+ m_Freeze (m_Specific (RdxDesc.getRecurrenceStartValue ()))) ||
7671+ (CmpOp == RdxDesc.getRecurrenceStartValue () &&
7672+ isGuaranteedNotToBeUndefOrPoison (CmpOp))));
76707673 assert (IsExpectedPattern && " Unexpected reduction resume pattern" );
76717674 (void )IsExpectedPattern;
76727675 MainResumeValue = OrigResumeV;
@@ -10374,6 +10377,36 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
1037410377 VPlanTransforms::runPass (VPlanTransforms::removeDeadRecipes, MainPlan);
1037510378
1037610379 using namespace VPlanPatternMatch ;
10380+ // When vectorizing the epilogue, FindLastIV reductions can introduce multiple
10381+ // uses of undef/poison. If the reduction start value may be undef or poison
10382+ // it needs to be frozen and the frozen start has to be used when computing
10383+ // the reduction result. We also need to use the frozen value in the resume
10384+ // phi generated by the main vector loop, as this is also used to compute the
10385+ // reduction result after the epilogue vector loop. The helper below rewrites the given Plan in place; if UpdateResumePhis is true, ResumePhi users of the original start value are switched to the frozen value as well.
10386+ auto AddFreezeForFindLastIVReductions = [](VPlan &Plan,
10387+ bool UpdateResumePhis) {
10388+ VPBuilder Builder (Plan.getEntry ()); // builder is constructed from the plan's entry block
10389+ for (VPRecipeBase &R : *Plan.getMiddleBlock ()) {
10390+ auto *VPI = dyn_cast<VPInstruction>(&R);
10391+ if (!VPI || VPI->getOpcode () != VPInstruction::ComputeFindLastIVResult) // only ComputeFindLastIVResult instructions in the middle block are rewritten
10392+ continue ;
10393+ VPValue *OrigStart = VPI->getOperand (1 ); // operand 1 is treated as the reduction start value
10394+ if (isGuaranteedNotToBeUndefOrPoison (OrigStart->getLiveInIRValue ())) // no freeze needed when the start is known not to be undef/poison
10395+ continue ;
10396+ VPInstruction *Freeze =
10397+ Builder.createNaryOp (Instruction::Freeze, {OrigStart}, {}, " fr" );
10398+ VPI->setOperand (1 , Freeze); // the result computation now uses the frozen start
10399+ if (UpdateResumePhis)
10400+ OrigStart->replaceUsesWithIf (Freeze, [Freeze](VPUser &U, unsigned ) { // redirect only ResumePhi users; the freeze itself keeps OrigStart as its operand
10401+ return Freeze != &U && isa<VPInstruction>(&U) &&
10402+ cast<VPInstruction>(&U)->getOpcode () ==
10403+ VPInstruction::ResumePhi;
10404+ });
10405+ }
10406+ };
10407+ AddFreezeForFindLastIVReductions (MainPlan, true );
10408+ AddFreezeForFindLastIVReductions (EpiPlan, false );
10409+
1037710410 VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader ();
1037810411 VPValue *VectorTC = &MainPlan.getVectorTripCount ();
1037910412 // If there is a suitable resume value for the canonical induction in the
@@ -10401,24 +10434,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1040110434 VPBasicBlock *Header = VectorLoop->getEntryBasicBlock ();
1040210435 Header->setName (" vec.epilog.vector.body" );
1040310436
10404- // Re-use the trip count and steps expanded for the main loop, as
10405- // skeleton creation needs it as a value that dominates both the scalar
10406- // and vector epilogue loops
10407- // TODO: This is a workaround needed for epilogue vectorization and it
10408- // should be removed once induction resume value creation is done
10409- // directly in VPlan.
10410- for (auto &R : make_early_inc_range (*Plan.getEntry ())) {
10411- auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10412- if (!ExpandR)
10413- continue ;
10414- auto *ExpandedVal =
10415- Plan.getOrAddLiveIn (ExpandedSCEVs.find (ExpandR->getSCEV ())->second );
10416- ExpandR->replaceAllUsesWith (ExpandedVal);
10417- if (Plan.getTripCount () == ExpandR)
10418- Plan.resetTripCount (ExpandedVal);
10419- ExpandR->eraseFromParent ();
10420- }
10421-
10437+ DenseMap<Value *, Value *> ToFrozen;
1042210438 // Ensure that the start values for all header phi recipes are updated before
1042310439 // vectorizing the epilogue loop.
1042410440 for (VPRecipeBase &R : Header->phis ()) {
@@ -10484,6 +10500,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1048410500 ResumeV =
1048510501 Builder.CreateICmpNE (ResumeV, RdxDesc.getRecurrenceStartValue ());
1048610502 } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (RK)) {
10503+ ToFrozen[RdxDesc.getRecurrenceStartValue ()] =
10504+ cast<PHINode>(ResumeV)->getIncomingValueForBlock (
10505+ EPI.MainLoopIterationCountCheck );
10506+
1048710507 // VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
1048810508 // to the resume value. The resume value is adjusted to the sentinel
1048910509 // value when the final value from the main vector loop equals the start
@@ -10492,8 +10512,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1049210512 // variable.
1049310513 BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent ();
1049410514 IRBuilder<> Builder (ResumeBB, ResumeBB->getFirstNonPHIIt ());
10495- Value *Cmp =
10496- Builder. CreateICmpEQ ( ResumeV, RdxDesc.getRecurrenceStartValue ());
10515+ Value *Cmp = Builder. CreateICmpEQ (
10516+ ResumeV, ToFrozen[ RdxDesc.getRecurrenceStartValue ()] );
1049710517 ResumeV =
1049810518 Builder.CreateSelect (Cmp, RdxDesc.getSentinelValue (), ResumeV);
1049910519 }
@@ -10509,6 +10529,35 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1050910529 VPValue *StartVal = Plan.getOrAddLiveIn (ResumeV);
1051010530 cast<VPHeaderPHIRecipe>(&R)->setStartValue (StartVal);
1051110531 }
10532+
10533+ // For some VPValues in the epilogue plan we must re-use the generated IR
10534+ // values from the main plan. Replace them with live-in VPValues.
10535+ // TODO: This is a workaround needed for epilogue vectorization and it
10536+ // should be removed once induction resume value creation is done
10537+ // directly in VPlan.
10538+ for (auto &R : make_early_inc_range (*Plan.getEntry ())) {
10539+ // Re-use frozen values from the main plan for Freeze VPInstructions in the
10540+ // epilogue plan. This ensures all users use the same frozen value.
10541+ auto *VPI = dyn_cast<VPInstruction>(&R);
10542+ if (VPI && VPI->getOpcode () == Instruction::Freeze) {
10543+ VPI->replaceAllUsesWith (Plan.getOrAddLiveIn (
10544+ ToFrozen.lookup (VPI->getOperand (0 )->getLiveInIRValue ())));
10545+ continue ;
10546+ }
10547+
10548+ // Re-use the trip count and steps expanded for the main loop, as
10549+ // skeleton creation needs it as a value that dominates both the scalar
10550+ // and vector epilogue loops
10551+ auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10552+ if (!ExpandR)
10553+ continue ;
10554+ auto *ExpandedVal =
10555+ Plan.getOrAddLiveIn (ExpandedSCEVs.find (ExpandR->getSCEV ())->second );
10556+ ExpandR->replaceAllUsesWith (ExpandedVal);
10557+ if (Plan.getTripCount () == ExpandR)
10558+ Plan.resetTripCount (ExpandedVal);
10559+ ExpandR->eraseFromParent ();
10560+ }
1051210561}
1051310562
1051410563// Generate bypass values from the additional bypass block. Note that when the
0 commit comments