@@ -7561,48 +7561,52 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
75617561 }
75627562}
75637563
7564- // If \p RedResult is a ComputeReductionResult when vectorizing the epilog loop,
7564+ // If \p R is a ComputeReductionResult when vectorizing the epilog loop,
75657565// update the reduction's scalar PHI node by adding the incoming value from the
75667566// main vector loop.
75677567static void updateMergePhiForReductionForEpilogueVectorization (
7568- VPInstruction *RedResult , VPTransformState &State, Loop *OrigLoop,
7568+ VPRecipeBase *R , VPTransformState &State, Loop *OrigLoop,
75697569 BasicBlock *LoopMiddleBlock) {
7570+ auto *RedResult = dyn_cast<VPInstruction>(R);
75707571 if (!RedResult ||
75717572 RedResult->getOpcode () != VPInstruction::ComputeReductionResult)
75727573 return ;
75737574
7574- using namespace VPlanPatternMatch ;
7575- VPValue *ResumePhiVPV =
7576- cast<VPInstruction>(*find_if (RedResult->users (), [](VPUser *U) {
7577- return match (U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (),
7578- m_VPValue ()));
7579- }));
7580- assert (ResumePhiVPV->getNumUsers () == 1 &&
7581- " ResumePhi must have a single user" );
7582- auto *BCBlockPhi = cast<PHINode>(State.get (ResumePhiVPV, true ));
75837575 auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
75847576 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7585- auto *ResumePhi =
7586- dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7577+ PHINode *MainResumePhi;
75877578 if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
75887579 RdxDesc.getRecurrenceKind ())) {
75897580 auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
75907581 assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
75917582 assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
7592- ResumePhi = cast<PHINode>(Cmp->getOperand (0 ));
7583+ MainResumePhi = cast<PHINode>(Cmp->getOperand (0 ));
7584+ } else {
7585+ MainResumePhi = cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
75937586 }
7594- assert (ResumePhi &&
7595- " when vectorizing the epilogue loop, we need a resume phi from main "
7596- " vector loop" );
75977587
7598- BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7599- // When fixing reductions in the epilogue loop then we should already have
7588+ // When fixing reductions in the epilogue loop we should already have
76007589 // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
76017590 // over the incoming values correctly.
7591+ using namespace VPlanPatternMatch ;
7592+ auto IsResumePhi = [](VPUser *U) {
7593+ return match (
7594+ U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (), m_VPValue ()));
7595+ };
7596+ auto *EpiResumePhiVPI =
7597+ cast<VPInstruction>(*find_if (RedResult->users (), IsResumePhi));
7598+ assert (count_if (RedResult->users (), IsResumePhi) == 1 &&
7599+ " ResumePhi must have a single user" );
7600+ auto *EpiResumePhi = cast<PHINode>(State.get (EpiResumePhiVPI, true ));
7601+ BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
76027602 for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7603- if (is_contained (ResumePhi->blocks (), Incoming))
7604- BCBlockPhi->setIncomingValueForBlock (
7605- Incoming, ResumePhi->getIncomingValueForBlock (Incoming));
7603+ if (is_contained (MainResumePhi->blocks (), Incoming)) {
7604+ assert (EpiResumePhi->getIncomingValueForBlock (Incoming) ==
7605+ RdxDesc.getRecurrenceStartValue () &&
7606+ " Trying to reset unexpected value" );
7607+ EpiResumePhi->setIncomingValueForBlock (
7608+ Incoming, MainResumePhi->getIncomingValueForBlock (Incoming));
7609+ }
76067610 }
76077611}
76087612
@@ -7617,7 +7621,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76177621 assert (
76187622 (IsEpilogueVectorization || !ExpandedSCEVs) &&
76197623 " expanded SCEVs to reuse can only be used during epilogue vectorization" );
7620- (void )IsEpilogueVectorization;
76217624
76227625 // TODO: Move to VPlan transform stage once the transition to the VPlan-based
76237626 // cost model is complete for better cost estimates.
@@ -7694,11 +7697,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76947697 // 2.5 Collect reduction resume values.
76957698 auto *ExitVPBB =
76967699 cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7697- if (IsEpilogueVectorization && ExpandedSCEVs )
7700+ if (IsEpilogueVectorization)
76987701 for (VPRecipeBase &R : *ExitVPBB) {
76997702 updateMergePhiForReductionForEpilogueVectorization (
7700- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7701- State.CFG .VPBB2IRBB [ExitVPBB]);
7703+ &R, State, OrigLoop, State.CFG .VPBB2IRBB [ExitVPBB]);
77027704 }
77037705
77047706 // 2.6. Maintain Loop Hints
@@ -10232,7 +10234,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1023210234
1023310235 std::unique_ptr<VPlan> BestMainPlan (BestPlan.duplicate ());
1023410236 auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10235- *BestMainPlan, MainILV, DT, true );
10237+ *BestMainPlan, MainILV, DT, false );
1023610238 ++LoopsVectorized;
1023710239
1023810240 // Second pass vectorizes the epilogue and adjusts the control flow
0 commit comments