@@ -7562,67 +7562,62 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
75627562 }
75637563}
75647564
// NOTE: this is a diff hunk. Lines marked '-' belong to the removed
// createAndCollectMergePhiForReduction; lines marked '+', together with the
// unmarked context lines, form its replacement,
// fixReductionScalarResumeWhenVectorizingEpilog.
7565- // Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7566- // create a merge phi node for it.
7567- static void createAndCollectMergePhiForReduction (
7568- VPInstruction *RedResult,
7569- VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
7570- bool VectorizingEpilogue) {
7571- if (!RedResult ||
7572- RedResult ->getOpcode () != VPInstruction::ComputeReductionResult)
7565+ // If \p R is a ComputeReductionResult VPInstruction (epilogue loop case),
7566+ // patch the IR phi generated for its ResumePhi user so that it takes the
7567+ // main vector loop's resume value instead of the original start value.
// Any recipe that is not a ComputeReductionResult VPInstruction is ignored.
// \p State is used to look up the IR phi generated for the ResumePhi recipe.
// NOTE(review): \p LoopMiddleBlock is not referenced by any '+' or context
// line of this hunk; only the removed code read it.
7568+ static void fixReductionScalarResumeWhenVectorizingEpilog (
7569+ VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock) {
// Only ComputeReductionResult VPInstructions are handled; return otherwise.
7570+ auto *EpiRedResult = dyn_cast<VPInstruction>(R);
7571+ if (!EpiRedResult ||
7572+ EpiRedResult ->getOpcode () != VPInstruction::ComputeReductionResult)
75737573 return ;
75747574
7575- auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
7576- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7577-
7578- Value *FinalValue = State.get (RedResult, VPLane (VPLane::getFirstLane ()));
7579- auto *ResumePhi =
7580- dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7581- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind (
7582- RdxDesc.getRecurrenceKind ())) {
7583- auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
7584- assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
7585- assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
7586- ResumePhi = cast<PHINode>(Cmp->getOperand (0 ));
7587- }
7588- assert ((!VectorizingEpilogue || ResumePhi) &&
7589- " when vectorizing the epilogue loop, we need a resume phi from main "
7590- " vector loop" );
7591-
7592- // TODO: bc.merge.rdx should not be created here, instead it should be
7593- // modeled in VPlan.
7594- BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7595- // Create a phi node that merges control-flow from the backedge-taken check
7596- // block and the middle block.
7597- auto *BCBlockPhi =
7598- PHINode::Create (FinalValue->getType (), 2 , " bc.merge.rdx" ,
7599- LoopScalarPreHeader->getTerminator ()->getIterator ());
7600-
7601- // If we are fixing reductions in the epilogue loop then we should already
7602- // have created a bc.merge.rdx Phi after the main vector body. Ensure that
7603- // we carry over the incoming values correctly.
// Operand 0 of the reduction result is cast to the reduction header phi
// recipe. The underlying IR value of that recipe's start value is the
// candidate resume value coming from the main vector loop.
7575+ auto *EpiRedHeaderPhi =
7576+ cast<VPReductionPHIRecipe>(EpiRedResult->getOperand (0 ));
7577+ const RecurrenceDescriptor &RdxDesc =
7578+ EpiRedHeaderPhi->getRecurrenceDescriptor ();
7579+ Value *MainResumeValue =
7580+ EpiRedHeaderPhi->getStartValue ()->getUnderlyingValue ();
// For AnyOf recurrences the underlying value is expected to be an ICMP_NE
// whose operand 1 is the recurrence start value; the resume value is then
// that compare's operand 0.
7581+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
7582+ RdxDesc.getRecurrenceKind ())) {
7583+ auto *Cmp = cast<ICmpInst>(MainResumeValue);
7584+ assert (Cmp->getPredicate () == CmpInst::ICMP_NE &&
7585+ " AnyOf expected to start with ICMP_NE" );
7586+ assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue () &&
7587+ " AnyOf expected to start by comparing main resume value to original "
7588+ " start value" );
7589+ MainResumeValue = Cmp->getOperand (0 );
7590+ }
// The unconditional cast<> requires the resume value to be a PHINode here.
7591+ PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
7592+
7593+ // When fixing reductions in the epilogue loop, a bc.merge.rdx phi should
7594+ // already exist for the reduction. Locate the single ResumePhi user of the
7595+ // reduction result and fetch the IR phi that was generated for it.
7596+ using namespace VPlanPatternMatch ;
7597+ auto IsResumePhi = [](VPUser *U) {
7598+ return match (
7599+ U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (), m_VPValue ()));
7600+ };
// NOTE(review): the condition checks that EpiRedResult has exactly one
// ResumePhi user, while the message reads as if ResumePhi itself had a
// single user.
7601+ assert (count_if (EpiRedResult->users (), IsResumePhi) == 1 &&
7602+ " ResumePhi must have a single user" );
7603+ auto *EpiResumePhiVPI =
7604+ cast<VPInstruction>(*find_if (EpiRedResult->users (), IsResumePhi));
// NOTE(review): the meaning of the 'true' argument to State.get is not
// visible in this hunk; presumably it requests the scalar value. Confirm
// against VPTransformState::get.
7605+ auto *EpiResumePhi = cast<PHINode>(State.get (EpiResumePhiVPI, true ));
// The block containing the generated phi is treated as the scalar preheader.
7606+ BasicBlock *LoopScalarPreHeader = EpiResumePhi->getParent ();
7607+ bool Updated = false ;
76047608 for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7605- if (Incoming == LoopMiddleBlock)
7606- BCBlockPhi->addIncoming (FinalValue, Incoming);
7607- else if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7608- BCBlockPhi->addIncoming (ResumePhi->getIncomingValueForBlock (Incoming),
7609- Incoming);
7610- else
7611- BCBlockPhi->addIncoming (RdxDesc.getRecurrenceStartValue (), Incoming);
// For a predecessor that is also an incoming block of the main-loop resume
// phi, overwrite the epilogue phi's entry with the main-loop phi's value for
// that block. The entry is asserted to hold the recurrence start value
// beforehand, and the asserts require exactly one such predecessor.
7609+ if (is_contained (MainResumePhi->blocks (), Incoming)) {
7610+ assert (EpiResumePhi->getIncomingValueForBlock (Incoming) ==
7611+ RdxDesc.getRecurrenceStartValue () &&
7612+ " Trying to reset unexpected value" );
7613+ assert (!Updated && " Should update at most 1 incoming value" );
7614+ EpiResumePhi->setIncomingValueForBlock (
7615+ Incoming, MainResumePhi->getIncomingValueForBlock (Incoming));
7616+ Updated = true ;
7617+ }
76127618 }
7613-
7614- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
7615- // TODO: This fixup should instead be modeled in VPlan.
7616- // Fix the scalar loop reduction variable with the incoming reduction sum
7617- // from the vector body and from the backedge value.
7618- int IncomingEdgeBlockIdx =
7619- OrigPhi->getBasicBlockIndex (OrigLoop->getLoopLatch ());
7620- assert (IncomingEdgeBlockIdx >= 0 && " Invalid block index" );
7621- // Pick the other block.
7622- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1 );
7623- OrigPhi->setIncomingValue (SelfEdgeBlockIdx, BCBlockPhi);
7624- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr ();
7625- OrigPhi->setIncomingValue (IncomingEdgeBlockIdx, LoopExitInst);
7619+ assert (Updated && " Must update EpiResumePhi." );
// Keeps Updated referenced when the asserts above compile to nothing.
7620+ (void )Updated;
76267621}
76277622
76287623DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
@@ -7713,11 +7708,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77137708 // 2.5 Collect reduction resume values.
77147709 auto *ExitVPBB =
77157710 cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7716- for (VPRecipeBase &R : *ExitVPBB) {
7717- createAndCollectMergePhiForReduction (
7718- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7719- State.CFG .VPBB2IRBB [ExitVPBB], VectorizingEpilogue );
7720- }
7711+ if (VectorizingEpilogue)
7712+ for (VPRecipeBase &R : *ExitVPBB) {
7713+ fixReductionScalarResumeWhenVectorizingEpilog (
7714+ &R, State, State .CFG .VPBB2IRBB [ExitVPBB]);
7715+ }
77217716
77227717 // 2.6. Maintain Loop Hints
77237718 // Keep all loop hints from the original loop on the vector loop (we'll
@@ -9518,6 +9513,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
95189513 });
95199514 FinalReductionResult->insertBefore (*MiddleVPBB, IP);
95209515
9516+ // Order is strict: if there are multiple successors, the first is the exit
9517+ // block, second is the scalar preheader.
9518+ VPBasicBlock *ScalarPHVPBB =
9519+ cast<VPBasicBlock>(MiddleVPBB->getSuccessors ().back ());
9520+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9521+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9522+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9523+ {}, " bc.merge.rdx" );
9524+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9525+ Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9526+
95219527 // Adjust AnyOf reductions; replace the reduction phi for the selected value
95229528 // with a boolean reduction phi node to check if the condition is true in
95239529 // any iteration. The final value is selected by the final
0 commit comments