@@ -498,7 +498,7 @@ class InnerLoopVectorizer {
498498 virtual std::pair<BasicBlock *, Value *>
499499 createVectorizedLoopSkeleton (const SCEV2ValueTy &ExpandedSCEVs);
500500
501- // / Fix the vectorized code, taking care of header phi's, live-outs, and more.
501+ // / Fix the vectorized code, taking care of header phi's, and more.
502502 void fixVectorizedLoop (VPTransformState &State);
503503
504504 // Return true if any runtime check is added.
@@ -2713,7 +2713,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
27132713 | |
27142714 (opt) v <-- edge from middle to exit iff epilogue is not required.
27152715 | [ ] \
2716- | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue).
2716+ | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, header
2717+ | | wrapped in VPIRBasicBlock).
27172718 \ |
27182719 \ v
27192720 >[ ] <-- exit block(s). (wrapped in VPIRBasicBlock)
@@ -2956,7 +2957,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29562957 // and there is nothing to fix from vector loop; phis should have incoming
29572958 // from scalar loop only.
29582959 } else {
2959- // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2960+ // TODO: Check in VPlan to see if IV users need fixing instead of checking
29602961 // the cost model.
29612962
29622963 // If we inserted an edge from the middle block to the unique exit block,
@@ -2970,10 +2971,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29702971 IVEndValues[Entry.first ], LoopMiddleBlock, State);
29712972 }
29722973
2973- // Fix live-out phis not already fixed earlier.
2974- for (const auto &KV : Plan.getLiveOuts ())
2975- KV.second ->fixPhi (Plan, State);
2976-
29772974 for (Instruction *PI : PredicatedInstructions)
29782975 sinkScalarOperands (&*PI);
29792976
@@ -8790,6 +8787,41 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
87908787 {CanonicalIVIncrement, &Plan.getVectorTripCount ()}, DL);
87918788}
87928789
8790+ /// Create ResumePhi recipes in the scalar preheader for first-order recurrences
8791+ /// and reductions, and add each one as an operand of the VPIRInstruction wrapping
8792+ /// the corresponding original phi in the scalar header.
8793+ static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan) {
8794+ auto *ScalarPH = Plan.getScalarPreheader ();
8795+ auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
8796+ VPBuilder ScalarPHBuilder (ScalarPH);
8797+ VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8798+ VPValue *OneVPV = Plan.getOrAddLiveIn (
8799+ ConstantInt::get (Plan.getCanonicalIV ()->getScalarType (), 1 ));
8800+ for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader ()) {
8801+ auto *ScalarPhiIRI = cast<VPIRInstruction>(&ScalarPhiR);
8802+ auto *ScalarPhiI = dyn_cast<PHINode>(&ScalarPhiIRI->getInstruction ());
8803+ if (!ScalarPhiI)
8804+ break ; // Stop at the first recipe that does not wrap a PHINode.
8805+ auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe (ScalarPhiI));
8806+ if (!isa<VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe>(VectorPhiR))
8807+ continue ; // Only FORs and reductions get a resume phi here.
8808+ // The backedge value is the value to resume with when coming out of the
8809+ // vector loop; for FORs it is a vector whose last element must be extracted
8810+ // in the middle block. The start value is used if the loop is bypassed.
8811+ bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8812+ auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue ();
8813+ if (IsFOR)
8814+ ResumeFromVectorLoop = MiddleBuilder.createNaryOp (
8815+ VPInstruction::ExtractFromEnd, {ResumeFromVectorLoop, OneVPV}, {},
8816+ " vector.recur.extract" );
8817+ StringRef Name = IsFOR ? " scalar.recur.init" : " bc.merge.rdx" ;
8818+ auto *ResumePhiR = ScalarPHBuilder.createNaryOp (
8819+ VPInstruction::ResumePhi,
8820+ {ResumeFromVectorLoop, VectorPhiR->getStartValue ()}, {}, Name);
8821+ ScalarPhiIRI->addOperand (ResumePhiR);
8822+ }
8823+ }
8824+
87938825// Collect VPIRInstructions for phis in the original exit block that are modeled
87948826// in VPlan and add the exiting VPValue as operand. Some exiting values are not
87958827// modeled explicitly yet and won't be included. Those are un-truncated
@@ -8819,8 +8851,7 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlock(
88198851 VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
88208852 // Exit values for inductions are computed and updated outside of VPlan and
88218853 // independent of induction recipes.
8822- // TODO: Compute induction exit values in VPlan, use VPLiveOuts to update
8823- // live-outs.
8854+ // TODO: Compute induction exit values in VPlan.
88248855 if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
88258856 !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
88268857 isa<VPWidenPointerInductionRecipe>(V) ||
@@ -8853,7 +8884,8 @@ addUsersInExitBlock(VPlan &Plan,
88538884 // modeling the corresponding LCSSA phis.
88548885 for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
88558886 VPValue *V = ExitIRI->getOperand (0 );
8856- // Pass live-in values used by exit phis directly through to the live-out.
8887+ // Pass live-in values used by exit phis directly through to their users in
8888+ // the exit block.
88578889 if (V->isLiveIn ())
88588890 continue ;
88598891
@@ -8865,39 +8897,17 @@ addUsersInExitBlock(VPlan &Plan,
88658897 }
88668898}
88678899
8868- // / Handle live-outs for first order reductions, both in the scalar preheader
8869- // / and the original exit block:
8870- // / 1. Feed a resume value for every FOR from the vector loop to the scalar
8871- // / loop, if middle block branches to scalar preheader, by introducing
8872- // / ExtractFromEnd and ResumePhi recipes in each, respectively, and a
8873- // / VPLiveOut which uses the latter and corresponds to the scalar header.
8874- // / 2. Feed the penultimate value of recurrences to their LCSSA phi users in
8875- // / the original exit block using a VPLiveOut.
8876- static void addLiveOutsForFirstOrderRecurrences (
8900+ /// Handle users of first-order recurrences in the original exit block. The
8901+ /// penultimate value of each recurrence is fed to its LCSSA phi users in the
8902+ /// original exit block by setting it as the operand of the VPIRInstruction
8903+ /// wrapping the LCSSA phi.
8904+ static void addExitUsersForFirstOrderRecurrences (
88778905 VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) {
88788906 VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
8879-
8880- // Start by finding out if middle block branches to scalar preheader, which is
8881- // not a VPIRBasicBlock, unlike Exit block - the other possible successor of
8882- // middle block.
8883- // TODO: Should be replaced by
8884- // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8885- // scalar region is modeled as well.
8907+ auto *ScalarPHVPBB = Plan.getScalarPreheader ();
88868908 auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor ());
8887- VPBasicBlock *ScalarPHVPBB = nullptr ;
8888- if (MiddleVPBB->getNumSuccessors () == 2 ) {
8889- // Order is strict: first is the exit block, second is the scalar preheader.
8890- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
8891- } else if (ExitUsersToFix.empty ()) {
8892- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
8893- } else {
8894- llvm_unreachable (" unsupported CFG in VPlan" );
8895- }
8896-
88978909 VPBuilder ScalarPHBuilder (ScalarPHVPBB);
88988910 VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8899- VPValue *OneVPV = Plan.getOrAddLiveIn (
8900- ConstantInt::get (Plan.getCanonicalIV ()->getScalarType (), 1 ));
89018911 VPValue *TwoVPV = Plan.getOrAddLiveIn (
89028912 ConstantInt::get (Plan.getCanonicalIV ()->getScalarType (), 2 ));
89038913
@@ -8973,26 +8983,16 @@ static void addLiveOutsForFirstOrderRecurrences(
89738983 // lo = lcssa.phi [s1, scalar.body],
89748984 // [vector.recur.extract.for.phi, middle.block]
89758985 //
8976- // Extract the resume value and create a new VPLiveOut for it.
8977- auto *Resume = MiddleBuilder.createNaryOp (VPInstruction::ExtractFromEnd,
8978- {FOR->getBackedgeValue (), OneVPV},
8979- {}, " vector.recur.extract" );
8980- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
8981- VPInstruction::ResumePhi, {Resume, FOR->getStartValue ()}, {},
8982- " scalar.recur.init" );
8983- auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr ());
8984- Plan.addLiveOut (FORPhi, ResumePhiRecipe);
8985-
89868986 // Now update VPIRInstructions modeling LCSSA phis in the exit block.
89878987 // Extract the penultimate value of the recurrence and use it as operand for
89888988 // the VPIRInstruction modeling the phi.
89898989 for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
89908990 if (ExitIRI->getOperand (0 ) != FOR)
89918991 continue ;
8992- VPValue *Ext = MiddleBuilder.createNaryOp (
8992+ VPValue *PenultimateElement = MiddleBuilder.createNaryOp (
89938993 VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue (), TwoVPV}, {},
89948994 " vector.recur.extract.for.phi" );
8995- ExitIRI->setOperand (0 , Ext );
8995+ ExitIRI->setOperand (0 , PenultimateElement );
89968996 ExitUsersToFix.remove (ExitIRI);
89978997 }
89988998 }
@@ -9166,11 +9166,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91669166 " VPBasicBlock" );
91679167 RecipeBuilder.fixHeaderPhis ();
91689168
9169+ addScalarResumePhis (RecipeBuilder, *Plan);
91699170 SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
91709171 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9171- addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9172+ addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
91729173 addUsersInExitBlock (*Plan, ExitUsersToFix);
9173-
91749174 // ---------------------------------------------------------------------------
91759175 // Transform initial VPlan: Apply previously taken decisions, in order, to
91769176 // bring the VPlan to its final state.
@@ -9192,9 +9192,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91929192 // Replace VPValues for known constant strides guaranteed by predicate scalar
91939193 // evolution.
91949194 auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned ) {
9195- auto *R = dyn_cast<VPRecipeBase>(&U);
9196- if (!R)
9197- return false ;
9195+ auto *R = cast<VPRecipeBase>(&U);
91989196 return R->getParent ()->getParent () ||
91999197 R->getParent () ==
92009198 Plan->getVectorLoopRegion ()->getSinglePredecessor ();
@@ -9291,7 +9289,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
92919289// instructions leading from the loop exit instr to the phi need to be converted
92929290// to reductions, with one operand being vector and the other being the scalar
92939291// reduction chain. For other reductions, a select is introduced between the phi
9294- // and live-out recipes when folding the tail.
9292+ // and users outside the vector region when folding the tail.
92959293//
92969294// A ComputeReductionResult recipe is added to the middle block, also for
92979295// in-loop reductions which compute their result in-loop, because generating
@@ -9325,8 +9323,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93259323 for (VPUser *U : Cur->users ()) {
93269324 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
93279325 if (!UserRecipe->getParent ()->getEnclosingLoopRegion ()) {
9328- assert (UserRecipe->getParent () == MiddleVPBB &&
9329- " U must be either in the loop region or the middle block." );
9326+ assert ((UserRecipe->getParent () == MiddleVPBB ||
9327+ UserRecipe->getParent () == Plan->getScalarPreheader ()) &&
9328+ " U must be either in the loop region, the middle block or the "
9329+ " scalar preheader." );
93309330 continue ;
93319331 }
93329332 Worklist.insert (UserRecipe);
@@ -9440,8 +9440,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
94409440
94419441 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
94429442 // If tail is folded by masking, introduce selects between the phi
9443- // and the live-out instruction of each reduction, at the beginning of the
9444- // dedicated latch block.
9443+ // and the users outside the vector region of each reduction, at the
9444+ // beginning of the dedicated latch block.
94459445 auto *OrigExitingVPV = PhiR->getBackedgeValue ();
94469446 auto *NewExitingVPV = PhiR->getBackedgeValue ();
94479447 if (!PhiR->isInLoop () && CM.foldTailByMasking ()) {
@@ -9513,17 +9513,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
95139513 });
95149514 FinalReductionResult->insertBefore (*MiddleVPBB, IP);
95159515
9516- // Order is strict: if there are multiple successors, the first is the exit
9517- // block, second is the scalar preheader.
9518- VPBasicBlock *ScalarPHVPBB =
9519- cast<VPBasicBlock>(MiddleVPBB->getSuccessors ().back ());
9520- VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9521- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9522- VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9523- {}, " bc.merge.rdx" );
9524- auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9525- Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9526-
95279516 // Adjust AnyOf reductions; replace the reduction phi for the selected value
95289517 // with a boolean reduction phi node to check if the condition is true in
95299518 // any iteration. The final value is selected by the final
0 commit comments