@@ -1423,6 +1423,11 @@ class LoopVectorizationCostModel {
14231423 return InLoopReductions.contains (Phi);
14241424 }
14251425
1426+ /// Returns the set of in-loop reduction PHIs (InLoopReductions), by const reference.
1427+ const SmallPtrSetImpl<PHINode *> &getInLoopReductions () const {
1428+ return InLoopReductions;
1429+ }
1430+
14261431 /// Returns true if the predicated reduction select should be used to set the
14271432 /// incoming value for the reduction phi.
14281433 bool usePredicatedReductionSelect () const {
@@ -7626,53 +7631,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
76267631 VPIRMetadata (*Store, LVer), VPI->getDebugLoc ());
76277632}
76287633
7629- /// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will
7630- /// also insert a recipe to expand the step for the induction recipe.
7631- static VPWidenIntOrFpInductionRecipe *
7632- createWidenInductionRecipes (VPInstruction *PhiR,
7633- const InductionDescriptor &IndDesc, VPlan &Plan,
7634- ScalarEvolution &SE, Loop &OrigLoop) {
7635- assert (SE.isLoopInvariant (IndDesc.getStep (), &OrigLoop) &&
7636- " step must be loop invariant" );
7637-
7638- VPValue *Start = PhiR->getOperand (0 );
7639- assert (Plan.getLiveIn (IndDesc.getStartValue ()) == Start &&
7640- " Start VPValue must match IndDesc's start value" );
7641-
7642- VPValue *Step =
7643- vputils::getOrCreateVPValueForSCEVExpr (Plan, IndDesc.getStep ());
7644-
7645- // Update wide induction increments to use the same step as the corresponding
7646- // wide induction. This enables detecting induction increments directly in
7647- // VPlan and removes redundant splats.
7648- using namespace llvm ::VPlanPatternMatch;
7649- if (match (PhiR->getOperand (1 ), m_Add (m_Specific (PhiR), m_VPValue ())))
7650- PhiR->getOperand (1 )->getDefiningRecipe ()->setOperand (1 , Step);
7651-
7652- PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr ());
7653- return new VPWidenIntOrFpInductionRecipe (Phi, Start, Step, &Plan.getVF (),
7654- IndDesc, PhiR->getDebugLoc ());
7655- }
7656-
7657- VPHeaderPHIRecipe *
7658- VPRecipeBuilder::tryToOptimizeInductionPHI (VPInstruction *VPI) {
7659- auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr ());
7660-
7661- // Check if this is an integer or fp induction. If so, build the recipe that
7662- // produces its scalar and vector values.
7663- if (auto *II = Legal->getIntOrFpInductionDescriptor (Phi))
7664- return createWidenInductionRecipes (VPI, *II, Plan, *PSE.getSE (), *OrigLoop);
7665-
7666- // Check if this is a pointer induction. If so, build the recipe for it.
7667- if (auto *II = Legal->getPointerInductionDescriptor (Phi)) {
7668- VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr (Plan, II->getStep ());
7669- return new VPWidenPointerInductionRecipe (Phi, VPI->getOperand (0 ), Step,
7670- &Plan.getVFxUF (), *II,
7671- VPI->getDebugLoc ());
7672- }
7673- return nullptr ;
7674- }
7675-
76767634VPWidenIntOrFpInductionRecipe *
76777635VPRecipeBuilder::tryToOptimizeInductionTruncate (VPInstruction *VPI,
76787636 VFRange &Range) {
@@ -8149,45 +8107,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
81498107 // First, check for specific widening recipes that deal with inductions, Phi
81508108 // nodes, calls and memory operations.
81518109 VPRecipeBase *Recipe;
8152- if (auto *PhiR = dyn_cast<VPPhi>(R)) {
8153- VPBasicBlock *Parent = PhiR->getParent ();
8154- [[maybe_unused]] VPRegionBlock *LoopRegionOf =
8155- Parent->getEnclosingLoopRegion ();
8156- assert (LoopRegionOf && LoopRegionOf->getEntry () == Parent &&
8157- " Non-header phis should have been handled during predication" );
8158- auto *Phi = cast<PHINode>(R->getUnderlyingInstr ());
8159- assert (R->getNumOperands () == 2 && " Must have 2 operands for header phis" );
8160- if ((Recipe = tryToOptimizeInductionPHI (PhiR)))
8161- return Recipe;
8162-
8163- VPHeaderPHIRecipe *PhiRecipe = nullptr ;
8164- assert ((Legal->isReductionVariable (Phi) ||
8165- Legal->isFixedOrderRecurrence (Phi)) &&
8166- " can only widen reductions and fixed-order recurrences here" );
8167- VPValue *StartV = R->getOperand (0 );
8168- if (Legal->isReductionVariable (Phi)) {
8169- const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor (Phi);
8170- assert (RdxDesc.getRecurrenceStartValue () ==
8171- Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8172-
8173- // If the PHI is used by a partial reduction, set the scale factor.
8174- unsigned ScaleFactor =
8175- getScalingForReduction (RdxDesc.getLoopExitInstr ()).value_or (1 );
8176- PhiRecipe = new VPReductionPHIRecipe (
8177- Phi, RdxDesc.getRecurrenceKind (), *StartV, CM.isInLoopReduction (Phi),
8178- CM.useOrderedReductions (RdxDesc), ScaleFactor);
8179- } else {
8180- // TODO: Currently fixed-order recurrences are modeled as chains of
8181- // first-order recurrences. If there are no users of the intermediate
8182- // recurrences in the chain, the fixed order recurrence should be modeled
8183- // directly, enabling more efficient codegen.
8184- PhiRecipe = new VPFirstOrderRecurrencePHIRecipe (Phi, *StartV);
8185- }
8186- // Add backedge value.
8187- PhiRecipe->addOperand (R->getOperand (1 ));
8188- return PhiRecipe;
8189- }
8190- assert (!R->isPhi () && " only VPPhi nodes expected at this point" );
8110+ assert (!R->isPhi () && " phis must be handled earlier" );
81918111
81928112 auto *VPI = cast<VPInstruction>(R);
81938113 Instruction *Instr = R->getUnderlyingInstr ();
@@ -8244,6 +8164,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
82448164 if (isa<VPReductionPHIRecipe>(BinOp) || isa<VPPartialReductionRecipe>(BinOp))
82458165 std::swap (BinOp, Accumulator);
82468166
8167+ if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
8168+ RedPhiR->setVFScaleFactor (ScaleFactor);
8169+
82478170 assert (ScaleFactor ==
82488171 vputils::getVFScaleFactor (Accumulator->getDefiningRecipe ()) &&
82498172 " all accumulators in chain must have same scale factor" );
@@ -8290,6 +8213,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
82908213 OrigLoop, *LI, Legal->getWidestInductionType (),
82918214 getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ()), PSE);
82928215
8216+ // Create recipes for header phis.
8217+ VPlanTransforms::createHeaderPhiRecipes (
8218+ *VPlan0, *PSE.getSE (), *OrigLoop, Legal->getInductionVars (),
8219+ Legal->getReductionVars (), Legal->getFixedOrderRecurrences (),
8220+ CM.getInLoopReductions (), Hints.allowReordering ());
8221+
82938222 auto MaxVFTimes2 = MaxVF * 2 ;
82948223 for (ElementCount VF = MinVF; ElementCount::isKnownLT (VF, MaxVFTimes2);) {
82958224 VFRange SubRange = {VF, MaxVFTimes2};
@@ -8410,25 +8339,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84108339 // Mapping from VPValues in the initial plan to their widened VPValues. Needed
84118340 // temporarily to update created block masks.
84128341 DenseMap<VPValue *, VPValue *> Old2New;
8342+
8343+ // Now process all other blocks and instructions.
84138344 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
84148345 // Convert input VPInstructions to widened recipes.
84158346 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
8416- auto *SingleDef = cast<VPSingleDefRecipe>(&R);
8417- auto *UnderlyingValue = SingleDef->getUnderlyingValue ();
8418- // Skip recipes that do not need transforming, including canonical IV,
8419- // wide canonical IV and VPInstructions without underlying values. The
8420- // latter are added above for masking.
8421- // FIXME: Migrate code relying on the underlying instruction from VPlan0
8422- // to construct recipes below to not use the underlying instruction.
8423- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
8424- &R) ||
8425- (isa<VPInstruction>(&R) && !UnderlyingValue))
8347+ auto *SingleDef = dyn_cast<VPInstruction>(&R);
8348+ if (!SingleDef || !SingleDef->getUnderlyingValue ())
84268349 continue ;
8427- assert (isa<VPInstruction>(&R) && UnderlyingValue && " unsupported recipe" );
84288350
84298351 // TODO: Gradually replace uses of underlying instruction by analyses on
84308352 // VPlan.
8431- Instruction *Instr = cast<Instruction>(UnderlyingValue );
8353+ Instruction *Instr = cast<Instruction>(SingleDef-> getUnderlyingValue () );
84328354 Builder.setInsertPoint (SingleDef);
84338355
84348356 // The stores with invariant address inside the loop will be deleted, and
0 commit comments