Skip to content

Commit aecd404

Browse files
committed
[VPlan] Create header phis once, after constructing VPlan0 (NFC).
1 parent 0877a4c commit aecd404

File tree

5 files changed

+123
-103
lines changed

5 files changed

+123
-103
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 20 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,6 +1423,11 @@ class LoopVectorizationCostModel {
14231423
return InLoopReductions.contains(Phi);
14241424
}
14251425

1426+
/// Returns the set of in-loop reduction PHIs.
/// The set is handed out as a const reference to the InLoopReductions member,
/// so callers can run their own membership queries but cannot modify it.
1427+
const SmallPtrSetImpl<PHINode *> &getInLoopReductions() const {
1428+
return InLoopReductions;
1429+
}
1430+
14261431
/// Returns true if the predicated reduction select should be used to set the
14271432
/// incoming value for the reduction phi.
14281433
bool usePredicatedReductionSelect() const {
@@ -7626,53 +7631,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
76267631
VPIRMetadata(*Store, LVer), VPI->getDebugLoc());
76277632
}
76287633

7629-
/// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will
7630-
/// also insert a recipe to expand the step for the induction recipe.
7631-
static VPWidenIntOrFpInductionRecipe *
7632-
createWidenInductionRecipes(VPInstruction *PhiR,
7633-
const InductionDescriptor &IndDesc, VPlan &Plan,
7634-
ScalarEvolution &SE, Loop &OrigLoop) {
7635-
assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
7636-
"step must be loop invariant");
7637-
7638-
VPValue *Start = PhiR->getOperand(0);
7639-
assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
7640-
"Start VPValue must match IndDesc's start value");
7641-
7642-
VPValue *Step =
7643-
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
7644-
7645-
// Update wide induction increments to use the same step as the corresponding
7646-
// wide induction. This enables detecting induction increments directly in
7647-
// VPlan and removes redundant splats.
7648-
using namespace llvm::VPlanPatternMatch;
7649-
if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
7650-
PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
7651-
7652-
PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
7653-
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
7654-
IndDesc, PhiR->getDebugLoc());
7655-
}
7656-
7657-
VPHeaderPHIRecipe *
7658-
VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI) {
7659-
auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr());
7660-
7661-
// Check if this is an integer or fp induction. If so, build the recipe that
7662-
// produces its scalar and vector values.
7663-
if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
7664-
return createWidenInductionRecipes(VPI, *II, Plan, *PSE.getSE(), *OrigLoop);
7665-
7666-
// Check if this is pointer induction. If so, build the recipe for it.
7667-
if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
7668-
VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
7669-
return new VPWidenPointerInductionRecipe(Phi, VPI->getOperand(0), Step,
7670-
&Plan.getVFxUF(), *II,
7671-
VPI->getDebugLoc());
7672-
}
7673-
return nullptr;
7674-
}
7675-
76767634
VPWidenIntOrFpInductionRecipe *
76777635
VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
76787636
VFRange &Range) {
@@ -8149,45 +8107,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
81498107
// First, check for specific widening recipes that deal with inductions, Phi
81508108
// nodes, calls and memory operations.
81518109
VPRecipeBase *Recipe;
8152-
if (auto *PhiR = dyn_cast<VPPhi>(R)) {
8153-
VPBasicBlock *Parent = PhiR->getParent();
8154-
[[maybe_unused]] VPRegionBlock *LoopRegionOf =
8155-
Parent->getEnclosingLoopRegion();
8156-
assert(LoopRegionOf && LoopRegionOf->getEntry() == Parent &&
8157-
"Non-header phis should have been handled during predication");
8158-
auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
8159-
assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
8160-
if ((Recipe = tryToOptimizeInductionPHI(PhiR)))
8161-
return Recipe;
8162-
8163-
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8164-
assert((Legal->isReductionVariable(Phi) ||
8165-
Legal->isFixedOrderRecurrence(Phi)) &&
8166-
"can only widen reductions and fixed-order recurrences here");
8167-
VPValue *StartV = R->getOperand(0);
8168-
if (Legal->isReductionVariable(Phi)) {
8169-
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
8170-
assert(RdxDesc.getRecurrenceStartValue() ==
8171-
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8172-
8173-
// If the PHI is used by a partial reduction, set the scale factor.
8174-
unsigned ScaleFactor =
8175-
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
8176-
PhiRecipe = new VPReductionPHIRecipe(
8177-
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
8178-
CM.useOrderedReductions(RdxDesc), ScaleFactor);
8179-
} else {
8180-
// TODO: Currently fixed-order recurrences are modeled as chains of
8181-
// first-order recurrences. If there are no users of the intermediate
8182-
// recurrences in the chain, the fixed order recurrence should be modeled
8183-
// directly, enabling more efficient codegen.
8184-
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
8185-
}
8186-
// Add backedge value.
8187-
PhiRecipe->addOperand(R->getOperand(1));
8188-
return PhiRecipe;
8189-
}
8190-
assert(!R->isPhi() && "only VPPhi nodes expected at this point");
8110+
assert(!R->isPhi() && "phis must be handled earlier");
81918111

81928112
auto *VPI = cast<VPInstruction>(R);
81938113
Instruction *Instr = R->getUnderlyingInstr();
@@ -8244,6 +8164,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
82448164
if (isa<VPReductionPHIRecipe>(BinOp) || isa<VPPartialReductionRecipe>(BinOp))
82458165
std::swap(BinOp, Accumulator);
82468166

8167+
if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
8168+
RedPhiR->setVFScaleFactor(ScaleFactor);
8169+
82478170
assert(ScaleFactor ==
82488171
vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
82498172
"all accumulators in chain must have same scale factor");
@@ -8290,6 +8213,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
82908213
OrigLoop, *LI, Legal->getWidestInductionType(),
82918214
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE);
82928215

8216+
// Create recipes for header phis.
8217+
VPlanTransforms::createHeaderPhiRecipes(
8218+
*VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(),
8219+
Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
8220+
CM.getInLoopReductions(), Hints.allowReordering());
8221+
82938222
auto MaxVFTimes2 = MaxVF * 2;
82948223
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
82958224
VFRange SubRange = {VF, MaxVFTimes2};
@@ -8410,25 +8339,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84108339
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
84118340
// temporarily to update created block masks.
84128341
DenseMap<VPValue *, VPValue *> Old2New;
8342+
8343+
// Now process all other blocks and instructions.
84138344
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
84148345
// Convert input VPInstructions to widened recipes.
84158346
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
8416-
auto *SingleDef = cast<VPSingleDefRecipe>(&R);
8417-
auto *UnderlyingValue = SingleDef->getUnderlyingValue();
8418-
// Skip recipes that do not need transforming, including canonical IV,
8419-
// wide canonical IV and VPInstructions without underlying values. The
8420-
// latter are added above for masking.
8421-
// FIXME: Migrate code relying on the underlying instruction from VPlan0
8422-
// to construct recipes below to not use the underlying instruction.
8423-
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
8424-
&R) ||
8425-
(isa<VPInstruction>(&R) && !UnderlyingValue))
8347+
auto *SingleDef = dyn_cast<VPInstruction>(&R);
8348+
if (!SingleDef || !SingleDef->getUnderlyingValue())
84268349
continue;
8427-
assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");
84288350

84298351
// TODO: Gradually replace uses of underlying instruction by analyses on
84308352
// VPlan.
8431-
Instruction *Instr = cast<Instruction>(UnderlyingValue);
8353+
Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
84328354
Builder.setInsertPoint(SingleDef);
84338355

84348356
// The stores with invariant address inside the loop will be deleted, and

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,6 @@ class VPRecipeBuilder {
9898
/// recipe that takes an additional VPInstruction for the mask.
9999
VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
100100

101-
/// Check if an induction recipe should be constructed for \p VPI. If so build
102-
/// and return it. If not, return null.
103-
VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI);
104-
105101
/// Optimize the special case where the operand of \p VPI is a constant
106102
/// integer induction variable.
107103
VPWidenIntOrFpInductionRecipe *

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2306,8 +2306,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
23062306
VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
23072307

23082308
/// Creates a new VPFirstOrderRecurrencePHIRecipe for the same underlying PHI
/// node, using this recipe's operand 0 and operand 1.
VPFirstOrderRecurrencePHIRecipe *clone() override {
2309-
return new VPFirstOrderRecurrencePHIRecipe(
2309+
auto *R = new VPFirstOrderRecurrencePHIRecipe(
23102310
cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2311+
// The constructor call above is only given operand 0, so operand 1 is
// attached explicitly to give the clone both operands.
R->addOperand(getOperand(1));
2312+
return R;
23112313
}
23122314

23132315
void execute(VPTransformState &State) override;
@@ -2376,6 +2378,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
23762378
/// Get the factor that the VF of this recipe's output should be scaled by.
23772379
unsigned getVFScaleFactor() const { return VFScaleFactor; }
23782380

2381+
/// Set the factor that the VF of this recipe's output should be scaled by.
/// Overwrites the value reported by getVFScaleFactor().
void setVFScaleFactor(unsigned ScaleFactor) { VFScaleFactor = ScaleFactor; }
2382+
23792383
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
23802384
/// Print the recipe.
23812385
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,93 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
553553
return VPlan0;
554554
}
555555

556+
/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
557+
/// for \p Phi based on \p IndDesc.
///
/// \p PhiR is the VPPhi currently modelling \p Phi: its operand 0 is used as
/// the start value and its debug location is copied to the new recipe. The
/// step is obtained from \p IndDesc's step SCEV via
/// vputils::getOrCreateVPValueForSCEVExpr on \p Plan. \p SE and \p OrigLoop
/// are only used to assert that the step is loop invariant.
///
/// The returned recipe is allocated with new and is not inserted into any
/// block by this function; \p PhiR itself is neither replaced nor erased here.
558+
static VPHeaderPHIRecipe *
559+
createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR,
560+
const InductionDescriptor &IndDesc, VPlan &Plan,
561+
ScalarEvolution &SE, Loop &OrigLoop) {
562+
assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
563+
"step must be loop invariant");
564+
565+
VPValue *Start = PhiR->getOperand(0);
566+
assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
567+
"Start VPValue must match IndDesc's start value");
568+
VPValue *Step =
569+
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
570+
571+
// Pointer inductions return here, before the increment rewrite below, and
// are built with Plan.getVFxUF() rather than Plan.getVF().
if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
572+
return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
573+
IndDesc, PhiR->getDebugLoc());
574+
575+
// Update wide induction increments to use the same step as the corresponding
576+
// wide induction. This enables detecting induction increments directly in
577+
// VPlan and removes redundant splats.
// Only done when operand 1 of PhiR matches an add whose first operand is
// PhiR itself; operand 1 of that add's defining recipe is then set to Step.
578+
using namespace llvm::VPlanPatternMatch;
579+
if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
580+
PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
581+
582+
// Every other induction kind gets a VPWidenIntOrFpInductionRecipe.
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
583+
IndDesc, PhiR->getDebugLoc());
584+
}
585+
586+
// Replaces the VPPhi recipes at the start of the header block of \p Plan with
// header-phi recipes: a widened induction recipe for phis found in
// \p Inductions, a VPReductionPHIRecipe for phis found in \p Reductions, and
// a VPFirstOrderRecurrencePHIRecipe for the rest, which are asserted to be in
// \p FixedOrderRecurrences. Each new recipe is inserted before the VPPhi it
// replaces, takes over all of its uses, and the VPPhi is then erased.
void VPlanTransforms::createHeaderPhiRecipes(
587+
VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
588+
const MapVector<PHINode *, InductionDescriptor> &Inductions,
589+
const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
590+
const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
591+
const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
592+
593+
// The header is taken to be the single successor of the entry block's second
// successor.
// NOTE(review): presumably relies on the block layout of the initial plan
// this is called on - confirm against the plan construction code.
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
594+
Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
595+
596+
// Early-inc iteration is used because the current VPPhi is erased at the end
// of each iteration.
for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
597+
// The canonical IV phi is left untouched.
if (isa<VPCanonicalIVPHIRecipe>(&R))
598+
continue;
599+
// Stop at the first recipe that is not a VPPhi; later recipes in the block
// are not visited.
auto *PhiR = dyn_cast<VPPhi>(&R);
600+
if (!PhiR)
601+
break;
602+
603+
// TODO: Gradually replace uses of underlying instruction by analyses on
604+
// VPlan.
605+
auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
606+
assert(PhiR->getNumOperands() == 2 &&
607+
"Must have 2 operands for header phis");
608+
609+
VPHeaderPHIRecipe *HeaderPhiR = nullptr;
610+
// Classification order: induction first, then reduction, otherwise
// fixed-order recurrence.
auto InductionIt = Inductions.find(Phi);
611+
if (InductionIt != Inductions.end()) {
612+
// Note: operand 1 of PhiR is not added to induction recipes here, unlike
// in the reduction/recurrence path below.
HeaderPhiR = createWidenInductionRecipe(Phi, PhiR, InductionIt->second,
613+
Plan, SE, OrigLoop);
614+
} else {
615+
// Operand 0 of the VPPhi is used as the start value of the new recipe.
VPValue *Start = PhiR->getOperand(0);
616+
auto ReductionIt = Reductions.find(Phi);
617+
if (ReductionIt != Reductions.end()) {
618+
const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
619+
assert(RdxDesc.getRecurrenceStartValue() ==
620+
Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
621+
622+
// Ordered reductions are only requested when reordering is not allowed
// and the descriptor reports the reduction as ordered.
bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
623+
HeaderPhiR = new VPReductionPHIRecipe(
624+
Phi, RdxDesc.getRecurrenceKind(), *Start,
625+
InLoopReductions.contains(Phi), UseOrderedReductions);
626+
} else {
627+
assert(FixedOrderRecurrences.contains(Phi) &&
628+
"can only widen reductions and fixed-order recurrences here");
629+
// TODO: Currently fixed-order recurrences are modeled as chains of
630+
// first-order recurrences. If there are no users of the intermediate
631+
// recurrences in the chain, the fixed order recurrence should be
632+
// modeled directly, enabling more efficient codegen.
633+
HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
634+
}
635+
// Attach operand 1 of the VPPhi as the new recipe's second operand.
HeaderPhiR->addOperand(PhiR->getOperand(1));
636+
}
637+
// Put the new recipe in PhiR's position, redirect all users to it, then
// delete the now-unused VPPhi.
HeaderPhiR->insertBefore(PhiR);
638+
PhiR->replaceAllUsesWith(HeaderPhiR);
639+
PhiR->eraseFromParent();
640+
}
641+
}
642+
556643
void VPlanTransforms::handleEarlyExits(VPlan &Plan,
557644
bool HasUncountableEarlyExit) {
558645
auto *MiddleVPBB = cast<VPBasicBlock>(

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,17 @@ struct VPlanTransforms {
101101
buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
102102
PredicatedScalarEvolution &PSE);
103103

104+
/// Replace VPPhi recipes in \p Plan's header with corresponding
105+
/// VPHeaderPHIRecipe subclasses for inductions, reductions, and
106+
/// fixed-order recurrences. This processes all header phis and creates
107+
/// the appropriate widened recipe for each one.
///
/// \p Inductions maps PHIs to their induction descriptors; PHIs found there
/// get a widened induction recipe. \p Reductions maps PHIs to their
/// recurrence descriptors; PHIs found there get a VPReductionPHIRecipe. Any
/// remaining PHI is expected to be in \p FixedOrderRecurrences and gets a
/// VPFirstOrderRecurrencePHIRecipe. Membership of a reduction PHI in
/// \p InLoopReductions is forwarded to the VPReductionPHIRecipe constructor.
/// If \p AllowReordering is false, reductions whose descriptor reports
/// isOrdered() are created as ordered reductions. \p SE and \p OrigLoop are
/// used for consistency assertions on induction steps and reduction start
/// values.
108+
static void createHeaderPhiRecipes(
109+
VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
110+
const MapVector<PHINode *, InductionDescriptor> &Inductions,
111+
const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
112+
const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
113+
const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
114+
104115
/// Update \p Plan to account for all early exits.
105116
LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan,
106117
bool HasUncountableExit);

0 commit comments

Comments
 (0)