Skip to content

Commit 61ceb1a

Browse files
committed
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header. Note that the current version only includes the scalar loop header; it does not include any other blocks of the scalar loop, and it does not wrap the header in a region block. Modeling those can either be added in this PR or in follow-ups as needed.
1 parent af872d5 commit 61ceb1a

26 files changed

+440
-205
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2969,10 +2969,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29692969
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29702970
}
29712971

2972-
// Fix live-out phis not already fixed earlier.
2973-
for (const auto &KV : Plan.getLiveOuts())
2974-
KV.second->fixPhi(Plan, State);
2975-
29762972
for (Instruction *PI : PredicatedInstructions)
29772973
sinkScalarOperands(&*PI);
29782974

@@ -8878,21 +8874,9 @@ static void addLiveOutsForFirstOrderRecurrences(
88788874
// Start by finding out if middle block branches to scalar preheader, which is
88798875
// not a VPIRBasicBlock, unlike Exit block - the other possible successor of
88808876
// middle block.
8881-
// TODO: Should be replaced by
8882-
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8883-
// scalar region is modeled as well.
8884-
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8885-
VPBasicBlock *ScalarPHVPBB = nullptr;
8886-
if (MiddleVPBB->getNumSuccessors() == 2) {
8887-
// Order is strict: first is the exit block, second is the scalar preheader.
8888-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8889-
} else if (ExitUsersToFix.empty()) {
8890-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
8891-
} else {
8892-
llvm_unreachable("unsupported CFG in VPlan");
8893-
}
8894-
8877+
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
88958878
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
8879+
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
88968880
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
88978881
VPValue *OneVPV = Plan.getOrAddLiveIn(
88988882
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
@@ -8979,7 +8963,14 @@ static void addLiveOutsForFirstOrderRecurrences(
89798963
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
89808964
"scalar.recur.init");
89818965
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8982-
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8966+
for (VPRecipeBase &R :
8967+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8968+
auto *IRI = cast<VPIRInstruction>(&R);
8969+
if (&IRI->getInstruction() == FORPhi) {
8970+
IRI->addOperand(ResumePhiRecipe);
8971+
break;
8972+
}
8973+
}
89838974

89848975
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
89858976
// Extract the penultimate value of the recurrence and use it as operand for

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
456456
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
457457
executeRecipes(State, getIRBasicBlock());
458458
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
460-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
461-
Br->setOperand(0, nullptr);
462-
getIRBasicBlock()->getTerminator()->eraseFromParent();
459+
auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
460+
if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
461+
getIRBasicBlock()->getSingleSuccessor()) {
462+
cast<BranchInst>(getIRBasicBlock()->getTerminator())
463+
->setOperand(0, nullptr);
464+
} else {
465+
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
466+
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
467+
Br->setOperand(0, nullptr);
468+
getIRBasicBlock()->getTerminator()->eraseFromParent();
469+
}
463470
}
464471

465472
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -663,14 +670,16 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
663670
}
664671
#endif
665672

666-
static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
673+
static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
674+
cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader = nullptr);
667675

668676
// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
669677
// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
670678
// Remapping of operands must be done separately. Returns a tuple with the new
671679
// entry and exiting blocks of the cloned region, followed by the clone of
672680
// \p ScalarHeader. If \p Entry isn't part of a region, the exiting block is
// nullptr; if no \p ScalarHeader is passed, its clone is nullptr.
673-
static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
681+
static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
682+
cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader) {
674683
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
675684
VPBlockBase *Exiting = nullptr;
676685
bool InRegion = Entry->getParent();
@@ -716,12 +725,14 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
716725
}
717726
#endif
718727

719-
return std::make_pair(Old2NewVPBlocks[Entry],
720-
Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
728+
return std::tuple(
729+
Old2NewVPBlocks[Entry], Exiting ? Old2NewVPBlocks[Exiting] : nullptr,
730+
ScalarHeader ? cast<VPIRBasicBlock>(Old2NewVPBlocks[ScalarHeader])
731+
: nullptr);
721732
}
722733

723734
VPRegionBlock *VPRegionBlock::clone() {
724-
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
735+
const auto &[NewEntry, NewExiting, _] = cloneFrom(getEntry());
725736
auto *NewRegion =
726737
new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
727738
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
@@ -843,10 +854,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
843854
#endif
844855

845856
VPlan::~VPlan() {
846-
for (auto &KV : LiveOuts)
847-
delete KV.second;
848-
LiveOuts.clear();
849-
850857
if (Entry) {
851858
VPValue DummyValue;
852859
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -878,7 +885,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
878885
VPIRBasicBlock *Entry =
879886
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
880887
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
881-
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
888+
VPIRBasicBlock *ScalarHeader =
889+
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
890+
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
882891

883892
// Create SCEV and VPValue for the trip count.
884893

@@ -909,6 +918,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
909918
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
910919

911920
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
921+
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
912922
if (!RequiresScalarEpilogueCheck) {
913923
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
914924
return Plan;
@@ -1054,6 +1064,8 @@ void VPlan::execute(VPTransformState *State) {
10541064
BrInst->insertBefore(MiddleBB->getTerminator());
10551065
MiddleBB->getTerminator()->eraseFromParent();
10561066
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1067+
State->CFG.DTU.applyUpdates(
1068+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
10571069

10581070
// Generate code in the loop pre-header and body.
10591071
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1172,12 +1184,6 @@ void VPlan::print(raw_ostream &O) const {
11721184
Block->print(O, "", SlotTracker);
11731185
}
11741186

1175-
if (!LiveOuts.empty())
1176-
O << "\n";
1177-
for (const auto &KV : LiveOuts) {
1178-
KV.second->print(O, SlotTracker);
1179-
}
1180-
11811187
O << "}\n";
11821188
}
11831189

@@ -1214,11 +1220,6 @@ LLVM_DUMP_METHOD
12141220
void VPlan::dump() const { print(dbgs()); }
12151221
#endif
12161222

1217-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1218-
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1219-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1220-
}
1221-
12221223
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12231224
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
12241225
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1260,10 +1261,12 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12601261
VPlan *VPlan::duplicate() {
12611262
// Clone blocks.
12621263
VPBasicBlock *NewPreheader = Preheader->clone();
1263-
const auto &[NewEntry, __] = cloneFrom(Entry);
1264+
const auto &[NewEntry, __, NewScalarHeader] =
1265+
cloneFrom(Entry, getScalarHeader());
12641266

12651267
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
1266-
auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
1268+
auto *NewPlan =
1269+
new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
12671270
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
12681271
for (VPValue *OldLiveIn : VPLiveInsToFree) {
12691272
Old2NewVPValues[OldLiveIn] =
@@ -1286,10 +1289,6 @@ VPlan *VPlan::duplicate() {
12861289
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12871290
remapOperands(Entry, NewEntry, Old2NewVPValues);
12881291

1289-
// Clone live-outs.
1290-
for (const auto &[_, LO] : LiveOuts)
1291-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1292-
12931292
// Initialize remaining fields of cloned VPlan.
12941293
NewPlan->VFs = VFs;
12951294
NewPlan->UFs = UFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 19 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -675,48 +675,6 @@ class VPBlockBase {
675675
virtual VPBlockBase *clone() = 0;
676676
};
677677

678-
/// A value that is used outside the VPlan. The operand of the user needs to be
679-
/// added to the associated phi node. The incoming block from VPlan is
680-
/// determined by where the VPValue is defined: if it is defined by a recipe
681-
/// outside a region, its parent block is used, otherwise the middle block is
682-
/// used.
683-
class VPLiveOut : public VPUser {
684-
PHINode *Phi;
685-
686-
public:
687-
VPLiveOut(PHINode *Phi, VPValue *Op)
688-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
689-
690-
static inline bool classof(const VPUser *U) {
691-
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
692-
}
693-
694-
/// Fix the wrapped phi node. This means adding an incoming value to exit
695-
/// block phi's from the vector loop via middle block (values from scalar loop
696-
/// already reach these phi's), and updating the value to scalar header phi's
697-
/// from the scalar preheader.
698-
void fixPhi(VPlan &Plan, VPTransformState &State);
699-
700-
/// Returns true if the VPLiveOut uses scalars of operand \p Op.
701-
bool usesScalars(const VPValue *Op) const override {
702-
assert(is_contained(operands(), Op) &&
703-
"Op must be an operand of the recipe");
704-
return true;
705-
}
706-
707-
PHINode *getPhi() const { return Phi; }
708-
709-
/// Live-outs are marked as only using the first part during the transition
710-
/// to unrolling directly on VPlan.
711-
/// TODO: Remove after unroller transition.
712-
bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
713-
714-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
715-
/// Print the VPLiveOut to \p O.
716-
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
717-
#endif
718-
};
719-
720678
/// Struct to hold various analysis needed for cost computations.
721679
struct VPCostContext {
722680
const TargetTransformInfo &TTI;
@@ -3603,6 +3561,9 @@ class VPlan {
36033561
/// rest of VPlan execution.
36043562
VPBasicBlock *Preheader;
36053563

3564+
/// VPIRBasicBlock wrapping the header of the original scalar loop.
3565+
VPIRBasicBlock *ScalarHeader;
3566+
36063567
/// Holds the VFs applicable to this VPlan.
36073568
SmallSetVector<ElementCount, 2> VFs;
36083569

@@ -3638,11 +3599,6 @@ class VPlan {
36383599
/// definitions are VPValues that hold a pointer to their underlying IR.
36393600
SmallVector<VPValue *, 16> VPLiveInsToFree;
36403601

3641-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3642-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3643-
/// live-outs are fixed via VPLiveOut::fixPhi.
3644-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3645-
36463602
/// Mapping from SCEVs to the VPValues representing their expansions.
36473603
/// NOTE: This mapping is temporary and will be removed once all users have
36483604
/// been modeled in VPlan directly.
@@ -3653,22 +3609,26 @@ class VPlan {
36533609
/// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
36543610
/// be disconnected, as the bypass blocks between them are not yet modeled in
36553611
/// VPlan.
3656-
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
3657-
: VPlan(Preheader, Entry) {
3612+
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry,
3613+
VPIRBasicBlock *ScalarHeader)
3614+
: VPlan(Preheader, Entry, ScalarHeader) {
36583615
TripCount = TC;
36593616
}
36603617

36613618
/// Construct a VPlan with original preheader \p Preheader and \p Entry to
36623619
/// the plan. At the moment, \p Preheader and \p Entry need to be
36633620
/// disconnected, as the bypass blocks between them are not yet modeled in
36643621
/// VPlan.
3665-
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
3666-
: Entry(Entry), Preheader(Preheader) {
3622+
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry,
3623+
VPIRBasicBlock *ScalarHeader)
3624+
: Entry(Entry), Preheader(Preheader), ScalarHeader(ScalarHeader) {
36673625
Entry->setPlan(this);
36683626
Preheader->setPlan(this);
36693627
assert(Preheader->getNumSuccessors() == 0 &&
36703628
Preheader->getNumPredecessors() == 0 &&
36713629
"preheader must be disconnected");
3630+
assert(ScalarHeader->getNumSuccessors() == 0 &&
3631+
"scalar header must be a leaf node");
36723632
}
36733633

36743634
~VPlan();
@@ -3700,6 +3660,14 @@ class VPlan {
37003660
VPBasicBlock *getEntry() { return Entry; }
37013661
const VPBasicBlock *getEntry() const { return Entry; }
37023662

3663+
/// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3664+
VPIRBasicBlock *getScalarHeader() { return ScalarHeader; }
3665+
3666+
/// Return the VPBasicBlock for the preheader of the scalar loop.
3667+
VPBasicBlock *getScalarPreheader() {
3668+
return cast<VPBasicBlock>(ScalarHeader->getSinglePredecessor());
3669+
}
3670+
37033671
/// The trip count of the original loop.
37043672
VPValue *getTripCount() const {
37053673
assert(TripCount && "trip count needs to be set before accessing it");
@@ -3827,12 +3795,6 @@ class VPlan {
38273795
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
38283796
}
38293797

3830-
void addLiveOut(PHINode *PN, VPValue *V);
3831-
3832-
const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
3833-
return LiveOuts;
3834-
}
3835-
38363798
VPValue *getSCEVExpansion(const SCEV *S) const {
38373799
return SCEVToExpansion.lookup(S);
38383800
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -213,35 +213,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
213213
}
214214
}
215215

216-
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
217-
VPValue *ExitValue = getOperand(0);
218-
VPBasicBlock *MiddleVPBB =
219-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
220-
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
221-
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
222-
// Values leaving the vector loop reach live out phi's in the exiting block
223-
// via middle block.
224-
auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
225-
? MiddleVPBB
226-
: ExitingVPBB;
227-
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
228-
Value *V = State.get(ExitValue, VPLane(0));
229-
if (Phi->getBasicBlockIndex(PredBB) != -1)
230-
Phi->setIncomingValueForBlock(PredBB, V);
231-
else
232-
Phi->addIncoming(V, PredBB);
233-
}
234-
235-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
236-
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
237-
O << "Live-out ";
238-
getPhi()->printAsOperand(O);
239-
O << " = ";
240-
getOperand(0)->printAsOperand(O, SlotTracker);
241-
O << "\n";
242-
}
243-
#endif
244-
245216
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
246217
assert(!Parent && "Recipe already in some VPBasicBlock");
247218
assert(InsertPos->getParent() &&
@@ -872,7 +843,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
872843
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
873844
Value *V = State.get(ExitValue, VPLane(Lane));
874845
auto *Phi = cast<PHINode>(&I);
875-
Phi->addIncoming(V, PredBB);
846+
if (Phi->getBasicBlockIndex(PredBB) == -1)
847+
Phi->addIncoming(V, PredBB);
848+
else
849+
Phi->setIncomingValueForBlock(PredBB, V);
876850
}
877851

878852
// Advance the insert point after the wrapped IR instruction. This allows

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
379379
// Don't fold blocks that are not nested in a region (such as the exit block
380380
// of the Plan) into their single predecessor for now.
381381
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
382-
if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
382+
if (!VPBB->getParent())
383383
continue;
384384
auto *PredVPBB =
385385
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());

0 commit comments

Comments
 (0)