Skip to content

Commit facb382

Browse files
author
git apple-llvm automerger
committed
Merge commit 'dcef154b5caf' from llvm.org/main into next
2 parents 3cf4034 + dcef154 commit facb382

16 files changed

+299
-246
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2781,13 +2781,13 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
27812781
PSE.getSE()->forgetLoop(OrigLoop);
27822782
PSE.getSE()->forgetBlockAndLoopDispositions();
27832783

2784-
// Don't apply optimizations below when no vector region remains, as they all
2785-
// require a vector loop at the moment.
2786-
if (!State.Plan->getVectorLoopRegion())
2784+
// Don't apply optimizations below when no (vector) loop remains, as they all
2785+
// require one at the moment.
2786+
VPBasicBlock *HeaderVPBB =
2787+
vputils::getFirstLoopHeader(*State.Plan, State.VPDT);
2788+
if (!HeaderVPBB)
27872789
return;
27882790

2789-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
2790-
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
27912791
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
27922792

27932793
// Remove redundant induction instructions.
@@ -2812,7 +2812,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28122812
}
28132813

28142814
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
2815-
auto Iter = vp_depth_first_deep(Plan.getEntry());
2815+
auto Iter = vp_depth_first_shallow(Plan.getEntry());
28162816
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
28172817
for (VPRecipeBase &P : VPBB->phis()) {
28182818
VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
@@ -7623,6 +7623,13 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76237623
BestVPlan, BestVF,
76247624
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
76257625
VPlanTransforms::removeDeadRecipes(BestVPlan);
7626+
7627+
// Retrieve and store the middle block before dissolving regions. Regions are
7628+
// dissolved after optimizing for VF and UF, which completely removes unneeded
7629+
// loop regions first.
7630+
VPBasicBlock *MiddleVPBB =
7631+
BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr;
7632+
VPlanTransforms::dissolveLoopRegions(BestVPlan);
76267633
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
76277634
*Legal->getWidestInductionType());
76287635
// Perform the actual loop transformation.
@@ -7720,14 +7727,14 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77207727
// 2.6. Maintain Loop Hints
77217728
// Keep all loop hints from the original loop on the vector loop (we'll
77227729
// replace the vectorizer-specific hints below).
7723-
if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) {
7730+
VPBasicBlock *HeaderVPBB = vputils::getFirstLoopHeader(BestVPlan, State.VPDT);
7731+
if (HeaderVPBB) {
77247732
MDNode *OrigLoopID = OrigLoop->getLoopID();
77257733

77267734
std::optional<MDNode *> VectorizedLoopID =
77277735
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
77287736
LLVMLoopVectorizeFollowupVectorized});
77297737

7730-
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
77317738
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
77327739
if (VectorizedLoopID) {
77337740
L->setLoopID(*VectorizedLoopID);
@@ -7773,8 +7780,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77737780
ILV.printDebugTracesAtEnd();
77747781

77757782
// 4. Adjust branch weight of the branch in the middle block.
7776-
if (BestVPlan.getVectorLoopRegion()) {
7777-
auto *MiddleVPBB = BestVPlan.getMiddleBlock();
7783+
if (HeaderVPBB) {
77787784
auto *MiddleTerm =
77797785
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());
77807786
if (MiddleTerm->isConditional() &&

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 84 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,32 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
207207
return Parent->getEnclosingBlockWithPredecessors();
208208
}
209209

210+
bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
211+
const VPDominatorTree &VPDT) {
212+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
213+
if (!VPBB)
214+
return false;
215+
216+
// If VPBB is in a region R, VPBB is a loop header if R is a loop region with
217+
// VPBB as its entry, i.e., free of predecessors.
218+
if (auto *R = VPBB->getParent())
219+
return !R->isReplicator() && VPBB->getNumPredecessors() == 0;
220+
221+
// A header dominates its second predecessor (the latch), with the other
222+
// predecessor being the preheader.
223+
return VPB->getPredecessors().size() == 2 &&
224+
VPDT.dominates(VPB, VPB->getPredecessors()[1]);
225+
}
226+
227+
bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
228+
const VPDominatorTree &VPDT) {
229+
// A latch has a header as its second successor, with its other successor
230+
// leaving the loop. A preheader OTOH has a header as its first (and only)
231+
// successor.
232+
return VPB->getNumSuccessors() == 2 &&
233+
VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
234+
}
235+
210236
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
211237
iterator It = begin();
212238
while (It != end() && It->isPhi())
@@ -424,13 +450,21 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
424450
if (ParentLoop && !State.LI->getLoopFor(NewBB))
425451
ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
426452

453+
SmallVector<VPBlockBase *> Preds;
454+
if (VPBlockUtils::isHeader(this, State.VPDT)) {
455+
// There's no block for the latch yet, connect to the preheader only.
456+
Preds = {getPredecessors()[0]};
457+
} else {
458+
Preds = to_vector(getPredecessors());
459+
}
460+
427461
// Hook up the new basic block to its predecessors.
428-
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
462+
for (VPBlockBase *PredVPBlock : Preds) {
429463
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
430464
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
465+
assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
466+
"Predecessor basic-block not found building successor.");
431467
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
432-
433-
assert(PredBB && "Predecessor basic-block not found building successor.");
434468
auto *PredBBTerminator = PredBB->getTerminator();
435469
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
436470

@@ -491,11 +525,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
491525
bool Replica = bool(State->Lane);
492526
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
493527

528+
if (VPBlockUtils::isHeader(this, State->VPDT)) {
529+
// Create and register the new vector loop.
530+
Loop *PrevParentLoop = State->CurrentParentLoop;
531+
State->CurrentParentLoop = State->LI->AllocateLoop();
532+
533+
// Insert the new loop into the loop nest and register the new basic blocks
534+
// before calling any utilities such as SCEV that require valid LoopInfo.
535+
if (PrevParentLoop)
536+
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
537+
else
538+
State->LI->addTopLevelLoop(State->CurrentParentLoop);
539+
}
540+
494541
auto IsReplicateRegion = [](VPBlockBase *BB) {
495542
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
496-
return R && R->isReplicator();
543+
assert((!R || R->isReplicator()) &&
544+
"only replicate region blocks should remain");
545+
return R;
497546
};
498-
499547
// 1. Create an IR basic block.
500548
if ((Replica && this == getParent()->getEntry()) ||
501549
IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -518,6 +566,10 @@ void VPBasicBlock::execute(VPTransformState *State) {
518566

519567
// 2. Fill the IR basic block with IR instructions.
520568
executeRecipes(State, NewBB);
569+
570+
// If this block is a latch, update CurrentParentLoop.
571+
if (VPBlockUtils::isLatch(this, State->VPDT))
572+
State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
521573
}
522574

523575
VPBasicBlock *VPBasicBlock::clone() {
@@ -729,35 +781,13 @@ VPRegionBlock *VPRegionBlock::clone() {
729781
}
730782

731783
void VPRegionBlock::execute(VPTransformState *State) {
732-
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
733-
RPOT(Entry);
734-
735-
if (!isReplicator()) {
736-
// Create and register the new vector loop.
737-
Loop *PrevParentLoop = State->CurrentParentLoop;
738-
State->CurrentParentLoop = State->LI->AllocateLoop();
739-
740-
// Insert the new loop into the loop nest and register the new basic blocks
741-
// before calling any utilities such as SCEV that require valid LoopInfo.
742-
if (PrevParentLoop)
743-
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
744-
else
745-
State->LI->addTopLevelLoop(State->CurrentParentLoop);
746-
747-
// Visit the VPBlocks connected to "this", starting from it.
748-
for (VPBlockBase *Block : RPOT) {
749-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
750-
Block->execute(State);
751-
}
752-
753-
State->CurrentParentLoop = PrevParentLoop;
754-
return;
755-
}
756-
784+
assert(isReplicator() &&
785+
"Loop regions should have been lowered to plain CFG");
757786
assert(!State->Lane && "Replicating a Region with non-null instance.");
758-
759-
// Enter replicating mode.
760787
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
788+
789+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
790+
Entry);
761791
State->Lane = VPLane(0);
762792
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
763793
++Lane) {
@@ -851,6 +881,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
851881
}
852882
#endif
853883

884+
void VPRegionBlock::dissolveToCFGLoop() {
885+
auto *Header = cast<VPBasicBlock>(getEntry());
886+
VPBlockBase *Preheader = getSinglePredecessor();
887+
auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
888+
VPBlockBase *Middle = getSingleSuccessor();
889+
VPBlockUtils::disconnectBlocks(Preheader, this);
890+
VPBlockUtils::disconnectBlocks(this, Middle);
891+
892+
for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
893+
VPB->setParent(getParent());
894+
895+
VPBlockUtils::connectBlocks(Preheader, Header);
896+
VPBlockUtils::connectBlocks(ExitingLatch, Middle);
897+
VPBlockUtils::connectBlocks(ExitingLatch, Header);
898+
}
899+
854900
VPlan::VPlan(Loop *L) {
855901
setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
856902
ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -962,16 +1008,15 @@ void VPlan::execute(VPTransformState *State) {
9621008

9631009
State->CFG.DTU.flush();
9641010

965-
auto *LoopRegion = getVectorLoopRegion();
966-
if (!LoopRegion)
1011+
VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
1012+
if (!Header)
9671013
return;
9681014

969-
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1015+
auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
9701016
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
9711017

9721018
// Fix the latch value of canonical, reduction and first-order recurrences
9731019
// phis in the vector loop.
974-
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
9751020
for (VPRecipeBase &R : Header->phis()) {
9761021
// Skip phi-like recipes that generate their backedge values themselves.
9771022
if (isa<VPWidenPHIRecipe>(&R))
@@ -1007,8 +1052,10 @@ void VPlan::execute(VPTransformState *State) {
10071052
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
10081053
(isa<VPReductionPHIRecipe>(PhiR) &&
10091054
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1055+
10101056
Value *Phi = State->get(PhiR, NeedsScalar);
1011-
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1057+
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1058+
// not.
10121059
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
10131060
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10141061
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3872,6 +3872,10 @@ class VPRegionBlock : public VPBlockBase {
38723872
/// Clone all blocks in the single-entry single-exit region of the block and
38733873
/// their recipes without updating the operands of the cloned recipes.
38743874
VPRegionBlock *clone() override;
3875+
3876+
/// Remove the current region from its VPlan, connecting its predecessor to
3877+
/// its entry, and its exiting block to its successor.
3878+
void dissolveToCFGLoop();
38753879
};
38763880

38773881
/// VPlan models a candidate for vectorization, encoding various decisions take

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 22 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,26 @@ Value *VPInstruction::generatePerLane(VPTransformState &State,
462462
State.get(getOperand(1), Lane), Name);
463463
}
464464

465+
/// Create a conditional branch using \p Cond branching to the successors of \p
466+
/// VPBB. Note that the first successor is always forward (i.e. not created yet)
467+
/// while the second successor may already have been created (if it is a header
468+
/// block and VPBB is a latch).
469+
static BranchInst *createCondBranch(Value *Cond, VPBasicBlock *VPBB,
470+
VPTransformState &State) {
471+
// Replace the temporary unreachable terminator with a new conditional
472+
// branch, hooking it up to backward destination (header) for latch blocks
473+
// now, and to forward destination(s) later when they are created.
474+
// Second successor may be backwards - iff it is already in VPBB2IRBB.
475+
VPBasicBlock *SecondVPSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
476+
BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc);
477+
BasicBlock *IRBB = State.CFG.VPBB2IRBB[VPBB];
478+
BranchInst *CondBr = State.Builder.CreateCondBr(Cond, IRBB, SecondIRSucc);
479+
// First successor is always forward, reset it to nullptr
480+
CondBr->setSuccessor(0, nullptr);
481+
IRBB->getTerminator()->eraseFromParent();
482+
return CondBr;
483+
}
484+
465485
Value *VPInstruction::generate(VPTransformState &State) {
466486
IRBuilderBase &Builder = State.Builder;
467487

@@ -581,43 +601,14 @@ Value *VPInstruction::generate(VPTransformState &State) {
581601
}
582602
case VPInstruction::BranchOnCond: {
583603
Value *Cond = State.get(getOperand(0), VPLane(0));
584-
// Replace the temporary unreachable terminator with a new conditional
585-
// branch, hooking it up to backward destination for exiting blocks now and
586-
// to forward destination(s) later when they are created.
587-
BranchInst *CondBr =
588-
Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
589-
CondBr->setSuccessor(0, nullptr);
590-
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
591-
592-
if (!getParent()->isExiting())
593-
return CondBr;
594-
595-
VPRegionBlock *ParentRegion = getParent()->getParent();
596-
VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
597-
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
598-
return CondBr;
604+
return createCondBranch(Cond, getParent(), State);
599605
}
600606
case VPInstruction::BranchOnCount: {
601607
// First create the compare.
602608
Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
603609
Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
604610
Value *Cond = Builder.CreateICmpEQ(IV, TC);
605-
606-
// Now create the branch.
607-
auto *Plan = getParent()->getPlan();
608-
VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
609-
VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
610-
611-
// Replace the temporary unreachable terminator with a new conditional
612-
// branch, hooking it up to backward destination (the header) now and to the
613-
// forward destination (the exit/middle block) later when it is created.
614-
// Note that CreateCondBr expects a valid BB as first argument, so we need
615-
// to set it to nullptr later.
616-
BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
617-
State.CFG.VPBB2IRBB[Header]);
618-
CondBr->setSuccessor(0, nullptr);
619-
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
620-
return CondBr;
611+
return createCondBranch(Cond, getParent(), State);
621612
}
622613
case VPInstruction::Broadcast: {
623614
return Builder.CreateVectorSplat(
@@ -1127,10 +1118,6 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
11271118

11281119
void VPPhi::execute(VPTransformState &State) {
11291120
State.setDebugLocFrom(getDebugLoc());
1130-
assert(getParent() ==
1131-
getParent()->getPlan()->getVectorLoopRegion()->getEntry() &&
1132-
"VPInstructions with PHI opcodes must be used for header phis only "
1133-
"at the moment");
11341121
BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0));
11351122
Value *Start = State.get(getIncomingValue(0), VPLane(0));
11361123
PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName());

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2506,6 +2506,18 @@ void VPlanTransforms::createInterleaveGroups(
25062506
}
25072507
}
25082508

2509+
void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
2510+
// Replace loop regions with explicit CFG.
2511+
SmallVector<VPRegionBlock *> LoopRegions;
2512+
for (VPRegionBlock *R : VPBlockUtils::blocksOnly<VPRegionBlock>(
2513+
vp_depth_first_deep(Plan.getEntry()))) {
2514+
if (!R->isReplicator())
2515+
LoopRegions.push_back(R);
2516+
}
2517+
for (VPRegionBlock *R : LoopRegions)
2518+
R->dissolveToCFGLoop();
2519+
}
2520+
25092521
// Expand VPExtendedReductionRecipe to VPWidenCastRecipe + VPReductionRecipe.
25102522
static void expandVPExtendedReduction(VPExtendedReductionRecipe *ExtRed) {
25112523
VPWidenCastRecipe *Ext;

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ struct VPlanTransforms {
184184
VPBasicBlock *LatchVPBB,
185185
VFRange &Range);
186186

187+
/// Replace loop regions with explicit CFG.
188+
static void dissolveLoopRegions(VPlan &Plan);
189+
187190
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
188191
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
189192
static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy);

0 commit comments

Comments
 (0)