Skip to content

Commit 06fd0f9

Browse files
authored
[VPlan] Move initial skeleton construction earlier (NFC). (#150848)
Split up the unclearly named prepareForVectorization transform. Its first part becomes buildVPlan0, which builds the plain CFG VPlan and then immediately adds the vector preheader, middle and scalar preheader blocks, as well as the canonical induction recipes, and sets the trip count. The remaining code, which handles early exits and adds the branch in the middle block, is split into handleEarlyExits and addMiddleCheck and still runs at the original position. With the code movement, we only have to add the skeleton once to the initial VPlan, and cloning will take care of the rest. It will also enable moving other construction steps to work directly on VPlan0, like adding resume phis. PR: #150848
1 parent aee4f2b commit 06fd0f9

File tree

4 files changed

+104
-72
lines changed

4 files changed

+104
-72
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8384,8 +8384,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
83848384
LVer.prepareNoAliasMetadata();
83858385
}
83868386

8387+
// Create initial base VPlan0, to serve as common starting point for all
8388+
// candidates built later for specific VF ranges.
8389+
auto VPlan0 = VPlanTransforms::buildVPlan0(
8390+
OrigLoop, *LI, Legal->getWidestInductionType(),
8391+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE);
8392+
83878393
auto MaxVFTimes2 = MaxVF * 2;
8388-
auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
83898394
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
83908395
VFRange SubRange = {VF, MaxVFTimes2};
83918396
if (auto Plan = tryToBuildVPlanWithVPRecipes(
@@ -8624,23 +8629,17 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
86248629
// visit each basic block after having visited its predecessor basic blocks.
86258630
// ---------------------------------------------------------------------------
86268631

8627-
// Create initial VPlan skeleton, having a basic block for the pre-header
8628-
// which contains SCEV expansions that need to happen before the CFG is
8629-
// modified; a basic block for the vector pre-header, followed by a region for
8630-
// the vector loop, followed by the middle basic block. The skeleton vector
8631-
// loop region contains a header and latch basic blocks.
8632-
86338632
bool RequiresScalarEpilogueCheck =
86348633
LoopVectorizationPlanner::getDecisionAndClampRange(
86358634
[this](ElementCount VF) {
86368635
return !CM.requiresScalarEpilogue(VF.isVector());
86378636
},
86388637
Range);
8639-
VPlanTransforms::prepareForVectorization(
8640-
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
8641-
CM.foldTailByMasking(), OrigLoop,
8642-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
8643-
Legal->hasUncountableEarlyExit(), Range);
8638+
VPlanTransforms::handleEarlyExits(*Plan, Legal->hasUncountableEarlyExit(),
8639+
Range);
8640+
VPlanTransforms::addMiddleCheck(*Plan, RequiresScalarEpilogueCheck,
8641+
CM.foldTailByMasking());
8642+
86448643
VPlanTransforms::createLoopRegions(*Plan);
86458644
VPlanTransforms::createExtractsForLiveOuts(*Plan);
86468645

@@ -8926,11 +8925,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
89268925
assert(!OrigLoop->isInnermost());
89278926
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
89288927

8929-
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
8930-
VPlanTransforms::prepareForVectorization(
8931-
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
8932-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
8933-
Range);
8928+
auto Plan = VPlanTransforms::buildVPlan0(
8929+
OrigLoop, *LI, Legal->getWidestInductionType(),
8930+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE);
8931+
VPlanTransforms::handleEarlyExits(*Plan,
8932+
/*HasUncountableExit*/ false, Range);
8933+
VPlanTransforms::addMiddleCheck(*Plan, /*RequiresScalarEpilogueCheck*/ true,
8934+
/*TailFolded*/ false);
8935+
89348936
VPlanTransforms::createLoopRegions(*Plan);
89358937

89368938
for (ElementCount VF : Range)

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 55 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -336,12 +336,6 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
336336
return std::move(Plan);
337337
}
338338

339-
std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(Loop *TheLoop,
340-
LoopInfo &LI) {
341-
PlainCFGBuilder Builder(TheLoop, &LI);
342-
return Builder.buildPlainCFG();
343-
}
344-
345339
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
346340
/// has exactly 2 predecessors (preheader and latch), where the block
347341
/// dominates the latch and the preheader dominates the block. If it is a
@@ -457,10 +451,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
457451
LatchDL);
458452
}
459453

460-
void VPlanTransforms::prepareForVectorization(
461-
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
462-
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
463-
DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) {
454+
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
455+
PredicatedScalarEvolution &PSE, Loop *TheLoop) {
464456
VPDominatorTree VPDT;
465457
VPDT.recalculate(Plan);
466458

@@ -486,12 +478,54 @@ void VPlanTransforms::prepareForVectorization(
486478

487479
addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
488480

489-
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
481+
// Create SCEV and VPValue for the trip count.
482+
// We use the symbolic max backedge-taken-count, which works also when
483+
// vectorizing loops with uncountable early exits.
484+
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
485+
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
486+
"Invalid backedge-taken count");
487+
ScalarEvolution &SE = *PSE.getSE();
488+
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
489+
InductionTy, TheLoop);
490+
Plan.setTripCount(
491+
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
492+
493+
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
494+
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
495+
496+
// The connection order corresponds to the operands of the conditional branch,
497+
// with the middle block already connected to the exit block.
498+
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
499+
// Also connect the entry block to the scalar preheader.
500+
// TODO: Also introduce a branch recipe together with the minimum trip count
501+
// check.
502+
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
503+
Plan.getEntry()->swapSuccessors();
504+
}
505+
506+
std::unique_ptr<VPlan>
507+
VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
508+
DebugLoc IVDL, PredicatedScalarEvolution &PSE) {
509+
PlainCFGBuilder Builder(TheLoop, &LI);
510+
std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
511+
addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
512+
return VPlan0;
513+
}
514+
515+
void VPlanTransforms::handleEarlyExits(VPlan &Plan,
516+
bool HasUncountableEarlyExit,
517+
VFRange &Range) {
518+
auto *MiddleVPBB = cast<VPBasicBlock>(
519+
Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]);
520+
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
521+
VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
522+
490523
// Disconnect all early exits from the loop leaving it with a single exit from
491524
// the latch. Early exits that are countable are left for a scalar epilog. The
492525
// condition of uncountable early exits (currently at most one is supported)
493526
// is fused into the latch exit, and used to branch from middle block to the
494527
// early exit destination.
528+
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
495529
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
496530
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
497531
if (Pred == MiddleVPBB)
@@ -500,7 +534,8 @@ void VPlanTransforms::prepareForVectorization(
500534
assert(!HandledUncountableEarlyExit &&
501535
"can handle exactly one uncountable early exit");
502536
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
503-
HeaderVPBB, LatchVPBB, Range);
537+
cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
538+
Range);
504539
HandledUncountableEarlyExit = true;
505540
} else {
506541
for (VPRecipeBase &R : EB->phis())
@@ -513,36 +548,18 @@ void VPlanTransforms::prepareForVectorization(
513548

514549
assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
515550
"missed an uncountable exit that must be handled");
551+
}
516552

517-
// Create SCEV and VPValue for the trip count.
518-
// We use the symbolic max backedge-taken-count, which works also when
519-
// vectorizing loops with uncountable early exits.
520-
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
521-
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
522-
"Invalid loop count");
523-
ScalarEvolution &SE = *PSE.getSE();
524-
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
525-
InductionTy, TheLoop);
526-
Plan.setTripCount(
527-
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
528-
529-
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
530-
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
531-
532-
// The connection order corresponds to the operands of the conditional branch,
533-
// with the middle block already connected to the exit block.
534-
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
535-
// Also connect the entry block to the scalar preheader.
536-
// TODO: Also introduce a branch recipe together with the minimum trip count
537-
// check.
538-
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
539-
Plan.getEntry()->swapSuccessors();
540-
553+
void VPlanTransforms::addMiddleCheck(VPlan &Plan,
554+
bool RequiresScalarEpilogueCheck,
555+
bool TailFolded) {
556+
auto *MiddleVPBB = cast<VPBasicBlock>(
557+
Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]);
541558
// If MiddleVPBB has a single successor then the original loop does not exit
542559
// via the latch and the single successor must be the scalar preheader.
543560
// There's no need to add a runtime check to MiddleVPBB.
544561
if (MiddleVPBB->getNumSuccessors() == 1) {
545-
assert(MiddleVPBB->getSingleSuccessor() == ScalarPH &&
562+
assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
546563
"must have ScalarPH as single successor");
547564
return;
548565
}
@@ -564,6 +581,7 @@ void VPlanTransforms::prepareForVectorization(
564581
// the corresponding compare because they may have ended up with different
565582
// line numbers and we want to avoid awkward line stepping while debugging.
566583
// E.g., if the compare has got a line number inside the loop.
584+
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
567585
DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
568586
VPBuilder Builder(MiddleVPBB);
569587
VPValue *Cmp;

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,21 +54,30 @@ struct VPlanTransforms {
5454
verifyVPlanIsValid(Plan);
5555
}
5656

57-
LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop,
58-
LoopInfo &LI);
59-
60-
/// Prepare the plan for vectorization. It will introduce a dedicated
61-
/// VPBasicBlock for the vector pre-header as well as a VPBasicBlock as exit
62-
/// block of the main vector loop (middle.block). If a check is needed to
63-
/// guard executing the scalar epilogue loop, it will be added to the middle
64-
/// block, together with VPBasicBlocks for the scalar preheader and exit
65-
/// blocks. \p InductionTy is the type of the canonical induction and used for
66-
/// related values, like the trip count expression. It also creates a VPValue
67-
/// expression for the original trip count.
68-
LLVM_ABI_FOR_TEST static void prepareForVectorization(
69-
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
70-
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
71-
DebugLoc IVDL, bool HasUncountableExit, VFRange &Range);
57+
/// Create a base VPlan0, serving as the common starting point for all later
58+
/// candidates. It consists of an initial plain CFG loop with loop blocks from
59+
/// \p TheLoop being directly translated to VPBasicBlocks with VPInstructions
60+
/// corresponding to the input IR.
61+
///
62+
/// The created loop is wrapped in an initial skeleton to facilitate
63+
/// vectorization, consisting of a vector pre-header, an exit block for the
64+
/// main vector loop (middle.block) and a new block as preheader of the scalar
65+
/// loop (scalar.ph). It also adds a canonical IV and its increment, using \p
66+
/// InductionTy and \p IVDL, and creates a VPValue expression for the original
67+
/// trip count.
68+
LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
69+
buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
70+
PredicatedScalarEvolution &PSE);
71+
72+
/// Update \p Plan to account for all early exits.
73+
LLVM_ABI_FOR_TEST static void
74+
handleEarlyExits(VPlan &Plan, bool HasUncountableExit, VFRange &Range);
75+
76+
/// If a check is needed to guard executing the scalar epilogue loop, it will
77+
/// be added to the middle block.
78+
LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan,
79+
bool RequiresScalarEpilogueCheck,
80+
bool TailFolded);
7281

7382
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
7483
/// flat CFG into a hierarchical CFG.

llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,13 @@ class VPlanTestIRBase : public testing::Test {
7272

7373
Loop *L = LI->getLoopFor(LoopHeader);
7474
PredicatedScalarEvolution PSE(*SE, *L);
75-
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI);
75+
auto Plan = VPlanTransforms::buildVPlan0(L, *LI, IntegerType::get(*Ctx, 64),
76+
{}, PSE);
77+
7678
VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
77-
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
78-
PSE, true, false, L, {}, false, R);
79+
VPlanTransforms::handleEarlyExits(*Plan, false, R);
80+
VPlanTransforms::addMiddleCheck(*Plan, true, false);
81+
7982
VPlanTransforms::createLoopRegions(*Plan);
8083
return Plan;
8184
}

0 commit comments

Comments
 (0)