Skip to content

Commit 16952d7

Browse files
committed
[VPlan] Move initial skeleton construction earlier (NFC).
Split up the not clearly named prepareForVectorization transform into addInitialSkeleton, which adds the vector preheader, middle and scalar preheader blocks, as well as the canonical induction recipes and sets the trip count. The new transform is run directly after building the plain CFG VPlan initially. The remaining code handling early exits and adding the branch in the middle block is renamed to handleEarlyExitsAndAddMiddleCheck and still runs at the original position. With the code movement, we only have to add the skeleton once to the initial VPlan, and cloning will take care of the rest. It will also enable moving other construction steps to work directly on VPlan0, like adding resume phis.
1 parent f8b1c73 commit 16952d7

File tree

4 files changed

+81
-61
lines changed

4 files changed

+81
-61
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8373,8 +8373,18 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
83738373
LVer.prepareNoAliasMetadata();
83748374
}
83758375

8376-
auto MaxVFTimes2 = MaxVF * 2;
8376+
// Create initial VPlan skeleton, having a basic block for the pre-header
8377+
// which contains SCEV expansions that need to happen before the CFG is
8378+
// modified; a basic block for the vector pre-header, followed by a region for
8379+
// the vector loop, followed by the middle basic block, connecting to the
8380+
// scalar preheader and exit blocks.
83778381
auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
8382+
VPlanTransforms::addInitialSkeleton(
8383+
*VPlan0, Legal->getWidestInductionType(),
8384+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE,
8385+
OrigLoop);
8386+
8387+
auto MaxVFTimes2 = MaxVF * 2;
83788388
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
83798389
VFRange SubRange = {VF, MaxVFTimes2};
83808390
if (auto Plan = tryToBuildVPlanWithVPRecipes(
@@ -8615,22 +8625,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
86158625
// visit each basic block after having visited its predecessor basic blocks.
86168626
// ---------------------------------------------------------------------------
86178627

8618-
// Create initial VPlan skeleton, having a basic block for the pre-header
8619-
// which contains SCEV expansions that need to happen before the CFG is
8620-
// modified; a basic block for the vector pre-header, followed by a region for
8621-
// the vector loop, followed by the middle basic block. The skeleton vector
8622-
// loop region contains a header and latch basic blocks.
8623-
86248628
bool RequiresScalarEpilogueCheck =
86258629
LoopVectorizationPlanner::getDecisionAndClampRange(
86268630
[this](ElementCount VF) {
86278631
return !CM.requiresScalarEpilogue(VF.isVector());
86288632
},
86298633
Range);
8630-
VPlanTransforms::prepareForVectorization(
8631-
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
8632-
CM.foldTailByMasking(), OrigLoop,
8633-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
8634+
VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(
8635+
*Plan, RequiresScalarEpilogueCheck, CM.foldTailByMasking(),
86348636
Legal->hasUncountableEarlyExit(), Range);
86358637
VPlanTransforms::createLoopRegions(*Plan);
86368638
VPlanTransforms::createExtractsForLiveOuts(*Plan);
@@ -8918,10 +8920,13 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
89188920
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
89198921

89208922
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
8921-
VPlanTransforms::prepareForVectorization(
8922-
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
8923-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
8924-
Range);
8923+
8924+
VPlanTransforms::addInitialSkeleton(
8925+
*Plan, Legal->getWidestInductionType(),
8926+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE,
8927+
OrigLoop);
8928+
VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(*Plan, true, false, false,
8929+
Range);
89258930
VPlanTransforms::createLoopRegions(*Plan);
89268931

89278932
for (ElementCount VF : Range)

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 42 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -459,10 +459,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
459459
LatchDL);
460460
}
461461

462-
void VPlanTransforms::prepareForVectorization(
463-
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
464-
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
465-
DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) {
462+
void VPlanTransforms::addInitialSkeleton(VPlan &Plan, Type *InductionTy,
463+
DebugLoc IVDL,
464+
PredicatedScalarEvolution &PSE,
465+
Loop *TheLoop) {
466466
VPDominatorTree VPDT;
467467
VPDT.recalculate(Plan);
468468

@@ -488,12 +488,46 @@ void VPlanTransforms::prepareForVectorization(
488488

489489
addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
490490

491-
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
491+
// Create SCEV and VPValue for the trip count.
492+
// We use the symbolic max backedge-taken-count, which works also when
493+
// vectorizing loops with uncountable early exits.
494+
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
495+
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
496+
"Invalid loop count");
497+
ScalarEvolution &SE = *PSE.getSE();
498+
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
499+
InductionTy, TheLoop);
500+
Plan.setTripCount(
501+
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
502+
503+
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
504+
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
505+
506+
// The connection order corresponds to the operands of the conditional branch,
507+
// with the middle block already connected to the exit block.
508+
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
509+
// Also connect the entry block to the scalar preheader.
510+
// TODO: Also introduce a branch recipe together with the minimum trip count
511+
// check.
512+
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
513+
Plan.getEntry()->swapSuccessors();
514+
}
515+
516+
void VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(
517+
VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded,
518+
bool HasUncountableEarlyExit, VFRange &Range) {
519+
auto *MiddleVPBB = cast<VPBasicBlock>(
520+
Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]);
521+
VPBlockBase *HeaderVPB =
522+
Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor();
523+
auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPB->getPredecessors()[1]);
524+
492525
// Disconnect all early exits from the loop leaving it with a single exit from
493526
// the latch. Early exits that are countable are left for a scalar epilog. The
494527
// condition of uncountable early exits (currently at most one is supported)
495528
// is fused into the latch exit, and used to branch from middle block to the
496529
// early exit destination.
530+
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
497531
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
498532
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
499533
if (Pred == MiddleVPBB)
@@ -502,7 +536,8 @@ void VPlanTransforms::prepareForVectorization(
502536
assert(!HandledUncountableEarlyExit &&
503537
"can handle exactly one uncountable early exit");
504538
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
505-
HeaderVPBB, LatchVPBB, Range);
539+
cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
540+
Range);
506541
HandledUncountableEarlyExit = true;
507542
} else {
508543
for (VPRecipeBase &R : EB->phis())
@@ -516,38 +551,11 @@ void VPlanTransforms::prepareForVectorization(
516551
assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
517552
"missed an uncountable exit that must be handled");
518553

519-
// Create SCEV and VPValue for the trip count.
520-
// We use the symbolic max backedge-taken-count, which works also when
521-
// vectorizing loops with uncountable early exits.
522-
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
523-
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
524-
"Invalid loop count");
525-
ScalarEvolution &SE = *PSE.getSE();
526-
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
527-
InductionTy, TheLoop);
528-
Plan.setTripCount(
529-
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
530-
531-
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
532-
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
533-
534-
// The connection order corresponds to the operands of the conditional branch,
535-
// with the middle block already connected to the exit block.
536-
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
537-
// Also connect the entry block to the scalar preheader.
538-
// TODO: Also introduce a branch recipe together with the minimum trip count
539-
// check.
540-
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
541-
Plan.getEntry()->swapSuccessors();
542-
543554
// If MiddleVPBB has a single successor then the original loop does not exit
544555
// via the latch and the single successor must be the scalar preheader.
545556
// There's no need to add a runtime check to MiddleVPBB.
546-
if (MiddleVPBB->getNumSuccessors() == 1) {
547-
assert(MiddleVPBB->getSingleSuccessor() == ScalarPH &&
548-
"must have ScalarPH as single successor");
557+
if (MiddleVPBB->getNumSuccessors() == 1)
549558
return;
550-
}
551559

552560
assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
553561

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,21 @@ struct VPlanTransforms {
5858
LoopInfo &LI);
5959

6060
/// Prepare the plan for vectorization. It will introduce a dedicated
61-
/// VPBasicBlock for the vector pre-header as well as a VPBasicBlock as exit
62-
/// block of the main vector loop (middle.block). If a check is needed to
61+
/// VPBasicBlock for the vector pre-header, a VPBasicBlock as exit
62+
/// block of the main vector loop (middle.block) and a VPBasicBlock for the
63+
/// scalar preheader. It also adds a canonical IV and its increment, using \p
64+
/// InductionTy and \p IVDL, and creates a VPValue expression for the original
65+
/// trip count.
66+
LLVM_ABI_FOR_TEST static void
67+
addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
68+
PredicatedScalarEvolution &PSE, Loop *TheLoop);
69+
70+
/// Update \p Plan to account for all early exits. If a check is needed to
6371
/// guard executing the scalar epilogue loop, it will be added to the middle
64-
/// block, together with VPBasicBlocks for the scalar preheader and exit
65-
/// blocks. \p InductionTy is the type of the canonical induction and used for
66-
/// related values, like the trip count expression. It also creates a VPValue
67-
/// expression for the original trip count.
68-
LLVM_ABI_FOR_TEST static void prepareForVectorization(
69-
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
70-
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
71-
DebugLoc IVDL, bool HasUncountableExit, VFRange &Range);
72+
/// block.
73+
LLVM_ABI_FOR_TEST static void handleEarlyExitsAndAddMiddleCheck(
74+
VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded,
75+
bool HasUncountableExit, VFRange &Range);
7276

7377
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
7478
/// flat CFG into a hierarchical CFG.

llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,11 @@ class VPlanTestIRBase : public testing::Test {
7474
PredicatedScalarEvolution PSE(*SE, *L);
7575
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI);
7676
VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
77-
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
78-
PSE, true, false, L, {}, false, R);
77+
VPlanTransforms::addInitialSkeleton(*Plan, IntegerType::get(*Ctx, 64), {},
78+
PSE, L);
79+
80+
VPlanTransforms::handleEarlyExitsAndAddMiddleCheck(*Plan, true, false,
81+
false, R);
7982
VPlanTransforms::createLoopRegions(*Plan);
8083
return Plan;
8184
}

0 commit comments

Comments
 (0)