Skip to content

Commit f180edb

Browse files
committed
!fixup address latest comments, thanks
1 parent e17003f commit f180edb

File tree

5 files changed

+103
-92
lines changed

5 files changed

+103
-92
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3012,6 +3012,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
30123012
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
30133013
}
30143014

3015+
// Don't apply optimizations below when no vector region remains, as they all
3016+
// require a vector loop at the moment.
30153017
if (!State.Plan->getVectorLoopRegion())
30163018
return;
30173019

@@ -7811,14 +7813,14 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78117813
// 2.6. Maintain Loop Hints
78127814
// Keep all loop hints from the original loop on the vector loop (we'll
78137815
// replace the vectorizer-specific hints below).
7814-
MDNode *OrigLoopID = OrigLoop->getLoopID();
7816+
if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) {
7817+
MDNode *OrigLoopID = OrigLoop->getLoopID();
78157818

7816-
std::optional<MDNode *> VectorizedLoopID =
7817-
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
7818-
LLVMLoopVectorizeFollowupVectorized});
7819+
std::optional<MDNode *> VectorizedLoopID =
7820+
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
7821+
LLVMLoopVectorizeFollowupVectorized});
78197822

7820-
if (auto *R = BestVPlan.getVectorLoopRegion()) {
7821-
VPBasicBlock *HeaderVPBB = R->getEntryBasicBlock();
7823+
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
78227824
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
78237825
if (VectorizedLoopID) {
78247826
L->setLoopID(*VectorizedLoopID);
@@ -7844,11 +7846,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78447846
ILV.printDebugTracesAtEnd();
78457847

78467848
// 4. Adjust branch weight of the branch in the middle block.
7847-
if (auto *R = BestVPlan.getVectorLoopRegion()) {
7848-
auto *ExitVPBB = cast<VPBasicBlock>(R->getSingleSuccessor());
7849-
7849+
if (BestVPlan.getVectorLoopRegion()) {
7850+
auto *MiddleVPBB = BestVPlan.getMiddleBlock();
78507851
auto *MiddleTerm =
7851-
cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7852+
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());
78527853
if (MiddleTerm->isConditional() &&
78537854
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
78547855
// Assume that `Count % VectorTripCount` is equally distributed.

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 57 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,8 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
554554
template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
555555
if (P && P->isReplicator()) {
556556
P = P->getParent();
557+
// Multiple loop regions can be nested, but replicate regions can only be
558+
// nested inside a loop region or must be outside any other region.
557559
assert((!P || !cast<VPRegionBlock>(P)->isReplicator()) &&
558560
"unexpected nested replicate regions");
559561
}
@@ -933,6 +935,8 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
933935

934936
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
935937
// FIXME: Model VF * UF computation completely in VPlan.
938+
assert((!getVectorLoopRegion() || VFxUF.getNumUsers()) &&
939+
"VFxUF expected to always have users");
936940
unsigned UF = getUF();
937941
if (VF.getNumUsers()) {
938942
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
@@ -986,54 +990,56 @@ void VPlan::execute(VPTransformState *State) {
986990
for (VPBlockBase *Block : RPOT)
987991
Block->execute(State);
988992

989-
if (auto *LoopRegion = getVectorLoopRegion()) {
990-
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
991-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
993+
State->CFG.DTU.flush();
992994

993-
// Fix the latch value of canonical, reduction and first-order recurrences
994-
// phis in the vector loop.
995-
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
996-
for (VPRecipeBase &R : Header->phis()) {
997-
// Skip phi-like recipes that generate their backedege values themselves.
998-
if (isa<VPWidenPHIRecipe>(&R))
999-
continue;
995+
auto *LoopRegion = getVectorLoopRegion();
996+
if (!LoopRegion)
997+
return;
1000998

1001-
if (isa<VPWidenInductionRecipe>(&R)) {
1002-
PHINode *Phi = nullptr;
1003-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1004-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1005-
} else {
1006-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1007-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1008-
"recipe generating only scalars should have been replaced");
1009-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1010-
Phi = cast<PHINode>(GEP->getPointerOperand());
1011-
}
1012-
1013-
Phi->setIncomingBlock(1, VectorLatchBB);
1014-
1015-
// Move the last step to the end of the latch block. This ensures
1016-
// consistent placement of all induction updates.
1017-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1018-
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1019-
1020-
// Use the steps for the last part as backedge value for the induction.
1021-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1022-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1023-
continue;
999+
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1000+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1001+
1002+
// Fix the latch value of canonical, reduction and first-order recurrences
1003+
// phis in the vector loop.
1004+
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1005+
for (VPRecipeBase &R : Header->phis()) {
1006+
// Skip phi-like recipes that generate their backedege values themselves.
1007+
if (isa<VPWidenPHIRecipe>(&R))
1008+
continue;
1009+
1010+
if (isa<VPWidenInductionRecipe>(&R)) {
1011+
PHINode *Phi = nullptr;
1012+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1013+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1014+
} else {
1015+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1016+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1017+
"recipe generating only scalars should have been replaced");
1018+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1019+
Phi = cast<PHINode>(GEP->getPointerOperand());
10241020
}
10251021

1026-
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1027-
bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
1028-
(isa<VPReductionPHIRecipe>(PhiR) &&
1029-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1030-
Value *Phi = State->get(PhiR, NeedsScalar);
1031-
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1032-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1022+
Phi->setIncomingBlock(1, VectorLatchBB);
1023+
1024+
// Move the last step to the end of the latch block. This ensures
1025+
// consistent placement of all induction updates.
1026+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1027+
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1028+
1029+
// Use the steps for the last part as backedge value for the induction.
1030+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1031+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1032+
continue;
10331033
}
1034-
}
10351034

1036-
State->CFG.DTU.flush();
1035+
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1036+
bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
1037+
(isa<VPReductionPHIRecipe>(PhiR) &&
1038+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1039+
Value *Phi = State->get(PhiR, NeedsScalar);
1040+
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1041+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1042+
}
10371043
}
10381044

10391045
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1399,13 +1405,17 @@ void VPlanIngredient::print(raw_ostream &O) const {
13991405

14001406
#endif
14011407

1402-
bool VPValue::isDefinedOutsideLoopRegions() const {
1403-
1404-
return !hasDefiningRecipe() ||
1405-
(!getDefiningRecipe()->getParent()->getEnclosingLoopRegion() &&
1406-
getDefiningRecipe()->getParent()->getPlan()->getVectorLoopRegion());
1408+
/// Returns true if there is a vector loop region and \p VPV is defined in a
1409+
/// loop region.
1410+
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1411+
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1412+
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1413+
DefR->getParent()->getEnclosingLoopRegion());
14071414
}
14081415

1416+
bool VPValue::isDefinedOutsideLoopRegions() const {
1417+
return !isDefinedInsideLoopRegions(this);
1418+
}
14091419
void VPValue::replaceAllUsesWith(VPValue *New) {
14101420
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
14111421
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3809,6 +3809,8 @@ class VPlan {
38093809

38103810
~VPlan();
38113811

3812+
/// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3813+
/// EntryBlock must have no predecessors.
38123814
void setEntry(VPBasicBlock *VPBB) {
38133815
Entry = VPBB;
38143816
VPBB->setPlan(this);
@@ -3842,7 +3844,8 @@ class VPlan {
38423844
VPBasicBlock *getEntry() { return Entry; }
38433845
const VPBasicBlock *getEntry() const { return Entry; }
38443846

3845-
/// Returns the preheader of the vector loop region.
3847+
/// Returns the preheader of the vector loop region, if one exists, or null
3848+
/// otherwise.
38463849
VPBasicBlock *getVectorPreheader() {
38473850
VPRegionBlock *VectorRegion = getVectorLoopRegion();
38483851
return VectorRegion

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,9 +2376,7 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
23762376
// Replace the temporary unreachable terminator with a new conditional branch,
23772377
// whose two destinations will be set later when they are created.
23782378
auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2379-
assert((isa<UnreachableInst>(CurrentTerminator) ||
2380-
(isa<BranchInst>(CurrentTerminator) &&
2381-
!CurrentTerminator->getOperand(0))) &&
2379+
assert(isa<UnreachableInst>(CurrentTerminator) &&
23822380
"Expected to replace unreachable terminator with conditional branch.");
23832381
auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
23842382
CondBr->setSuccessor(0, nullptr);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -794,15 +794,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
794794
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
795795
}
796796

797-
/// Try to simplify the recipes in \p Plan. If \p CanonicalIVTy is not nullptr,
798-
/// use it directly instead of retrieving the canonical IV type from the plan
799-
/// which may not exist any longer.
800-
static void simplifyRecipes(VPlan &Plan, Type *CanonicalIVTy = nullptr) {
797+
/// Try to simplify the recipes in \p Plan. Use \p CanonicalIVTy as type for all
798+
/// un-typed live-ins in VPTypeAnalysis.
799+
static void simplifyRecipes(VPlan &Plan, Type *CanonicalIVTy) {
801800
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
802801
Plan.getEntry());
803-
Type *CanonicalIVType =
804-
CanonicalIVTy ? CanonicalIVTy : Plan.getCanonicalIV()->getScalarType();
805-
VPTypeAnalysis TypeInfo(CanonicalIVType);
802+
VPTypeAnalysis TypeInfo(CanonicalIVTy);
806803
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
807804
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
808805
simplifyRecipe(R, TypeInfo);
@@ -840,38 +837,40 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
840837
!SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
841838
return;
842839

840+
// The vector loop region only executes once. If possible, completely remove
841+
// the region, otherwise replace the terminator controlling the latch with
842+
// (BranchOnCond true).
843843
Term->eraseFromParent();
844844
auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
845845
auto *CanIVTy = Plan.getCanonicalIV()->getScalarType();
846-
if (any_of(Header->phis(),
847-
IsaPred<VPWidenIntOrFpInductionRecipe, VPReductionPHIRecipe>)) {
848-
LLVMContext &Ctx = SE.getContext();
849-
auto *BOC = new VPInstruction(
850-
VPInstruction::BranchOnCond,
851-
{Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc());
852-
ExitingVPBB->appendRecipe(BOC);
853-
} else {
854-
for (VPRecipeBase &R : make_early_inc_range(Header->phis())) {
855-
auto *P = cast<VPHeaderPHIRecipe>(&R);
856-
P->replaceAllUsesWith(P->getStartValue());
857-
P->eraseFromParent();
846+
if (all_of(
847+
Header->phis(),
848+
IsaPred<VPCanonicalIVPHIRecipe, VPFirstOrderRecurrencePHIRecipe>)) {
849+
for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
850+
auto *HeaderPhiR = cast<VPHeaderPHIRecipe>(&HeaderR);
851+
HeaderPhiR->replaceAllUsesWith(HeaderPhiR->getStartValue());
852+
HeaderPhiR->eraseFromParent();
858853
}
859854

860-
VPBlockBase *Preheader = Plan.getVectorPreheader();
861-
VPBlockBase *Middle = Plan.getMiddleBlock();
855+
VPBlockBase *Preheader = VectorRegion->getSinglePredecessor();
856+
VPBlockBase *Exit = VectorRegion->getSingleSuccessor();
862857
VPBlockUtils::disconnectBlocks(Preheader, VectorRegion);
863-
VPBlockUtils::disconnectBlocks(VectorRegion, Middle);
858+
VPBlockUtils::disconnectBlocks(VectorRegion, Exit);
864859

865-
Header->setParent(nullptr);
866-
ExitingVPBB->setParent(nullptr);
860+
for (VPBlockBase *B : vp_depth_first_shallow(VectorRegion->getEntry()))
861+
B->setParent(nullptr);
867862

868-
for (VPBlockBase *B : vp_depth_first_shallow(VectorRegion->getEntry())) {
869-
if (isa<VPRegionBlock>(B))
870-
B->setParent(nullptr);
871-
}
872863
VPBlockUtils::connectBlocks(Preheader, Header);
873-
VPBlockUtils::connectBlocks(ExitingVPBB, Middle);
864+
VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
874865
simplifyRecipes(Plan, CanIVTy);
866+
} else {
867+
// The vector region contains header phis for which we cannot remove the
868+
// loop region yet.
869+
LLVMContext &Ctx = SE.getContext();
870+
auto *BOC = new VPInstruction(
871+
VPInstruction::BranchOnCond,
872+
{Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc());
873+
ExitingVPBB->appendRecipe(BOC);
875874
}
876875

877876
VPlanTransforms::removeDeadRecipes(Plan);
@@ -1287,10 +1286,10 @@ void VPlanTransforms::optimize(VPlan &Plan) {
12871286
removeRedundantCanonicalIVs(Plan);
12881287
removeRedundantInductionCasts(Plan);
12891288

1290-
simplifyRecipes(Plan);
1289+
simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType());
12911290
legalizeAndOptimizeInductions(Plan);
12921291
removeRedundantExpandSCEVRecipes(Plan);
1293-
simplifyRecipes(Plan);
1292+
simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType());
12941293
removeDeadRecipes(Plan);
12951294

12961295
createAndOptimizeReplicateRegions(Plan);

0 commit comments

Comments
 (0)