Skip to content

Commit e3d3754

Browse files
committed
!fixup address latest comments, thanks
1 parent 241c47c commit e3d3754

File tree

10 files changed

+127
-121
lines changed

10 files changed

+127
-121
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2760,15 +2760,6 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
27602760
return TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
27612761
}
27622762

2763-
static VPBasicBlock *getHeaderForMainVectorLoop(VPlan &Plan,
2764-
VPDominatorTree &VPDT) {
2765-
return find_singleton<VPBasicBlock>(
2766-
vp_depth_first_shallow(Plan.getEntry()), [&VPDT](VPBlockBase *VPB, bool) {
2767-
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
2768-
return VPBB && VPBB->isHeader(VPDT) ? VPBB : nullptr;
2769-
});
2770-
}
2771-
27722763
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
27732764
// Fix widened non-induction PHIs by setting up the PHI operands.
27742765
if (EnableVPlanNativePath)
@@ -2787,10 +2778,10 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
27872778
PSE.getSE()->forgetLoop(OrigLoop);
27882779
PSE.getSE()->forgetBlockAndLoopDispositions();
27892780

2790-
// Don't apply optimizations below when no vector loop remains, as they all
2781+
// Don't apply optimizations below when no (vector) loop remains, as they all
27912782
// require one at the moment.
27922783
VPBasicBlock *HeaderVPBB =
2793-
getHeaderForMainVectorLoop(*State.Plan, State.VPDT);
2784+
vputils::getTopLevelVectorLoopHeader(*State.Plan, State.VPDT);
27942785
if (!HeaderVPBB)
27952786
return;
27962787

@@ -7811,6 +7802,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78117802

78127803
VPBasicBlock *MiddleVPBB =
78137804
BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr;
7805+
VPlanTransforms::disolveLoopRegions(BestVPlan);
78147806
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
78157807
*Legal->getWidestInductionType());
78167808

@@ -7906,7 +7898,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
79067898
// 2.6. Maintain Loop Hints
79077899
// Keep all loop hints from the original loop on the vector loop (we'll
79087900
// replace the vectorizer-specific hints below).
7909-
VPBasicBlock *HeaderVPBB = getHeaderForMainVectorLoop(BestVPlan, State.VPDT);
7901+
VPBasicBlock *HeaderVPBB =
7902+
vputils::getTopLevelVectorLoopHeader(BestVPlan, State.VPDT);
79107903
if (HeaderVPBB) {
79117904
MDNode *OrigLoopID = OrigLoop->getLoopID();
79127905

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 72 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,17 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
207207
return Parent->getEnclosingBlockWithPredecessors();
208208
}
209209

210-
bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
211-
return getPredecessors().size() == 2 &&
212-
VPDT.dominates(this, getPredecessors()[1]);
210+
bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
211+
const VPDominatorTree &VPDT) {
212+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
213+
if (!VPBB)
214+
return false;
215+
if (auto *R = VPBB->getParent())
216+
return !R->isReplicator() && VPBB->getNumPredecessors() == 0;
217+
218+
assert(!VPB->getParent() && "checking blocks in regions not implemented yet");
219+
return VPB->getPredecessors().size() == 2 &&
220+
VPDT.dominates(VPB, VPB->getPredecessors()[1]);
213221
}
214222

215223
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
@@ -425,22 +433,23 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
425433
if (ParentLoop && !State.LI->getLoopFor(NewBB))
426434
ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
427435

436+
auto Preds = to_vector(getHierarchicalPredecessors());
437+
if (VPBlockUtils::isHeader(this, State.VPDT)) {
438+
// There's no block yet for the latch, don't try to connect it yet.
439+
Preds = {Preds[0]};
440+
}
441+
428442
// Hook up the new basic block to its predecessors.
429-
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
443+
for (VPBlockBase *PredVPBlock : Preds) {
430444
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
431445
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
432446
BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
433-
if (!PredBB)
434-
continue;
435-
436447
assert(PredBB && "Predecessor basic-block not found building successor.");
437448
auto *PredBBTerminator = PredBB->getTerminator();
438449
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
439450

440451
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
441452
if (isa<UnreachableInst>(PredBBTerminator)) {
442-
if (PredVPSuccessors.size() == 2)
443-
continue;
444453
assert(PredVPSuccessors.size() == 1 &&
445454
"Predecessor ending w/o branch must have single successor.");
446455
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -496,7 +505,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
496505
bool Replica = bool(State->Lane);
497506
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
498507

499-
if (isHeader(State->VPDT)) {
508+
if (VPBlockUtils::isHeader(this, State->VPDT)) {
500509
// Create and register the new vector loop.
501510
Loop *PrevParentLoop = State->CurrentParentLoop;
502511
State->CurrentParentLoop = State->LI->AllocateLoop();
@@ -539,11 +548,8 @@ void VPBasicBlock::execute(VPTransformState *State) {
539548
executeRecipes(State, NewBB);
540549

541550
// If this block is a latch, update CurrentParentLoop.
542-
if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
543-
auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
544-
return VPBB && VPBB->isHeader(State->VPDT) &&
545-
State->VPDT.dominates(Succ, this);
546-
}))
551+
if (getNumSuccessors() == 2 &&
552+
VPBlockUtils::isHeader(getSuccessors()[1], State->VPDT))
547553
State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
548554
}
549555

@@ -866,10 +872,11 @@ void VPRegionBlock::removeRegion() {
866872
VPBlockUtils::disconnectBlocks(this, Middle);
867873

868874
for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
869-
VPB->setParent(nullptr);
875+
VPB->setParent(getParent());
870876

871877
VPBlockUtils::connectBlocks(Preheader, Header);
872878
VPBlockUtils::connectBlocks(Exiting, Middle);
879+
VPBlockUtils::connectBlocks(Exiting, Header);
873880
}
874881

875882
VPlan::VPlan(Loop *L) {
@@ -981,57 +988,57 @@ void VPlan::execute(VPTransformState *State) {
981988
for (VPBlockBase *Block : RPOT)
982989
Block->execute(State);
983990

991+
VPBasicBlock *Header =
992+
vputils::getTopLevelVectorLoopHeader(*this, State->VPDT);
993+
if (!Header)
994+
return;
995+
996+
auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
997+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
998+
984999
// Fix the latch value of canonical, reduction and first-order recurrences
9851000
// phis in the vector loop.
986-
for (VPBasicBlock *Header :
987-
VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
988-
if (!Header->isHeader(State->VPDT))
1001+
for (VPRecipeBase &R : Header->phis()) {
1002+
// Skip phi-like recipes that generate their backedege values themselves.
1003+
if (isa<VPWidenPHIRecipe>(&R))
9891004
continue;
990-
for (VPRecipeBase &R : Header->phis()) {
991-
if (isa<VPWidenPHIRecipe>(&R))
992-
continue;
9931005

994-
auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
995-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
996-
997-
if (isa<VPWidenInductionRecipe>(&R)) {
998-
PHINode *Phi = nullptr;
999-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1000-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1001-
} else {
1002-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1003-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1004-
"recipe generating only scalars should have been replaced");
1005-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1006-
Phi = cast<PHINode>(GEP->getPointerOperand());
1007-
}
1008-
1009-
Phi->setIncomingBlock(1, VectorLatchBB);
1010-
1011-
// Move the last step to the end of the latch block. This ensures
1012-
// consistent placement of all induction updates.
1013-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1014-
Inc->moveBefore(
1015-
std::prev(VectorLatchBB->getTerminator()->getIterator()));
1016-
1017-
// Use the steps for the last part as backedge value for the induction.
1018-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1019-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1020-
continue;
1006+
if (isa<VPWidenInductionRecipe>(&R)) {
1007+
PHINode *Phi = nullptr;
1008+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1009+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1010+
} else {
1011+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1012+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1013+
"recipe generating only scalars should have been replaced");
1014+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1015+
Phi = cast<PHINode>(GEP->getPointerOperand());
10211016
}
10221017

1023-
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1024-
// VPInstructions currently model scalar Phis only.
1025-
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1026-
(isa<VPReductionPHIRecipe>(PhiR) &&
1027-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1028-
1029-
Value *Phi = State->get(PhiR, NeedsScalar);
1030-
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1031-
// not.
1032-
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1033-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1018+
Phi->setIncomingBlock(1, VectorLatchBB);
1019+
1020+
// Move the last step to the end of the latch block. This ensures
1021+
// consistent placement of all induction updates.
1022+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1023+
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
1024+
1025+
// Use the steps for the last part as backedge value for the induction.
1026+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1027+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1028+
continue;
10341029
}
1030+
1031+
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1032+
// VPInstructions currently model scalar Phis only.
1033+
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1034+
(isa<VPReductionPHIRecipe>(PhiR) &&
1035+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1036+
1037+
Value *Phi = State->get(PhiR, NeedsScalar);
1038+
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1039+
// not.
1040+
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1041+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10351042
}
10361043
}
10371044

@@ -1390,17 +1397,18 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
13901397

13911398
#endif
13921399

1393-
bool VPValue::isDefinedOutsideLoopRegions() const {
1400+
bool VPValue::isDefinedOutsideLoop() const {
13941401
auto *DefR = getDefiningRecipe();
13951402
if (!DefR)
13961403
return true;
13971404

1405+
// For non-live-ins, check if is in a region only if the top-level loop region
1406+
// still exits.
13981407
const VPBasicBlock *DefVPBB = DefR->getParent();
13991408
auto *Plan = DefVPBB->getPlan();
1400-
if (Plan->getVectorLoopRegion())
1401-
return !DefR->getParent()->getEnclosingLoopRegion();
1402-
return DefVPBB == Plan->getEntry();
1409+
return Plan->getVectorLoopRegion() && !DefVPBB->getEnclosingLoopRegion();
14031410
}
1411+
14041412
void VPValue::replaceAllUsesWith(VPValue *New) {
14051413
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
14061414
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,9 +1589,7 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
15891589
return getOperand(0);
15901590
}
15911591

1592-
bool isInvariantCond() const {
1593-
return getCond()->isDefinedOutsideLoopRegions();
1594-
}
1592+
bool isInvariantCond() const { return getCond()->isDefinedOutsideLoop(); }
15951593

15961594
/// Returns true if the recipe only uses the first lane of operand \p Op.
15971595
bool onlyFirstLaneUsed(const VPValue *Op) const override {
@@ -1604,17 +1602,16 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
16041602
/// A recipe for handling GEP instructions.
16051603
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
16061604
bool isPointerLoopInvariant() const {
1607-
return getOperand(0)->isDefinedOutsideLoopRegions();
1605+
return getOperand(0)->isDefinedOutsideLoop();
16081606
}
16091607

16101608
bool isIndexLoopInvariant(unsigned I) const {
1611-
return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1609+
return getOperand(I + 1)->isDefinedOutsideLoop();
16121610
}
16131611

16141612
bool areAllOperandsInvariant() const {
1615-
return all_of(operands(), [](VPValue *Op) {
1616-
return Op->isDefinedOutsideLoopRegions();
1617-
});
1613+
return all_of(operands(),
1614+
[](VPValue *Op) { return Op->isDefinedOutsideLoop(); });
16181615
}
16191616

16201617
public:
@@ -3415,9 +3412,6 @@ class VPBasicBlock : public VPBlockBase {
34153412
/// second predecessor is the exiting block of the region.
34163413
const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
34173414

3418-
/// Returns true if the block is a loop header in a plain-CFG VPlan.
3419-
bool isHeader(const VPDominatorTree &VPDT) const;
3420-
34213415
protected:
34223416
/// Execute the recipes in the IR basic block \p BB.
34233417
void executeRecipes(VPTransformState *State, BasicBlock *BB);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
576576
case VPInstruction::BranchOnCond: {
577577
Value *Cond = State.get(getOperand(0), VPLane(0));
578578
// Replace the temporary unreachable terminator with a new conditional
579-
// branch, hooking it up to backward destination for exiting blocks now and
580-
// to forward destination(s) later when they are created.
579+
// branch, hooking it up to backward destination (header) for latch blocks
580+
// now to forward destination(s) later when they are created.
581581
BranchInst *CondBr =
582582
Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
583583
CondBr->setSuccessor(0, nullptr);
@@ -600,10 +600,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
600600
VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
601601

602602
// Replace the temporary unreachable terminator with a new conditional
603-
// branch, hooking it up to backward destination (the header) now and to the
604-
// forward destination (the exit/middle block) later when it is created.
605-
// Note that CreateCondBr expects a valid BB as first argument, so we need
606-
// to set it to nullptr later.
603+
// branch, hooking it up to backward destination (the header) for latch
604+
// blocks now forward destination (the exit/middle block) later when it is
605+
// created. Note that CreateCondBr expects a valid BB as first argument, so
606+
// we need to set it to nullptr later.
607607
BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
608608
State.CFG.VPBB2IRBB[Header]);
609609
CondBr->setSuccessor(0, nullptr);
@@ -1560,7 +1560,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
15601560
InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
15611561
VPCostContext &Ctx) const {
15621562
SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1563-
bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1563+
bool ScalarCond = getOperand(0)->isDefinedOutsideLoop();
15641564
Type *ScalarTy = Ctx.Types.inferScalarType(this);
15651565
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
15661566

@@ -1784,7 +1784,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
17841784
RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
17851785

17861786
if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1787-
getOperand(1)->isDefinedOutsideLoopRegions())
1787+
getOperand(1)->isDefinedOutsideLoop())
17881788
RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
17891789
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
17901790
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
@@ -2634,13 +2634,12 @@ static void scalarizeInstruction(const Instruction *Instr,
26342634
if (auto *II = dyn_cast<AssumeInst>(Cloned))
26352635
State.AC->registerAssumption(II);
26362636

2637-
assert(
2638-
(RepRecipe->getParent()->getParent() ||
2639-
!RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
2640-
all_of(RepRecipe->operands(),
2641-
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
2642-
"Expected a recipe is either within a region or all of its operands "
2643-
"are defined outside the vectorized region.");
2637+
assert((RepRecipe->getParent()->getParent() ||
2638+
!RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
2639+
all_of(RepRecipe->operands(),
2640+
[](VPValue *Op) { return Op->isDefinedOutsideLoop(); })) &&
2641+
"Expected a recipe is either within a region or all of its operands "
2642+
"are defined outside the vectorized region.");
26442643
}
26452644

26462645
void VPReplicateRecipe::execute(VPTransformState &State) {

0 commit comments

Comments
 (0)