Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 8 additions & 14 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4035,7 +4035,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPScalarIVStepsSC:
case VPDef::VPReplicateSC:
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
case VPDef::VPVectorEndPointerSC:
case VPDef::VPExpandSCEVSC:
Expand Down Expand Up @@ -8256,6 +8255,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
LoopRegion->clearCanonicalIVNUW();
}

// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -8318,8 +8318,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
&R) ||
if (isa<VPWidenCanonicalIVRecipe, VPBlendRecipe>(&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
continue;

Expand Down Expand Up @@ -8509,8 +8508,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder, BlockMaskCache, nullptr /*LVer*/);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
}
Expand Down Expand Up @@ -9268,8 +9265,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
for (VPRecipeBase &R :
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
EpiWidenedPhis.insert(
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
}
Expand Down Expand Up @@ -9330,10 +9325,10 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPPhi *ResumePhi = nullptr;
if (ResumePhiIter == MainScalarPH->phis().end()) {
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC);
ResumePhi = ScalarPHBuilder.createScalarPhi(
{VectorTC,
MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()},
{}, "vec.epilog.resume.val");
{VectorTC, MainPlan.getOrAddLiveIn(Constant::getNullValue(Ty))}, {},
"vec.epilog.resume.val");
} else {
ResumePhi = cast<VPPhi>(&*ResumePhiIter);
if (MainScalarPH->begin() == MainScalarPH->end())
Expand All @@ -9360,7 +9355,6 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");

VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV();
// When vectorizing the epilogue loop, the canonical induction needs to be
// adjusted by the value after the main vector loop. Find the resume value
// created during execution of the main VPlan. It must be the first phi in the
Expand Down Expand Up @@ -9390,6 +9384,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
EPI.VectorTripCount = EPResumeVal->getOperand(0);
}
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: better rename VPV

VPValue *IV = VectorLoop->getCanonicalIV();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This replaces an if (auto *IV = dyn_cast...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep originally part of the loop

assert(all_of(IV->users(),
[](const VPUser *U) {
return isa<VPScalarIVStepsRecipe>(U) ||
Expand All @@ -9408,9 +9403,8 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
DenseMap<Value *, Value *> ToFrozen;
SmallVector<Instruction *> InstsToMove;
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop. Skip the canonical IV, which has been
// handled above.
for (VPRecipeBase &R : drop_begin(Header->phis())) {
// vectorizing the epilogue loop.
for (VPRecipeBase &R : Header->phis()) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
Expand Down
76 changes: 64 additions & 12 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -746,10 +746,13 @@ VPRegionBlock *VPRegionBlock::clone() {
VPRegionBlock *NewRegion =
isReplicator()
? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
: Plan.createLoopRegion(getName(), NewEntry, NewExiting);
: Plan.createLoopRegion(CanIVInfo->getType(),
CanIVInfo->getDebugLoc(), getName(), NewEntry,
NewExiting);

for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);

return NewRegion;
}

Expand Down Expand Up @@ -834,6 +837,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
auto NewIndent = Indent + " ";
if (!isReplicator()) {
O << '\n';
getCanonicalIV()->print(O, SlotTracker);
O << " = CANONICAL-IV\n";
}
for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
O << '\n';
BlockBase->print(O, NewIndent, SlotTracker);
Expand All @@ -846,18 +854,30 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,

void VPRegionBlock::dissolveToCFGLoop() {
auto *Header = cast<VPBasicBlock>(getEntry());
if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
assert(this == getPlan()->getVectorLoopRegion() &&
"Canonical IV must be in the entry of the top-level loop region");
auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
{CanIV->getStartValue(), CanIV->getBackedgeValue()},
CanIV->getDebugLoc(), "index");
auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
VPValue *CanIV = getCanonicalIV();
if (CanIV && CanIV->getNumUsers() > 0) {
VPlan &Plan = *getPlan();
VPInstruction *CanIVInc = getCanonicalIVIncrement();
// If the increment doesn't exist yet, create it.
if (!CanIVInc) {
auto *ExitingTerm = ExitingLatch->getTerminator();
CanIVInc =
VPBuilder(ExitingTerm)
.createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()},
{CanIVInfo->hasNUW(), /* HasNSW */ false},
CanIVInfo->getDebugLoc(), "index.next");
}
auto *ScalarR =
VPBuilder(Header, Header->begin())
.createScalarPhi(
{Plan.getOrAddLiveIn(ConstantInt::get(CanIVInfo->getType(), 0)),
CanIVInc},
CanIVInfo->getDebugLoc(), "index");
CanIV->replaceAllUsesWith(ScalarR);
CanIV->eraseFromParent();
}

VPBlockBase *Preheader = getSinglePredecessor();
auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
VPBlockBase *Middle = getSingleSuccessor();
VPBlockUtils::disconnectBlocks(Preheader, this);
VPBlockUtils::disconnectBlocks(this, Middle);
Expand All @@ -870,6 +890,24 @@ void VPRegionBlock::dissolveToCFGLoop() {
VPBlockUtils::connectBlocks(ExitingLatch, Header);
}

VPInstruction *VPRegionBlock::getCanonicalIVIncrement() {
auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
VPValue *CanIV = getCanonicalIV();
assert(CanIV && "Expected a canonical IV");

auto *ExitingTerm = ExitingLatch->getTerminator();
VPInstruction *CanIVInc = nullptr;
if (match(ExitingTerm,
m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) {
assert(match(CanIVInc,
m_c_Add(m_CombineOr(m_Specific(CanIV),
m_c_Add(m_Specific(CanIV), m_LiveIn())),
m_VPValue())) &&
"invalid existing IV increment");
}
return CanIVInc;
}

VPlan::VPlan(Loop *L) {
setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
ScalarHeader = createVPIRBasicBlock(L->getHeader());
Expand All @@ -894,7 +932,11 @@ VPlan::~VPlan() {
for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
R.setOperand(I, &DummyValue);
}
} else if (!cast<VPRegionBlock>(VPB)->isReplicator()) {
cast<VPRegionBlock>(VPB)->getCanonicalIV()->replaceAllUsesWith(
&DummyValue);
}

delete VPB;
}
for (VPValue *VPV : getLiveIns())
Expand Down Expand Up @@ -1203,6 +1245,11 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.

if (auto *LoopRegion = getVectorLoopRegion()) {
Old2NewVPValues[LoopRegion->getCanonicalIV()] =
NewPlan->getVectorLoopRegion()->getCanonicalIV();
}
Comment on lines +1248 to +1251
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (auto *LoopRegion = getVectorLoopRegion()) {
Old2NewVPValues[LoopRegion->getCanonicalIV()] =
NewPlan->getVectorLoopRegion()->getCanonicalIV();
}
if (auto *CanIV = getCanonicalIV())
Old2NewVPValues[CanIV] = NewPlan->getCanonicalIV();

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The patch currently removes VPlan::getCanonicalIV in favor of always retrieving it via the loop region defining it.


remapOperands(Entry, NewEntry, Old2NewVPValues);

// Initialize remaining fields of cloned VPlan.
Expand Down Expand Up @@ -1383,6 +1430,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
/// Returns true if there is a vector loop region and \p VPV is defined in a
/// loop region.
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
if (isa<VPRegionValue>(VPV))
return true;
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion());
Expand Down Expand Up @@ -1492,9 +1541,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {

ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
for (const VPBasicBlock *VPBB :
VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
assignNames(VPBB);
for (const VPBlockBase *VPB : RPOT) {
if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB))
assignNames(VPBB);
else if (!cast<VPRegionBlock>(VPB)->isReplicator())
assignName(cast<VPRegionBlock>(VPB)->getCanonicalIV());
}
}

void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
Expand Down
Loading
Loading