Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2380,10 +2380,13 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {

// We just connected a new block to the scalar preheader. Update all
// VPPhis by adding an incoming value for it, replicating the last value.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: take the number of ScalarPH predecessors once, here, reuse it repeatedly inside the loop, asserting it is one more than the number of ResumePhi's operands.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks

unsigned NumPredecessors = ScalarPH->getNumPredecessors();
(void)NumPredecessors;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
(void)NumPredecessors;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed thanks

for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
auto *ResumePhi = cast<VPPhi>(&R);
ResumePhi->addOperand(
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
"must have incoming values for all operands");
R.addOperand(R.getOperand(NumPredecessors - 2));
}
}

Expand Down
20 changes: 12 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,10 @@ class VPPhiAccessors {
return getAsRecipe()->getNumOperands();
}

/// Removes the incoming value for \p IncomingBlock, which must be a
/// predecessor.
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
Expand Down Expand Up @@ -3526,22 +3530,22 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,

/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
/// types implementing VPPhiAccessors. Used by isa<> & co.
template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
static inline bool isPossible(const VPRecipeBase *f) {
template <typename SrcTy> struct CastIsPossible<VPPhiAccessors, SrcTy> {
static inline bool isPossible(SrcTy f) {
// TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
return isa<VPIRPhi, VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPhi>(f);
}
};
/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
template <>
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
: public CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
template <typename SrcTy>
struct CastInfo<VPPhiAccessors, SrcTy>
: public CastIsPossible<VPPhiAccessors, SrcTy> {

using Self = CastInfo<VPPhiAccessors, const VPRecipeBase *>;
using Self = CastInfo<VPPhiAccessors, SrcTy>;

/// doCast is used by cast<>.
static inline VPPhiAccessors *doCast(const VPRecipeBase *R) {
static inline VPPhiAccessors *doCast(SrcTy R) {
return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
switch (R->getVPDefID()) {
case VPDef::VPInstructionSC:
Expand All @@ -3557,7 +3561,7 @@ struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
}

/// doCastIfPossible is used by dyn_cast<>.
static inline VPPhiAccessors *doCastIfPossible(const VPRecipeBase *f) {
static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
if (!Self::isPossible(f))
return nullptr;
return doCast(f);
Expand Down
82 changes: 41 additions & 41 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,10 @@ void VPlanTransforms::prepareForVectorization(
cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), Range);
HandledUncountableEarlyExit = true;
} else {
for (VPRecipeBase &R : EB->phis())
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
}

cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
Expand All @@ -526,32 +528,6 @@ void VPlanTransforms::prepareForVectorization(
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());

// If needed, add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop. Three cases:
// 1) If we require a scalar epilogue, there is no conditional branch as
// we unconditionally branch to the scalar preheader. Remove the recipes
// from the exit blocks.
// 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
// Thus if tail is to be folded, we know we don't need to run the
// remainder and we can set the condition to true.
// 3) Otherwise, construct a runtime check.

if (!RequiresScalarEpilogueCheck) {
if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
Plan.getEntry()->swapSuccessors();

// The exit blocks are unreachable, remove their recipes to make sure no
// users remain that may pessimize transforms.
for (auto *EB : Plan.getExitBlocks()) {
for (VPRecipeBase &R : make_early_inc_range(*EB))
R.eraseFromParent();
}
return;
}

// The connection order corresponds to the operands of the conditional branch,
// with the middle block already connected to the exit block.
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
Expand All @@ -561,21 +537,45 @@ void VPlanTransforms::prepareForVectorization(
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
Plan.getEntry()->swapSuccessors();

auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
// Here we use the same DebugLoc as the scalar loop latch terminator instead
// of the corresponding compare because they may have ended up with
// different line numbers and we want to avoid awkward line stepping while
// debugging. Eg. if the compare has got a line number inside the loop.
// If MiddleVPBB has a single successor then the original loop does not exit
// via the latch and the single successor must be the scalar preheader.
// There's no need to add a runtime check to MiddleVPBB.
if (MiddleVPBB->getNumSuccessors() == 1) {
assert(MiddleVPBB->getSingleSuccessor() == ScalarPH &&
"must have ScalarPH as single successor");
return;
}

assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");

// Add a check in the middle block to see if we have completed$ all of the
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Add a check in the middle block to see if we have completed$ all of the
// Add a check in the middle block to see if we have completed all of the

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed, thanks!

// iterations in the first vector loop.
//
// Three cases:
// 1) If we require a scalar epilogue, the scalar ph must execute. Set the
// condition to false.
// 2) If (N - N%VF) == N, then we *don't* need to run the
// remainder. Thus if tail is to be folded, we know we don't need to run
// the remainder and we can set the condition to true.
// 3) Otherwise, construct a runtime check.

// We use the same DebugLoc as the scalar loop latch terminator instead of
// the corresponding compare because they may have ended up with different
// line numbers and we want to avoid awkward line stepping while debugging.
// E.g., if the compare has got a line number inside the loop.
DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc();
VPBuilder Builder(MiddleVPBB);
VPValue *Cmp =
TailFolded
? Plan.getOrAddLiveIn(ConstantInt::getTrue(
IntegerType::getInt1Ty(TripCount->getType()->getContext())))
: Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
&Plan.getVectorTripCount(),
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
VPValue *Cmp;
if (!RequiresScalarEpilogueCheck)
Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
IntegerType::getInt1Ty(TripCount->getType()->getContext())));
else if (TailFolded)
Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
IntegerType::getInt1Ty(TripCount->getType()->getContext())));
else
Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
&Plan.getVectorTripCount(), LatchDL, "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
}

void VPlanTransforms::createLoopRegions(VPlan &Plan) {
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,16 @@ void VPIRPhi::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
}

void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
auto &Preds = R->getParent()->getPredecessors();
assert(R->getNumOperands() == Preds.size() &&
"Number of phi operands must match number of predecessors");
unsigned Position = std::distance(Preds.begin(), find(Preds, IncomingBlock));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this part be getIndexFor(IncomingBlock)? Which is an API of a basic block rather than a phi recipe.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Split off to 861502304c9fe79c01e3382c52652e87ac03713e, as there are already existing users for this utility, thanks.

R->getOperand(Position)->removeUser(*R);
R->removeOperand(Position);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to keep these two changes atomic, taking care of both ends when removing an edge from def/use graph. Perhaps removeOperandAndUser?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, moved the removal also to removeOperand. I kept the name, as removing the operand requires also removing the user. Also documented the behavior.

}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
VPSlotTracker &SlotTracker) const {
Expand Down
50 changes: 23 additions & 27 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,43 +1841,39 @@ void VPlanTransforms::truncateToMinimalBitwidths(
}
}

/// Remove BranchOnCond recipes with true conditions together with removing
/// dead edges to their successors.
static void removeBranchOnCondTrue(VPlan &Plan) {
/// Remove BranchOnCond recipes with true or false conditions together with
/// removing dead edges to their successors.
static void removeBranchOnConst(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Name of removeBranchOnCondTrue() should change, to, e.g., removeBranchOnConst()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated, thanks

for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
VPValue *Cond;
if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: why is the entry block excluded?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Entry will have 2 successors w/o branch. Will be fixed by introducing a branch recipe, possibly on an opaque condition, when connecting entry to vector.ph/scalar.ph.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right, perhaps worth a clarifying TODO.

!match(&VPBB->back(), m_BranchOnCond(m_True())))
!match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;

VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
unsigned RemovedIdx;
if (match(Cond, m_True()))
RemovedIdx = 1;
else if (match(Cond, m_False()))
RemovedIdx = 0;
else
continue;

VPBasicBlock *RemovedSucc =
cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
const auto &Preds = RemovedSucc->getPredecessors();
assert(count(Preds, VPBB) == 1 &&
"There must be a single edge between VPBB and its successor");
unsigned DeadIdx = std::distance(Preds.begin(), find(Preds, VPBB));

// Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
// from these recipes.
// Values coming from VPBB into phi recipes of RemoveSucc are removed from
// these recipes.
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Operands are removed in place keeping recipes intact rather than replacing the recipes - is make_early_inc_range still needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed, thanks

assert((!isa<VPIRInstruction>(&R) ||
!isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
!isa<VPHeaderPHIRecipe>(&R) &&
"Cannot update VPIRInstructions wrapping phis or header phis yet");
auto *VPI = dyn_cast<VPPhi>(&R);
if (!VPI)
auto *Phi = dyn_cast<VPPhiAccessors>(&R);
if (!Phi)
break;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Iterate over phis()? Which should be documented to return recipes that are also VPPhiAccessors?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks! Switched to using cast<>.

VPBuilder B(VPI);
SmallVector<VPValue *> NewOperands;
// Create new operand list, with the dead incoming value filtered out.
for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
if (Idx == DeadIdx)
continue;
NewOperands.push_back(Op);
}
VPI->replaceAllUsesWith(
B.createScalarPhi(NewOperands, VPI->getDebugLoc(), VPI->getName()));
VPI->eraseFromParent();
assert((!isa<VPIRPhi>(&R) || RemovedSucc->getNumPredecessors() == 1) &&
"VPIRPhis must have a single predecessor");
Phi->removeIncomingValueFor(VPBB);
}
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
// automatically on VPlan destruction if it becomes unreachable.
Expand All @@ -1897,7 +1893,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
runPass(removeBranchOnCondTrue, Plan);
runPass(removeBranchOnConst, Plan);
runPass(removeDeadRecipes, Plan);

runPass(createAndOptimizeReplicateRegions, Plan);
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class VPSlotTracker;
class VPUser;
class VPRecipeBase;
class VPInterleaveRecipe;
class VPPhiAccessors;

// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
Expand Down Expand Up @@ -199,8 +200,13 @@ raw_ostream &operator<<(raw_ostream &OS, const VPRecipeBase &R);
/// This class augments VPValue with operands which provide the inverse def-use
/// edges from VPValue's users to their defs.
class VPUser {
/// Grant access to removeOperand for VPPhiAccessors, the only supported user.
friend class VPPhiAccessors;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why/is this befriending needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removeOperand is added as private, to discourage its used unless really necessary, hence befriending. Could also expose as protected or public instead

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth clarifying this discouraging motivation for friend - removeOperand() could be done by RAUW a new similar recipe built w/o the prescribed operand.

Currently removeOperand() is needed only by removeIncomingValue() for VPIRPhi's only. Suffice to provide VPIRPhi::removeIncomingValueFor() which would access a protected removeOperand()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

current VPPhiAccessors::removeIncomingValueFor supports any phi-like recipe, and is used for VPPhi and VPIRPhi. Updated the code in removeBranchOnConst to make use casting to VPPhiAccessors.


SmallVector<VPValue *, 2> Operands;

void removeOperand(unsigned Idx) { Operands.erase(Operands.begin() + Idx); }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the operands to \p O.
Expand Down