Skip to content
9 changes: 0 additions & 9 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9780,15 +9780,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
"UncountableEarlyExitLoopsDisabled", ORE, L);
return false;
}
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
// TODO: Support multiple uncountable early exits.
if (ExitingBlocks.size() - LVL.getCountableExitingBlocks().size() > 1) {
reportVectorizationFailure("Auto-vectorization of loops with multiple "
"uncountable early exits is not yet supported",
"MultipleUncountableEarlyExits", ORE, L);
return false;
}
}

if (!LVL.getPotentiallyFaultingLoads().empty()) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1398,6 +1398,9 @@ class VPPhiAccessors {
/// Returns the incoming block with index \p Idx.
const VPBasicBlock *getIncomingBlock(unsigned Idx) const;

/// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;

/// Returns the number of incoming values, also number of incoming blocks.
virtual unsigned getNumIncoming() const {
return getAsRecipe()->getNumOperands();
Expand Down
35 changes: 15 additions & 20 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -873,33 +873,28 @@ void VPlanTransforms::handleEarlyExits(VPlan &Plan,
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);

// Disconnect all early exits from the loop leaving it with a single exit from
// the latch. Early exits that are countable are left for a scalar epilog. The
// condition of uncountable early exits (currently at most one is supported)
// is fused into the latch exit, and used to branch from middle block to the
// early exit destination.
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
// Disconnect countable early exits from the loop, leaving it with a single
// exit from the latch. Countable early exits are left for a scalar epilog.
// When there are uncountable early exits, skip this loop entirely - they are
// handled separately in handleUncountableEarlyExits.
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
if (Pred == MiddleVPBB)
if (Pred == MiddleVPBB || HasUncountableEarlyExit)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When HasUncountableEarlyExit=true, rather than constructing lists of all the blocks and their predecessors only to ignore them, why not just bail out early? For example,

  if (HasUncountableEarlyExit) {
    handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
                                MiddleVPBB);
    return;
  }

  for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
    ....

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, updated, thanks

continue;
if (HasUncountableEarlyExit) {
assert(!HandledUncountableEarlyExit &&
"can handle exactly one uncountable early exit");
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
HandledUncountableEarlyExit = true;
} else {
for (VPRecipeBase &R : EB->phis())
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
}
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();

// Remove phi operands for the early exiting block.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you follow my suggestion above you can just add an assert here, i.e.

  assert(!HasUncountableEarlyExit && "Should have been handled earlier");

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not added an assertion for now, as we now have the early exit above

for (VPRecipeBase &R : EB->phis())
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
EarlyExitingVPBB->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
}

assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
"missed an uncountable exit that must be handled");
if (HasUncountableEarlyExit) {
handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
MiddleVPBB);
}
}

void VPlanTransforms::addMiddleCheck(VPlan &Plan,
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1718,6 +1718,14 @@ void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
R->removeOperand(Position);
}

/// Scan the phi's incoming blocks in index order and return the incoming value
/// paired with the first block equal to \p VPBB. Falling off the end of the
/// scan without a match hits llvm_unreachable, so \p VPBB is expected to be
/// one of the incoming blocks.
VPValue *
VPPhiAccessors::getIncomingValueForBlock(const VPBasicBlock *VPBB) const {
  for (unsigned I = 0, E = getNumIncoming(); I != E; ++I) {
    // Not the block we are looking for; keep scanning.
    if (getIncomingBlock(I) != VPBB)
      continue;
    return getIncomingValue(I);
  }
  llvm_unreachable("VPBB is not an incoming block");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
VPSlotTracker &SlotTracker) const {
Expand Down
203 changes: 137 additions & 66 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3944,75 +3944,147 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
R->eraseFromParent();
}

void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
VPBasicBlock *EarlyExitVPBB,
VPlan &Plan,
VPBasicBlock *HeaderVPBB,
VPBasicBlock *LatchVPBB) {
auto *MiddleVPBB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[0]);
if (!EarlyExitVPBB->getSinglePredecessor() &&
EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB) {
assert(EarlyExitVPBB->getNumPredecessors() == 2 &&
EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB &&
"unsupported early exit VPBB");
// Early exit operand should always be last phi operand. If EarlyExitVPBB
// has two predecessors and EarlyExitingVPBB is the first, swap the operands
// of the phis.
for (VPRecipeBase &R : EarlyExitVPBB->phis())
cast<VPIRPhi>(&R)->swapOperands();
}
void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't looked at this yet, but I'll try to review this code later today!

VPBasicBlock *HeaderVPBB,
VPBasicBlock *LatchVPBB,
VPBasicBlock *MiddleVPBB) {
struct EarlyExitInfo {
VPBasicBlock *EarlyExitingVPBB;
VPIRBasicBlock *EarlyExitVPBB;
VPValue *CondToExit;
};

VPBuilder Builder(LatchVPBB->getTerminator());
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
"Terminator must be be BranchOnCond");
VPValue *CondOfEarlyExitingVPBB =
EarlyExitingVPBB->getTerminator()->getOperand(0);
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);

// Create a BranchOnTwoConds in the latch that branches to:
// [0] vector.early.exit, [1] middle block, [2] header (continue looping).
VPValue *IsEarlyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
VPBasicBlock *VectorEarlyExitVPBB =
Plan.createVPBasicBlock("vector.early.exit");
VectorEarlyExitVPBB->setParent(EarlyExitVPBB->getParent());

VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);

// Update the exit phis in the early exit block.
VPBuilder MiddleBuilder(MiddleVPBB);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
// a single predecessor and 1 if it has two.
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
if (ExitIRI->getNumOperands() != 1) {
// The first of two operands corresponds to the latch exit, via MiddleVPBB
// predecessor. Extract its final lane.
ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
SmallVector<EarlyExitInfo> Exits;
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
if (Pred == MiddleVPBB)
continue;
// Collect condition for this early exit.
auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
"Terminator must be BranchOnCond");
VPValue *CondOfEarlyExitingVPBB =
EarlyExitingVPBB->getTerminator()->getOperand(0);
auto *CondToEarlyExit = TrueSucc == EB
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);
Exits.push_back({
EarlyExitingVPBB,
EB,
CondToEarlyExit,
});
}
}

// Sort exits by dominance to get the correct program order.
VPDominatorTree VPDT(Plan);
llvm::sort(Exits, [&VPDT](const EarlyExitInfo &A, const EarlyExitInfo &B) {
return VPDT.dominates(A.EarlyExitingVPBB, B.EarlyExitingVPBB);
});

VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
if (!isa<VPIRValue>(IncomingFromEarlyExit)) {
// Update the incoming value from the early exit.
VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
VPInstruction::FirstActiveLane, {CondToEarlyExit},
DebugLoc::getUnknown(), "first.active.lane");
IncomingFromEarlyExit = EarlyExitB.createNaryOp(
VPInstruction::ExtractLane, {FirstActiveLane, IncomingFromEarlyExit},
DebugLoc::getUnknown(), "early.exit.value");
ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
// Build the AnyOf condition for the latch terminator. For multiple exits,
// also create an exit dispatch block to determine which exit to take.
VPValue *Combined = Exits[0].CondToExit;
for (const auto &Exit : drop_begin(Exits))
Combined = Builder.createOr(Combined, Exit.CondToExit);
VPValue *IsAnyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {Combined});

VPSymbolicValue FirstActiveLane;
// Process exits in reverse order so phi operands are added in the order
// matching the original program order (last exit's operand added first
// becomes last). The vector is reversed afterwards to restore forward order
// for the dispatch logic.
SmallVector<VPBasicBlock *> VectorEarlyExitVPBBs;
for (const auto &[EarlyExitingVPBB, EarlyExitVPBB, CondToExit] :
reverse(Exits)) {
VPBasicBlock *VectorEarlyExitVPBB =
Plan.createVPBasicBlock("vector.early.exit");
VectorEarlyExitVPBB->setParent(EarlyExitVPBB->getParent());
VectorEarlyExitVPBBs.push_back(VectorEarlyExitVPBB);

for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
VPValue *IncomingVal =
ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);

// Compute the incoming value for this early exit.
VPValue *NewIncoming = IncomingVal;
if (!isa<VPIRValue>(IncomingVal)) {
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
NewIncoming = EarlyExitB.createNaryOp(
VPInstruction::ExtractLane, {&FirstActiveLane, IncomingVal},
DebugLoc::getUnknown(), "early.exit.value");
}
ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
// Add the new incoming value for this early exit.
ExitIRI->addOperand(NewIncoming);
}

EarlyExitingVPBB->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
}
VectorEarlyExitVPBBs = to_vector(llvm::reverse(VectorEarlyExitVPBBs));

// Replace the conditional branch controlling the latch exit from the vector
// loop with a multi-conditional branch exiting to vector early exit if the
// early exit has been taken, exiting to middle block if the original
// condition of the vector latch is true, otherwise continuing back to header.
// For exit blocks that also have the middle block as predecessor (latch
// exits to the same block as an early exit), extract the last lane of the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// exit to the same block as an early exit), extract the last lane of the
// exits to the same block as an early exit), extract the last lane of the

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated, thanks

// first operand for the middle block's incoming value.
VPBuilder MiddleBuilder(MiddleVPBB);
for (VPRecipeBase &R :
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
if (ExitIRI->getNumOperands() == 1)
continue;
ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
}

if (Exits.size() != 1) {
VPBasicBlock *DispatchBB = Plan.createVPBasicBlock("vector.early.exit");
DispatchBB->setParent(VectorEarlyExitVPBBs[0]->getParent());
// In the dispatch block, compute the first active lane across all
// conditions and chain through exits.
VPBuilder DispatchBuilder(DispatchBB);
// Chain through exits: for each exit, check if its condition is true at the
// first active lane. If so, take that exit. Otherwise, try the next exit.
VPBasicBlock *CurrentBB = DispatchBB;
for (auto [I, Exit] : enumerate(ArrayRef(Exits).drop_back())) {
VPValue *LaneVal = DispatchBuilder.createNaryOp(
VPInstruction::ExtractLane, {&FirstActiveLane, Exit.CondToExit},
DebugLoc::getUnknown(), "exit.cond.at.lane");

// For the last dispatch, branch directly to the last exit on false;
// otherwise, create a new check block.
bool IsLastDispatch = (I + 2 == Exits.size());
VPBasicBlock *FalseBB =
IsLastDispatch ? VectorEarlyExitVPBBs.back()
: Plan.createVPBasicBlock("vector.early.exit.check");
if (!IsLastDispatch)
FalseBB->setParent(LatchVPBB->getParent());

DispatchBuilder.createNaryOp(VPInstruction::BranchOnCond, {LaneVal});
CurrentBB->setSuccessors({VectorEarlyExitVPBBs[I], FalseBB});
VectorEarlyExitVPBBs[I]->setPredecessors({CurrentBB});
FalseBB->setPredecessors({CurrentBB});

if (!IsLastDispatch) {
CurrentBB = FalseBB;
DispatchBuilder.setInsertPoint(CurrentBB);
}
}
VectorEarlyExitVPBBs[0] = DispatchBB;
}

VPBuilder DispatchBuilder(VectorEarlyExitVPBBs[0],
VectorEarlyExitVPBBs[0]->begin());
VPValue *FirstLane =
DispatchBuilder.createNaryOp(VPInstruction::FirstActiveLane, {Combined},
DebugLoc::getUnknown(), "first.active.lane");
FirstActiveLane.replaceAllUsesWith(FirstLane);

// Replace the latch terminator with the new branching logic.
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
"Unexpected terminator");
Expand All @@ -4022,13 +4094,12 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,

DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
LatchExitingBranch->eraseFromParent();

Builder.setInsertPoint(LatchVPBB);
Builder.createNaryOp(VPInstruction::BranchOnTwoConds,
{IsEarlyExitTaken, IsLatchExitTaken}, LatchDL);
{IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
LatchVPBB->clearSuccessors();
LatchVPBB->setSuccessors({VectorEarlyExitVPBB, MiddleVPBB, HeaderVPBB});
VectorEarlyExitVPBB->setPredecessors({LatchVPBB});
LatchVPBB->setSuccessors({VectorEarlyExitVPBBs[0], MiddleVPBB, HeaderVPBB});
VectorEarlyExitVPBBs[0]->setPredecessors({LatchVPBB});
}

/// This function tries to convert extended in-loop reductions to
Expand Down
15 changes: 7 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,13 @@ struct VPlanTransforms {
/// Remove dead recipes from \p Plan.
static void removeDeadRecipes(VPlan &Plan);

/// Update \p Plan to account for the uncountable early exit from \p
/// EarlyExitingVPBB to \p EarlyExitVPBB by introducing a BranchOnTwoConds
/// terminator in the latch that handles the early exit and the latch exit
/// condition.
static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
VPBasicBlock *EarlyExitVPBB,
VPlan &Plan, VPBasicBlock *HeaderVPBB,
VPBasicBlock *LatchVPBB);
/// Update \p Plan to account for uncountable early exits by introducing
/// appropriate branching logic in the latch that handles early exits and the
/// latch exit condition. Multiple exits are handled with a dispatch block
/// that determines which exit to take based on lane-by-lane semantics.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
VPBasicBlock *LatchVPBB,
VPBasicBlock *MiddleVPBB);

/// Replace loop regions with explicit CFG.
static void dissolveLoopRegions(VPlan &Plan);
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,10 @@ loop.end:
}


; Multiple uncountable early exits pass legality but are not yet supported
; in VPlan transformations.
; Multiple uncountable early exits are now supported.
define i64 @multiple_uncountable_exits() {
; CHECK-LABEL: LV: Checking a loop in 'multiple_uncountable_exits'
; CHECK: LV: We can vectorize this loop!
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with multiple uncountable early exits is not yet supported.
entry:
%p1 = alloca [1024 x i8]
%p2 = alloca [1024 x i8]
Expand Down
Loading