Skip to content
9 changes: 0 additions & 9 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9778,15 +9778,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
"UncountableEarlyExitLoopsDisabled", ORE, L);
return false;
}
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
// TODO: Support multiple uncountable early exits.
if (ExitingBlocks.size() - LVL.getCountableExitingBlocks().size() > 1) {
reportVectorizationFailure("Auto-vectorization of loops with multiple "
"uncountable early exits is not yet supported",
"MultipleUncountableEarlyExits", ORE, L);
return false;
}
}

if (!LVL.getPotentiallyFaultingLoads().empty()) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1452,6 +1452,9 @@ class VPPhiAccessors {
/// Returns the incoming block with index \p Idx.
const VPBasicBlock *getIncomingBlock(unsigned Idx) const;

/// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;

/// Returns the number of incoming values, also number of incoming blocks.
virtual unsigned getNumIncoming() const {
return getAsRecipe()->getNumOperands();
Expand Down
35 changes: 15 additions & 20 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -873,33 +873,28 @@ void VPlanTransforms::handleEarlyExits(VPlan &Plan,
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);

// Disconnect all early exits from the loop leaving it with a single exit from
// the latch. Early exits that are countable are left for a scalar epilog. The
// condition of uncountable early exits (currently at most one is supported)
// is fused into the latch exit, and used to branch from middle block to the
// early exit destination.
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
// Disconnect countable early exits from the loop, leaving it with a single
// exit from the latch. Countable early exits are left for a scalar epilog.
// When there are uncountable early exits, skip this loop entirely - they are
// handled separately in handleUncountableEarlyExits.
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
if (Pred == MiddleVPBB)
if (Pred == MiddleVPBB || HasUncountableEarlyExit)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When HasUncountableEarlyExit=true, rather than constructing lists of all the blocks and their predecessors only to ignore them, why not just bail out early? For example,

  if (HasUncountableEarlyExit) {
    handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
                                MiddleVPBB);
    return;
  }

  for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
    ....

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, updated, thanks

continue;
if (HasUncountableEarlyExit) {
assert(!HandledUncountableEarlyExit &&
"can handle exactly one uncountable early exit");
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
HandledUncountableEarlyExit = true;
} else {
for (VPRecipeBase &R : EB->phis())
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
}
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();

// Remove phi operands for the early exiting block.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you follow my suggestion above you can just add an assert here, i.e.

  assert(!HasUncountableEarlyExit && "Should have been handled earlier");

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not added an assertion for now, as we already have the early exit above.

for (VPRecipeBase &R : EB->phis())
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
EarlyExitingVPBB->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
}

assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
"missed an uncountable exit that must be handled");
if (HasUncountableEarlyExit) {
handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
MiddleVPBB);
}
}

void VPlanTransforms::addMiddleCheck(VPlan &Plan,
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,14 @@ void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
R->removeOperand(Position);
}

/// Look up the value this phi receives from \p VPBB by scanning the incoming
/// blocks in index order and returning the value at the first matching index.
/// \p VPBB is required to be one of the incoming blocks; reaching the end of
/// the scan without a match is treated as unreachable.
VPValue *
VPPhiAccessors::getIncomingValueForBlock(const VPBasicBlock *VPBB) const {
  const unsigned NumIncoming = getNumIncoming();
  unsigned Pos = 0;
  // Advance until the incoming block at Pos is the one requested.
  while (Pos != NumIncoming && getIncomingBlock(Pos) != VPBB)
    ++Pos;
  if (Pos == NumIncoming)
    llvm_unreachable("VPBB is not an incoming block");
  return getIncomingValue(Pos);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
VPSlotTracker &SlotTracker) const {
Expand Down
206 changes: 140 additions & 66 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3949,75 +3949,150 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
R->eraseFromParent();
}

void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
VPBasicBlock *EarlyExitVPBB,
VPlan &Plan,
VPBasicBlock *HeaderVPBB,
VPBasicBlock *LatchVPBB) {
auto *MiddleVPBB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[0]);
if (!EarlyExitVPBB->getSinglePredecessor() &&
EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB) {
assert(EarlyExitVPBB->getNumPredecessors() == 2 &&
EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB &&
"unsupported early exit VPBB");
// Early exit operand should always be last phi operand. If EarlyExitVPBB
// has two predecessors and EarlyExitingVPBB is the first, swap the operands
// of the phis.
for (VPRecipeBase &R : EarlyExitVPBB->phis())
cast<VPIRPhi>(&R)->swapOperands();
}
void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't looked at this yet, but I'll try to review this code later today!

VPBasicBlock *HeaderVPBB,
VPBasicBlock *LatchVPBB,
VPBasicBlock *MiddleVPBB) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the middle block not easily available from the VPlan object, i.e. getMiddleBlock()? If this is a different middle block to the one returned by that function it's probably worth adding a comment explaining.

struct EarlyExitInfo {
VPBasicBlock *EarlyExitingVPBB;
VPIRBasicBlock *EarlyExitVPBB;
VPValue *CondToExit;
};

VPBuilder Builder(LatchVPBB->getTerminator());
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
"Terminator must be be BranchOnCond");
VPValue *CondOfEarlyExitingVPBB =
EarlyExitingVPBB->getTerminator()->getOperand(0);
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);

// Create a BranchOnTwoConds in the latch that branches to:
// [0] vector.early.exit, [1] middle block, [2] header (continue looping).
VPValue *IsEarlyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
VPBasicBlock *VectorEarlyExitVPBB =
Plan.createVPBasicBlock("vector.early.exit");
VectorEarlyExitVPBB->setParent(EarlyExitVPBB->getParent());

VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);

// Update the exit phis in the early exit block.
VPBuilder MiddleBuilder(MiddleVPBB);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
// a single predecessor and 1 if it has two.
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
if (ExitIRI->getNumOperands() != 1) {
// The first of two operands corresponds to the latch exit, via MiddleVPBB
// predecessor. Extract its final lane.
ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
SmallVector<EarlyExitInfo> Exits;
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At what stage of the VPlan construction are we at this point? For example, when does the BranchOnTwoConds VPInstruction get introduced? It might be good to have some documentation explaining what we expect the VPlan CFG to look like at this stage in the set of transforms.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe worth renaming EB to EarlyExitVPBB so that it's consistent with the name in the structure? I think it's perhaps more obvious/readable this way.

for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this means the VPRegionBlock must have been dissolved at this point? Otherwise the exiting VPBB would have no successors, and the successors would be tied to the region block itself. Perhaps worth asserting the region has been dissolved at this point?

if (Pred == MiddleVPBB)
continue;
// Collect condition for this early exit.
auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth just always calling match here so that you can also obtain the condition? For example, something like:

  VPValue *CondOfEarlyExitingVPBB;
  [[maybe_unused]] bool Matched = match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(CondOfEarlyExitingVPBB));
  assert(Matched && "Terminator must be BranchOnCond");

"Terminator must be BranchOnCond");
VPValue *CondOfEarlyExitingVPBB =
EarlyExitingVPBB->getTerminator()->getOperand(0);
auto *CondToEarlyExit = TrueSucc == EB
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);
Exits.push_back({
EarlyExitingVPBB,
EB,
CondToEarlyExit,
});
}
}

// Sort exits by dominance to get the correct program order.
VPDominatorTree VPDT(Plan);
llvm::sort(Exits, [&VPDT](const EarlyExitInfo &A, const EarlyExitInfo &B) {
return VPDT.dominates(A.EarlyExitingVPBB, B.EarlyExitingVPBB);
});

VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
if (!isa<VPIRValue>(IncomingFromEarlyExit)) {
// Update the incoming value from the early exit.
VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
VPInstruction::FirstActiveLane, {CondToEarlyExit},
DebugLoc::getUnknown(), "first.active.lane");
IncomingFromEarlyExit = EarlyExitB.createNaryOp(
VPInstruction::ExtractLane, {FirstActiveLane, IncomingFromEarlyExit},
DebugLoc::getUnknown(), "early.exit.value");
ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
// Build the AnyOf condition for the latch terminator. For multiple exits,
// also create an exit dispatch block to determine which exit to take.
VPValue *Combined = Exits[0].CondToExit;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we're assuming the Builder's insert point is correct, i.e. all conditions dominate the insert point. Is it worth adding asserts that the condition dominates the latch terminator?

for (const auto &Exit : drop_begin(Exits))
Combined = Builder.createOr(Combined, Exit.CondToExit);
VPValue *IsAnyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {Combined});

VPSymbolicValue FirstActiveLane;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is new to me. From the documentation it says:

/// A symbolic live-in VPValue, used for values like vector trip count, VF, and
/// VFxUF.

The way you're using this here doesn't seem to match the documentation? This seems like a deviation from the LLVM IR way of doing things, which is to always call a creation function that returns a pointer to an object allocated on the heap. Perhaps worth adding comments explaining a bit more about why we're doing it this way and how it interacts with the other values?

Alternatively, I've written a suggestion near the bottom of this function for how the code can be simply re-written to avoid the use of symbolic values entirely. There is nothing that fundamentally requires the use of this.

// Process exits in reverse order so phi operands are added in the order
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not obvious to me what "Process exits in ..." means. For example, where are the phi operands being added? Might be useful to have a simple diagram.

// matching the original program order (last exit's operand added first
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do we know at this point exactly what the program order is going to be? Is there some LLVM IR convention that says the phi operands must be arranged in a particular order, according to dominance?

// becomes last). The vector is reversed afterwards to restore forward order
// for the dispatch logic.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, I do find it a bit difficult to map the comments to how the IR is laid out in the original scalar loop, compared to how the VPlan is laid out both before and after the transformation here.

SmallVector<VPBasicBlock *> VectorEarlyExitVPBBs;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given you know exactly how many blocks you're going to create here can't you just do:

  SmallVector<VPBasicBlock *> VectorEarlyExitVPBBs(Exits.size());

? That way you initialise exactly the correct amount of space upfront without letting push_back resize dynamically each time.

for (auto [I, Exit] : enumerate(reverse(Exits))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here the Exits were sorted based on the dominance order of the exiting blocks, so by reversing the Exits we're now processing them so that the exiting block in the next iteration is guaranteed to dominate the exiting block of the previous iteration, right?

auto &[EarlyExitingVPBB, EarlyExitVPBB, CondToExit] = Exit;
unsigned Idx = Exits.size() - 1 - I;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given we already have an index I used for the enumeration it's a bit confusing having another index called Idx. It's probably worth renaming I and Idx to have more meaningful names, e.g. ReverseIdx and ForwardIdx or something like that?

Twine BlockSuffix = Exits.size() == 1 ? "" : Twine(".") + Twine(Idx);
VPBasicBlock *VectorEarlyExitVPBB =
Plan.createVPBasicBlock("vector.early.exit" + BlockSuffix);
VectorEarlyExitVPBBs.push_back(VectorEarlyExitVPBB);

for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
VPValue *IncomingVal =
ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);

// Compute the incoming value for this early exit.
VPValue *NewIncoming = IncomingVal;
if (!isa<VPIRValue>(IncomingVal)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realise you haven't changed anything here as we had the same logic previously, but what is the incoming value if it's not a constant or live-in? I guess at some point we must have initialised it deliberately with an incorrect dummy value, expecting it to be replaced here with the correct one. I just wondered if there was a way to assert that we're replacing a "dummy" value?

VPBuilder EarlyExitB(VectorEarlyExitVPBB);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable names are a bit confusing here - EarlyExitB could also imply "early exit block". It might be more obvious to call it something like EarlyExitBuilder?

NewIncoming = EarlyExitB.createNaryOp(
VPInstruction::ExtractLane, {&FirstActiveLane, IncomingVal},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we're extracting from a value that hasn't been initialised to anything? Seems quite unusual.

DebugLoc::getUnknown(), "early.exit.value");
}
ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
// Add the new incoming value for this early exit.
ExitIRI->addOperand(NewIncoming);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks potentially dangerous. You have used getIncomingValueForBlock to get access to the original incoming value, then removed it and essentially appended it to the end. This assumes that getIncomingValueForBlock returned the last incoming value in the list of operands. I think we probably need to do one of the following:

  1. Before calling getIncomingValueForBlock assert that getBasicBlockIndex(EarlyExitingVPBB) == ExitIRI.getNumOperands() - 1, if that is indeed the expectation, or
  2. Add a new interface, something like changeIncomingValueFor(EarlyExitingVPBB) which gets the index for the block and modifies the operand in-place.

}

EarlyExitingVPBB->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What connects the original EarlyExitingVPBB to the new VectorEarlyExitVPBB?

}
VectorEarlyExitVPBBs = to_vector(llvm::reverse(VectorEarlyExitVPBBs));

// Replace the conditional branch controlling the latch exit from the vector
// loop with a multi-conditional branch exiting to vector early exit if the
// early exit has been taken, exiting to middle block if the original
// condition of the vector latch is true, otherwise continuing back to header.
// For exit blocks that also have the middle block as predecessor (latch
// exits to the same block as an early exit), extract the last lane of the
// first operand for the middle block's incoming value.
VPBuilder MiddleBuilder(MiddleVPBB);
for (VPRecipeBase &R :
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->phis()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to assert that the first successor of the middle block is indeed the exit that's shared by the early exit block?

auto *ExitIRI = cast<VPIRPhi>(&R);
if (ExitIRI->getNumOperands() == 1)
continue;
ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this essentially assuming that the first operand of ExitIRI is the incoming value for MiddleVPBB? What about the incoming values from the original scalar loop? If this code is making assumptions about ordering of incoming values it might be worth adding an assert.

}

if (Exits.size() != 1) {
VPBasicBlock *DispatchBB =
Plan.createVPBasicBlock("vector.early.exit.check");
DispatchBB->setParent(VectorEarlyExitVPBBs[0]->getParent());
// In the dispatch block, compute the first active lane across all
// conditions and chain through exits.
VPBuilder DispatchBuilder(DispatchBB);
// Chain through exits: for each exit, check if its condition is true at the
// first active lane. If so, take that exit. Otherwise, try the next exit.
VPBasicBlock *CurrentBB = DispatchBB;
for (auto [I, Exit] : enumerate(ArrayRef(Exits).drop_back())) {
VPValue *LaneVal = DispatchBuilder.createNaryOp(
VPInstruction::ExtractLane, {&FirstActiveLane, Exit.CondToExit},
DebugLoc::getUnknown(), "exit.cond.at.lane");

// For the last dispatch, branch directly to the last exit on false;
// otherwise, create a new check block.
bool IsLastDispatch = (I + 2 == Exits.size());
VPBasicBlock *FalseBB =
IsLastDispatch ? VectorEarlyExitVPBBs.back()
: Plan.createVPBasicBlock(
Twine("vector.early.exit.check.") + Twine(I));
if (!IsLastDispatch)
FalseBB->setParent(LatchVPBB->getParent());

DispatchBuilder.createNaryOp(VPInstruction::BranchOnCond, {LaneVal});
CurrentBB->setSuccessors({VectorEarlyExitVPBBs[I], FalseBB});
VectorEarlyExitVPBBs[I]->setPredecessors({CurrentBB});
FalseBB->setPredecessors({CurrentBB});

if (!IsLastDispatch) {
CurrentBB = FalseBB;
DispatchBuilder.setInsertPoint(CurrentBB);
}
}
VectorEarlyExitVPBBs[0] = DispatchBB;
}

VPBuilder DispatchBuilder(VectorEarlyExitVPBBs[0],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably good to explain this a bit more - what is VectorEarlyExitVPBBs[0] exactly when there are multiple exits? Does it correspond to the last or first early exiting block in the dominance chain? If it was closer to the original code that created the blocks it would be a bit easier to follow I think.

VectorEarlyExitVPBBs[0]->begin());
VPValue *FirstLane =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't we just create this much earlier on to avoid having to use the VPSymbolicValue? For example, just after creating Combined on line 3998 we can create all the VectorEarlyExitVPBBs, then create the first active lane immediately. Nothing fundamentally requires the use of VPSymbolicValue - restructuring the code will remove the need for it. For example, something like:

  // Build the AnyOf condition for the latch terminator. For multiple exits,
  // also create an exit dispatch block to determine which exit to take.
  VPValue *Combined = Exits[0].CondToExit;

  // Create the blocks ...
  SmallVector<VPBasicBlock *> VectorEarlyExitVPBBs(Exits.size());
  for (unsigned Idx = Exits.size() - 1; Idx >= 0; Idx--) {
    Twine BlockSuffix = Exits.size() == 1 ? "" : Twine(".") + Twine(ReverseIdx);
    VPBasicBlock *VectorEarlyExitVPBB =
        Plan.createVPBasicBlock("vector.early.exit" + BlockSuffix);
    VectorEarlyExitVPBBs.push_back(VectorEarlyExitVPBB);
  }

  VPBuilder DispatchBuilder(VectorEarlyExitVPBBs[0]);
  VPValue *FirstLane = 
    DispatchBuilder.createNaryOp(VPInstruction::FirstActiveLane, {Combined}, DebugLoc::getUnknown(), "first.active.lane");

  ... update incoming values ...

DispatchBuilder.createNaryOp(VPInstruction::FirstActiveLane, {Combined},
DebugLoc::getUnknown(), "first.active.lane");
FirstActiveLane.replaceAllUsesWith(FirstLane);

// Replace the latch terminator with the new branching logic.
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
"Unexpected terminator");
Expand All @@ -4027,13 +4102,12 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,

DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
LatchExitingBranch->eraseFromParent();

Builder.setInsertPoint(LatchVPBB);
Builder.createNaryOp(VPInstruction::BranchOnTwoConds,
{IsEarlyExitTaken, IsLatchExitTaken}, LatchDL);
{IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
LatchVPBB->clearSuccessors();
LatchVPBB->setSuccessors({VectorEarlyExitVPBB, MiddleVPBB, HeaderVPBB});
VectorEarlyExitVPBB->setPredecessors({LatchVPBB});
LatchVPBB->setSuccessors({VectorEarlyExitVPBBs[0], MiddleVPBB, HeaderVPBB});
VectorEarlyExitVPBBs[0]->setPredecessors({LatchVPBB});
}

/// This function tries to convert extended in-loop reductions to
Expand Down
15 changes: 7 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,13 @@ struct VPlanTransforms {
/// Remove dead recipes from \p Plan.
static void removeDeadRecipes(VPlan &Plan);

/// Update \p Plan to account for the uncountable early exit from \p
/// EarlyExitingVPBB to \p EarlyExitVPBB by introducing a BranchOnTwoConds
/// terminator in the latch that handles the early exit and the latch exit
/// condition.
static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
VPBasicBlock *EarlyExitVPBB,
VPlan &Plan, VPBasicBlock *HeaderVPBB,
VPBasicBlock *LatchVPBB);
/// Update \p Plan to account for uncountable early exits by introducing
/// appropriate branching logic in the latch that handles early exits and the
/// latch exit condition. Multiple exits are handled with a dispatch block
/// that determines which exit to take based on lane-by-lane semantics.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
VPBasicBlock *LatchVPBB,
VPBasicBlock *MiddleVPBB);

/// Replace loop regions with explicit CFG.
static void dissolveLoopRegions(VPlan &Plan);
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,10 @@ loop.end:
}


; Multiple uncountable early exits pass legality but are not yet supported
; in VPlan transformations.
; Multiple uncountable early exits are now supported.
define i64 @multiple_uncountable_exits() {
; CHECK-LABEL: LV: Checking a loop in 'multiple_uncountable_exits'
; CHECK: LV: We can vectorize this loop!
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with multiple uncountable early exits is not yet supported.
entry:
%p1 = alloca [1024 x i8]
%p2 = alloca [1024 x i8]
Expand Down
Loading