Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
11870a2
[LV] Vectorize select min/max index.
fhahn Jun 8, 2025
2688d03
!fixup address review comments, thanks
fhahn Jul 22, 2025
26ebf5a
!fixup remove stray new line
fhahn Aug 1, 2025
af2ba25
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Sep 11, 2025
2871d6c
!fixup fix build after merge
fhahn Sep 11, 2025
ae18690
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index…
fhahn Sep 11, 2025
4305caf
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index…
fhahn Sep 11, 2025
98fb1f7
!fixup detect multi-use min/max recurrences.
fhahn Sep 11, 2025
dc59607
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Oct 2, 2025
6134ef1
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Oct 2, 2025
ad99496
!fixup add additional uses.
fhahn Oct 2, 2025
5b65693
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Oct 6, 2025
fabcf69
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Oct 12, 2025
844c2c2
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Oct 26, 2025
df7e6b8
!fixup replace MultiUse kinds with boolean flag
fhahn Oct 26, 2025
5e209db
Merge branch 'main' into lv-find-min-max-index
fhahn Nov 3, 2025
127da7d
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 11, 2025
603c47c
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 19, 2025
76e661a
!fixup address comments, thanks
fhahn Nov 19, 2025
d74939e
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 20, 2025
a057dd3
!fixup address comments, thanks
fhahn Nov 20, 2025
c351e55
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 21, 2025
fd90ad9
!fixup address comments, thanks
fhahn Nov 21, 2025
2fd21ec
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 21, 2025
da55075
!fixup address latest comments, thanks
fhahn Nov 21, 2025
5a442b2
!fixup add argmin/argmax tests with fmin/fmax.
fhahn Nov 21, 2025
a78311d
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 22, 2025
87325fd
!fixup address latest comments, thanks
fhahn Nov 22, 2025
918f079
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 24, 2025
6cc3953
!fixup address latest comments, thanks
fhahn Nov 24, 2025
3cedf8a
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 25, 2025
b1ff1a4
!fixup address comments, thanks
fhahn Nov 25, 2025
895baa8
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index
fhahn Nov 26, 2025
a073c9b
!fixup
fhahn Nov 27, 2025
6d8e164
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index…
fhahn Nov 27, 2025
0671371
Step
fhahn Nov 27, 2025
450d6a0
[VPlan] Use m_Intrinsic to match assumes/noalias_scope_decl (NFC).
fhahn Nov 27, 2025
cb25d0c
[VPlan] Add matcher
fhahn Nov 27, 2025
7d34974
Fix crash
fhahn Nov 27, 2025
7710b71
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index…
fhahn Nov 27, 2025
7e2d9c3
Merge remote-tracking branch 'origin/main' into lv-find-min-max-index…
fhahn Nov 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion llvm/include/llvm/Analysis/IVDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,18 @@ class RecurrenceDescriptor {
RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
Type *RT, bool Signed, bool Ordered,
SmallPtrSetImpl<Instruction *> &CI,
unsigned MinWidthCastToRecurTy)
unsigned MinWidthCastToRecurTy,
bool PhiHasLoopUsesOutsideReductionChain = false)
: IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
IsSigned(Signed), IsOrdered(Ordered),
PhiHasLoopUsesOutsideReductionChain(
PhiHasLoopUsesOutsideReductionChain),
MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
CastInsts.insert_range(CI);
assert(
(!PhiHasLoopUsesOutsideReductionChain || isMinMaxRecurrenceKind(K)) &&
"Only min/max recurrences are allowed to have multiple uses currently");
}

/// This POD struct holds information about a potential recurrence operation.
Expand Down Expand Up @@ -339,6 +345,12 @@ class RecurrenceDescriptor {
/// Expose an ordered FP reduction to the instance users.
bool isOrdered() const { return IsOrdered; }

/// Returns true if the reduction PHI has multiple in-loop users. This is
/// relevant for min/max reductions that are part of a FindLastIV pattern.
bool hasLoopUsesOutsideReductionChain() const {
return PhiHasLoopUsesOutsideReductionChain;
}

/// Attempts to find a chain of operations from Phi to LoopExitInst that can
/// be treated as a set of reductions instructions for in-loop reductions.
LLVM_ABI SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi,
Expand Down Expand Up @@ -376,6 +388,10 @@ class RecurrenceDescriptor {
// Currently only a non-reassociative FAdd can be considered in-order,
// if it is also the only FAdd in the PHI's use chain.
bool IsOrdered = false;
// True if the reduction PHI has in-loop users outside the reduction chain.
// This is relevant for min/max reductions that are part of a FindLastIV
// pattern.
bool PhiHasLoopUsesOutsideReductionChain = false;
// Instructions used for type-promoting the recurrence.
SmallPtrSet<Instruction *, 8> CastInsts;
// The minimum width used by the recurrence.
Expand Down
51 changes: 51 additions & 0 deletions llvm/lib/Analysis/IVDescriptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,52 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
return true;
}

/// Returns true if \p Phi is a min/max reduction matching \p Kind where \p Phi
/// is used in the loop outside the reduction chain. This is common for loops
/// selecting the index of a minimum/maximum value (argmin/argmax).
static bool isMinMaxReductionWithLoopUsersOutsideReductionChain(
PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes) {
BasicBlock *Latch = TheLoop->getLoopLatch();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe need to add assertion to check the Phi has 2 incoming values, one is from latch (already checked), and another is from preheader.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks

if (!Latch)
return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return false;
return false;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added, thansk


assert(Phi->getNumIncomingValues() == 2 && "phi must have 2 incoming values");
Value *Inc = Phi->getIncomingValueForBlock(Latch);
if (Phi->hasOneUse() || !Inc->hasOneUse() ||
!RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind))
return false;

Value *A, *B;
bool IsMinMax = [&]() {
switch (Kind) {
case RecurKind::UMax:
return match(Inc, m_UMax(m_Value(A), m_Value(B)));
case RecurKind::UMin:
return match(Inc, m_UMin(m_Value(A), m_Value(B)));
case RecurKind::SMax:
return match(Inc, m_SMax(m_Value(A), m_Value(B)));
case RecurKind::SMin:
return match(Inc, m_SMin(m_Value(A), m_Value(B)));
default:
llvm_unreachable("all min/max kinds must be handled");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
llvm_unreachable("all min/max kinds must be handled");
return false; // Only min/max are handled.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This checks for completeness, all integer min/max should be handled, and unreachable here should make it slightly easier to catch missing cases together with the check above

}
}();
if (!IsMinMax)
return false;

if (A == B || (A != Phi && B != Phi))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can be written as !((A == Phi) ^ (B == Phi)) or ((A == Phi) == (B == Phi)), but probably clearer as written.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left it as-is for now, thanks

return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This checks that one of the multiple users of Phi is Inc, what about checking that another user is in the loop (rather than live-out)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other users will be checked in VPlan currently.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, but the name of the function and its documentation specifically claim it has a user in the loop. Better rename isMinMaxReductionPhiWithUsersOutsideReductionChain() instead, and update documentation?

Note that every reduction surely has users outside its chain, but they typically use the final post-updated value rather than the intermediate phi value. So handleMultiUseReductions() might also be better named, given that every reduction is essentially multi-use somewhere along its chain.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, should be updated ,thanks


SmallPtrSet<Instruction *, 4> CastInsts;
Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
RedDes =
RecurrenceDescriptor(RdxStart, /*Exit=*/nullptr, /*Store=*/nullptr, Kind,
FastMathFlags(), /*ExactFP=*/nullptr, Phi->getType(),
/*Signed=*/false, /*Ordered=*/false, CastInsts,
/*MinWidthCastToRecurTy=*/-1U, /*PhiMultiUse=*/true);
return true;
}

bool RecurrenceDescriptor::AddReductionVar(
PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC,
Expand All @@ -225,6 +271,11 @@ bool RecurrenceDescriptor::AddReductionVar(
if (Phi->getParent() != TheLoop->getHeader())
return false;

// Check for min/max reduction variables that feed other users in the loop.
if (isMinMaxReductionWithLoopUsersOutsideReductionChain(Phi, Kind, TheLoop,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (isMinMaxReductionWithLoopUsersOutsideReductionChain(Phi, Kind, TheLoop,
// Check for min/max reduction variables that feed other users in the loop.
if (isMinMaxReductionWithLoopUsersOutsideReductionChain(Phi, Kind, TheLoop,

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added thanks

RedDes))
return true;

// Obtain the reduction start value from the value that comes from the loop
// preheader.
Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Utils/LoopUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1253,6 +1253,8 @@ llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L,
/*DemandedBits=*/nullptr,
/*AC=*/nullptr, /*DT=*/nullptr, SE))
return std::nullopt;
if (RdxDesc.hasLoopUsesOutsideReductionChain())
return std::nullopt;
RecurKind RK = RdxDesc.getRecurrenceKind();
// Skip unsupported reductions.
// TODO: Handle additional reductions, including FP and min-max
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,11 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
AllowedExit.insert(RedDes.getLoopExitInstr());
Reductions[Phi] = RedDes;
assert((!RedDes.hasLoopUsesOutsideReductionChain() ||
RecurrenceDescriptor::isMinMaxRecurrenceKind(
RedDes.getRecurrenceKind())) &&
"Only min/max recurrences are allowed to have multiple uses "
"currently");
return true;
}

Expand Down
20 changes: 16 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6589,6 +6589,11 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
PHINode *Phi = Reduction.first;
const RecurrenceDescriptor &RdxDesc = Reduction.second;

// Multi-use reductions (e.g., used in FindLastIV patterns) are handled
// separately and should not be considered for in-loop reductions.
if (RdxDesc.hasLoopUsesOutsideReductionChain())
continue;

// We don't collect reductions that are type promoted (yet).
if (RdxDesc.getRecurrenceType() != Phi->getType())
continue;
Expand Down Expand Up @@ -7993,9 +7998,10 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
MapVector<Instruction *,
SmallVector<std::pair<PartialReductionChain, unsigned>>>
ChainsByPhi;
for (const auto &[Phi, RdxDesc] : Legal->getReductionVars())
getScaledReductions(Phi, RdxDesc.getLoopExitInstr(), Range,
ChainsByPhi[Phi]);
for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) {
if (Instruction *RdxExitInstr = RdxDesc.getLoopExitInstr())
getScaledReductions(Phi, RdxExitInstr, Range, ChainsByPhi[Phi]);
Comment on lines +8007 to +8008
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: may be good to wrap loop body in brackets.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done thanks

}

// A partial reduction is invalid if any of its extends are used by
// something that isn't another partial reduction. This is because the
Expand Down Expand Up @@ -8212,7 +8218,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
CM.useOrderedReductions(RdxDesc), ScaleFactor,
RdxDesc.hasLoopUsesOutsideReductionChain());
} else {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
Expand Down Expand Up @@ -8542,6 +8549,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

// Apply mandatory transformation to handle reductions with multiple in-loop
// uses if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMultiUseReductions,
*Plan))
return nullptr;
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
Expand Down
26 changes: 22 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2072,6 +2072,9 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
static inline bool classof(const VPValue *V) {
return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
}
static inline bool classof(const VPSingleDefRecipe *R) {
return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
}

/// Generate the phi nodes.
void execute(VPTransformState &State) override = 0;
Expand Down Expand Up @@ -2137,7 +2140,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
return R && classof(R);
}

static inline bool classof(const VPHeaderPHIRecipe *R) {
static inline bool classof(const VPSingleDefRecipe *R) {
return classof(static_cast<const VPRecipeBase *>(R));
}

Expand Down Expand Up @@ -2414,6 +2417,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// The phi is part of an ordered reduction. Requires IsInLoop to be true.
bool IsOrdered;

/// The phi is part of a multi-use reduction (e.g., used in FindLastIV
/// patterns for argmin/argmax).
/// TODO: Also support cases where the phi itself has a single use, but its
/// compare has multiple uses.
bool HasLoopUsesOutsideReductionChain;

/// When expanding the reduction PHI, the plan's VF element count is divided
/// by this factor to form the reduction phi's VF.
unsigned VFScaleFactor = 1;
Expand All @@ -2422,9 +2431,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Create a new VPReductionPHIRecipe for the reduction \p Phi.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
bool IsInLoop = false, bool IsOrdered = false,
unsigned VFScaleFactor = 1)
unsigned VFScaleFactor = 1,
bool HasLoopUsesOutsideReductionChain = false)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
IsInLoop(IsInLoop), IsOrdered(IsOrdered),
HasLoopUsesOutsideReductionChain(HasLoopUsesOutsideReductionChain),
VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}

Expand All @@ -2433,7 +2445,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPReductionPHIRecipe *clone() override {
auto *R = new VPReductionPHIRecipe(
dyn_cast_or_null<PHINode>(getUnderlyingValue()), getRecurrenceKind(),
*getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
*getOperand(0), IsInLoop, IsOrdered, VFScaleFactor,
HasLoopUsesOutsideReductionChain);
R->addOperand(getBackedgeValue());
return R;
}
Expand All @@ -2460,6 +2473,11 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Returns true, if the phi is part of an in-loop reduction.
bool isInLoop() const { return IsInLoop; }

/// Returns true, if the phi is part of a multi-use reduction.
bool hasLoopUsesOutsideReductionChain() const {
return HasLoopUsesOutsideReductionChain;
}

/// Returns true if the recipe only uses the first lane of operand \p Op.
bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
Expand Down
119 changes: 109 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -818,15 +818,18 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
}

/// If \p RedPhiR is used by a ComputeReductionResult recipe, return it.
/// Otherwise return nullptr.
static VPInstruction *
findComputeReductionResult(VPReductionPHIRecipe *RedPhiR) {
auto It = find_if(RedPhiR->users(), [](VPUser *U) {
auto *VPI = dyn_cast<VPInstruction>(U);
return VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult;
});
return It == RedPhiR->user_end() ? nullptr : cast<VPInstruction>(*It);
/// If \p V is used by a recipe matching pattern \p P, return it. Otherwise
/// return nullptr;
template <typename MatchT>
static VPRecipeBase *findUserOf(VPValue *V, const MatchT &P) {
auto It = find_if(V->users(), match_fn(P));
return It == V->user_end() ? nullptr : cast<VPRecipeBase>(*It);
}

/// If \p V is used by a VPInstruction with \p Opcode, return it. Otherwise
/// return nullptr.
template <unsigned Opcode> static VPInstruction *findUserOf(VPValue *V) {
return cast_or_null<VPInstruction>(findUserOf(V, m_VPInstruction<Opcode>()));
}

bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
Expand Down Expand Up @@ -933,7 +936,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {

// If we exit early due to NaNs, compute the final reduction result based on
// the reduction phi at the beginning of the last vector iteration.
auto *RdxResult = findComputeReductionResult(RedPhiR);
auto *RdxResult =
findUserOf<VPInstruction::ComputeReductionResult>(RedPhiR);

auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
RdxResult->getOperand(1));
Expand Down Expand Up @@ -992,3 +996,98 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
MiddleTerm->setOperand(0, NewCond);
return true;
}

bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
for (auto &PhiR : make_early_inc_range(
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
auto *MinMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
// TODO: check for multi-uses in VPlan directly.
if (!MinMaxPhiR || !MinMaxPhiR->hasLoopUsesOutsideReductionChain())
continue;

RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind();
assert(
RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind) &&
"only min/max recurrences support users outside the reduction chain");

// One user of MinMaxPhiR is MinMaxOp, the other user must be a compare
// that's part of a FindLastIV chain.
auto *MinMaxOp =
dyn_cast<VPRecipeWithIRFlags>(MinMaxPhiR->getBackedgeValue());
if (!MinMaxOp || MinMaxOp->getNumUsers() != 2)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MultiUse can be more accurately DoubleUse.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, although I am not sure if we should tighten this down early on? A single min/max could serve multiple selects, supported here eventually?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multiple selects could conceptually be fed by a single min/max, but are there conceivable use cases? The core argmin/argmax pattern seems sufficiently prevalent and involved to recognize and handle, to arguably deserve specific attention.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, now the multi use terminology has been replaced, would you prefer to make the initial check more specific?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better clarify what patterns are supported as accurately as possible at the outset.

Somewhat confusing - isMinMaxReductionWithLoopUsersOutsideReductionChain() checks that Phi has more than one user (in the loop) and Inc has one - here MinMaxOp (aka Inc?) has two? Should MinMaxOp be checked to have 2 operands but (asserted to have) 1 user?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I added a comment after the initial early continue in the loop.

I added the assert (and more asserts later below checking the expected users)

return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return false;
return false;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added ,thanks


assert((isa<VPWidenIntrinsicRecipe>(MinMaxOp) ||
(isa<VPReplicateRecipe>(MinMaxOp) &&
isa<IntrinsicInst>(
cast<VPReplicateRecipe>(MinMaxOp)->getUnderlyingValue()))) &&
"MinMaxOp must be a wide or scalar intrinsic");
VPValue *MinMaxOpA = MinMaxOp->getOperand(0);
VPValue *MinMaxOpB = MinMaxOp->getOperand(1);
if (MinMaxOpA != MinMaxPhiR)
std::swap(MinMaxOpA, MinMaxOpB);
if (MinMaxOpA != MinMaxPhiR)
return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does isMinMaxReductionWithLoopUsersOutsideReductionChain() guarantee that either MinMaxOpA xor MinMaxOpB are MinMaxPhiR, i.e., should this be an assert.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep this can be an assert, updated, thanks


VPValue *CmpOpA;
VPValue *CmpOpB;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

compress into one line, and choose the better name.

CmpPredicate Pred;
auto *Cmp = dyn_cast_or_null<VPRecipeWithIRFlags>(findUserOf(
MinMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
if (!Cmp || Cmp->getNumUsers() != 1 ||
(CmpOpA != MinMaxOpB && CmpOpB != MinMaxOpB))
return false;

// TODO: Strict predicates need to find the first IV value for which the
// predicate holds, not the last.
if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE ||
ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred))
return false;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth asserting that MinMaxOp aligns with Pred, i.e., both compute max or both compute min?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep this is now checked below, thanks

// Normalize the predicate so MinMaxPhiR is on the right side.
if (CmpOpA == MinMaxPhiR)
Pred = CmpInst::getSwappedPredicate(Pred);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it important to normalize so MinMaxPhiR is on the right - Pred appears to be unused?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for the current version, but will be needed later to support more cases. I removed it for now, thanks


// Cmp must be used by the select of a FindLastIV chain.
VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
VPValue *IVOp, *FindIV;
if (!Sel ||
!match(Sel,
m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) ||
Sel->getNumUsers() != 2)
return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return false;
return false;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added thanks


auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If FindIVPhiR has a FindLastIVRecurrenceKind, does that imply IVOp must be a VPWidenIntOrFpInductionRecipe?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, moved check from above to assert below, thanks

FindIVPhiR->getRecurrenceKind()))
return false;

assert(isa<VPWidenIntOrFpInductionRecipe>(IVOp) &&
"IVOp must be a wide induction");
assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
"cannot handle inloop/ordered reductions yet");

// The reduction using MinMaxPhiR needs adjusting to compute the correct
// result:
// 1. We need to find the last IV for which the condition based on the
// min/max recurrence is true,
// 2. Compare the partial min/max reduction result to its final value and,
// 3. Select the lanes of the partial FindLastIV reductions which
// correspond to the lanes matching the min/max reduction result.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May be helpful to show snippets of VPInstructions before and after these steps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

VPInstruction *FindIVResult =
findUserOf<VPInstruction::ComputeFindIVResult>(FindIVPhiR);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: ComputeFindIVResult opcode missing documentation.

VPInstruction *MinMaxResult =
findUserOf<VPInstruction::ComputeReductionResult>(MinMaxPhiR);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: ComputeReductionResult opcode missing documentation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So MinMaxPhiR has (at-least) 3 users:

  1. a MinMaxOp intrinsic which computes the min/max reduction;
  2. a Cmp-Sel which computes the last index reduction;
  3. a ComputeReductionResult, although originally only argmax/min is used outside the loop w/o using max/min there as well.

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, all should be asserted now above.

MinMaxResult->moveBefore(*FindIVResult->getParent(),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth asserting both have the same parent, expected to be the middle block?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added, thanks

FindIVResult->getIterator());

VPBuilder B(FindIVResult);
auto *FinalMinMaxCmp = B.createICmp(
CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to explicitly Broadcast MinMaxResult before feeding it to compare with its operand(1)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this stage, the broadcasts are still implicit and will introduced explicitly at a later stage.

auto *FinalIVSelect =
B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3),
FindIVResult->getOperand(2));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth assigning variables with meaningful names to the above operand(1) and these operand(2),(3), especially in the absence of documenting these VPInstruction opcodes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks

FindIVResult->setOperand(3, FinalIVSelect);
}
return true;
}
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ struct VPlanTransforms {
GetIntOrFpInductionDescriptor,
const TargetLibraryInfo &TLI);

/// Try to legalize reductions with multiple in-loop uses. Currently only
/// min/max reductions used by FindLastIV reductions are supported. Otherwise
/// return false.
static bool handleMultiUseReductions(VPlan &Plan);

/// Try to have all users of fixed-order recurrences appear after the recipe
/// defining their previous value, by either sinking users or hoisting recipes
/// defining their previous value (and its operands). Then introduce
Expand Down
Loading