Skip to content

Commit 34f2798

Browse files
committed
[LV] Vectorize conditional scalar assignments
Based on Michael Maitland's previous work: llvm#121222. This PR uses the existing recurrences code instead of introducing a new pass just for CSA autovec. I've also made recipes that are more generic. I've enabled it by default to see the impact on tests; if there are regressions we can put it behind a CLI option.
1 parent 637f206 commit 34f2798

20 files changed

+2238
-300
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ enum class RecurKind {
7070
FindLastIVUMax, ///< FindLast reduction with select(cmp(),x,y) where one of
7171
///< (x,y) is increasing loop induction, and both x and y
7272
///< are integer type, producing a UMax reduction.
73+
FindLast, ///< FindLast reduction with select(cmp(),x,y) where x and y
74+
///< are an integer type, one is the current recurrence value,
75+
///< and the other is an arbitrary value.
7376
// clang-format on
7477
// TODO: Any_of and FindLast reduction need not be restricted to integer type
7578
// only.
@@ -175,13 +178,12 @@ class RecurrenceDescriptor {
175178
/// Returns a struct describing whether the instruction is either a
176179
/// Select(ICmp(A, B), X, Y), or
177180
/// Select(FCmp(A, B), X, Y)
178-
/// where one of (X, Y) is an increasing (FindLast) or decreasing (FindFirst)
179-
/// loop induction variable, and the other is a PHI value.
180-
// TODO: Support non-monotonic variable. FindLast does not need be restricted
181-
// to increasing loop induction variables.
182-
LLVM_ABI static InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop,
183-
PHINode *OrigPhi, Instruction *I,
184-
ScalarEvolution &SE);
181+
/// where one of (X, Y) is an increasing (FindLastIV) or decreasing
182+
/// (FindFirstIV) loop induction variable, or an arbitrary integer value
183+
/// (FindLast), and the other is a PHI value.
184+
LLVM_ABI static InstDesc isFindPattern(RecurKind Kind, Loop *TheLoop,
185+
PHINode *OrigPhi, Instruction *I,
186+
ScalarEvolution &SE);
185187

186188
/// Returns a struct describing if the instruction is a
187189
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
@@ -305,6 +307,13 @@ class RecurrenceDescriptor {
305307
isFindLastIVRecurrenceKind(Kind);
306308
}
307309

310+
/// Returns true if the recurrence kind is of the form
311+
/// select(cmp(),x,y) where one of (x,y) is an arbitrary value and the
312+
/// other is a recurrence.
313+
static bool isFindLastRecurrenceKind(RecurKind Kind) {
314+
return Kind == RecurKind::FindLast;
315+
}
316+
308317
/// Returns the type of the recurrence. This type can be narrower than the
309318
/// actual type of the Phi if the recurrence has been type-promoted.
310319
Type *getRecurrenceType() const { return RecurrenceType; }

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
5858
case RecurKind::FindFirstIVUMin:
5959
case RecurKind::FindLastIVSMax:
6060
case RecurKind::FindLastIVUMax:
61+
// TODO: Make type-agnostic.
62+
case RecurKind::FindLast:
6163
return true;
6264
}
6365
return false;
@@ -695,9 +697,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
695697
// value of the data type or a non-constant value by using mask and multiple
696698
// reduction operations.
697699
RecurrenceDescriptor::InstDesc
698-
RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
699-
PHINode *OrigPhi, Instruction *I,
700-
ScalarEvolution &SE) {
700+
RecurrenceDescriptor::isFindPattern(RecurKind Kind, Loop *TheLoop,
701+
PHINode *OrigPhi, Instruction *I,
702+
ScalarEvolution &SE) {
701703
// TODO: Support the vectorization of FindLastIV when the reduction phi is
702704
// used by more than one select instruction. This vectorization is only
703705
// performed when the SCEV of each increasing induction variable used by the
@@ -706,8 +708,10 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
706708
return InstDesc(false, I);
707709

708710
// We are looking for selects of the form:
709-
// select(cmp(), phi, loop_induction) or
710-
// select(cmp(), loop_induction, phi)
711+
// select(cmp(), phi, value) or
712+
// select(cmp(), value, phi)
713+
// where 'value' is a loop induction variable
714+
// (for FindFirstIV/FindLastIV) or an arbitrary value (for FindLast).
711715
// TODO: Match selects with multi-use cmp conditions.
712716
Value *NonRdxPhi = nullptr;
713717
if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
@@ -716,6 +720,25 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
716720
m_Value(NonRdxPhi)))))
717721
return InstDesc(false, I);
718722

723+
if (isFindLastRecurrenceKind(Kind)) {
724+
// Must be an integer scalar.
725+
Type *Type = OrigPhi->getType();
726+
if (!Type->isIntegerTy())
727+
return InstDesc(false, I);
728+
729+
// FIXME: Support more complex patterns, including multiple selects.
730+
// The Select must be used only outside the loop and by the PHI.
731+
for (User *U : I->users()) {
732+
if (U == OrigPhi)
733+
continue;
734+
if (auto *UI = dyn_cast<Instruction>(U); UI && !TheLoop->contains(UI))
735+
continue;
736+
return InstDesc(false, I);
737+
}
738+
739+
return InstDesc(I, RecurKind::FindLast);
740+
}
741+
719742
// Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
720743
// std::nullopt.
721744
auto GetRecurKind = [&](Value *V) -> std::optional<RecurKind> {
@@ -925,8 +948,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
925948
Kind == RecurKind::Add || Kind == RecurKind::Mul ||
926949
Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs)
927950
return isConditionalRdxPattern(I);
928-
if (isFindIVRecurrenceKind(Kind) && SE)
929-
return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
951+
if ((isFindIVRecurrenceKind(Kind) || isFindLastRecurrenceKind(Kind)) && SE)
952+
return isFindPattern(Kind, L, OrigPhi, I, *SE);
930953
[[fallthrough]];
931954
case Instruction::FCmp:
932955
case Instruction::ICmp:
@@ -1123,7 +1146,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
11231146
<< "\n");
11241147
return true;
11251148
}
1126-
1149+
if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC,
1150+
DT, SE)) {
1151+
LLVM_DEBUG(dbgs() << "Found a FindLast reduction PHI." << *Phi << "\n");
1152+
return true;
1153+
}
11271154
// Not a reduction of known type.
11281155
return false;
11291156
}
@@ -1248,6 +1275,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
12481275
case RecurKind::FMaximumNum:
12491276
case RecurKind::FMinimumNum:
12501277
return Instruction::FCmp;
1278+
case RecurKind::FindLast:
1279+
return Instruction::Select;
12511280
case RecurKind::AnyOf:
12521281
case RecurKind::FindFirstIVSMin:
12531282
case RecurKind::FindFirstIVUMin:

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5471,6 +5471,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
54715471
case RecurKind::FMax:
54725472
case RecurKind::FMulAdd:
54735473
case RecurKind::AnyOf:
5474+
case RecurKind::FindLast:
54745475
return true;
54755476
default:
54765477
return false;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,6 +1293,7 @@ class LoopVectorizationCostModel {
12931293
"from latch block\n");
12941294
return true;
12951295
}
1296+
12961297
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
12971298
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
12981299
"interleaved group requires scalar epilogue\n");
@@ -4084,6 +4085,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40844085
continue;
40854086
case VPDef::VPReductionSC:
40864087
case VPDef::VPActiveLaneMaskPHISC:
4088+
case VPDef::VPLastActiveMaskPHISC:
40874089
case VPDef::VPWidenCallSC:
40884090
case VPDef::VPWidenCanonicalIVSC:
40894091
case VPDef::VPWidenCastSC:
@@ -4302,11 +4304,15 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
43024304
ElementCount VF) const {
43034305
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
43044306
// reductions need special handling and are currently unsupported.
4307+
// FindLast reductions also require special handling for the synthesized
4308+
// mask PHI.
43054309
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
43064310
if (!Legal->isReductionVariable(&Phi))
43074311
return Legal->isFixedOrderRecurrence(&Phi);
4308-
return RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
4309-
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind());
4312+
RecurKind Kind =
4313+
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
4314+
return RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) ||
4315+
RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(Kind);
43104316
}))
43114317
return false;
43124318

@@ -4612,6 +4618,12 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
46124618
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
46134619
IsaPred<VPReductionPHIRecipe>);
46144620

4621+
// FIXME: implement interleaving for FindLast transform correctly.
4622+
for (auto &[_, RdxDesc] : Legal->getReductionVars())
4623+
if (RecurrenceDescriptor::isFindLastRecurrenceKind(
4624+
RdxDesc.getRecurrenceKind()))
4625+
return 1;
4626+
46154627
// If we did not calculate the cost for VF (because the user selected the VF)
46164628
// then we calculate the cost of VF here.
46174629
if (LoopCost == 0) {
@@ -8586,6 +8598,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85868598
*Plan, Builder))
85878599
return nullptr;
85888600

8601+
// Create whole-vector selects for find-last recurrences.
8602+
VPlanTransforms::runPass(VPlanTransforms::convertFindLastRecurrences, *Plan,
8603+
RecipeBuilder, Legal);
8604+
85898605
if (useActiveLaneMask(Style)) {
85908606
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
85918607
// TailFoldingStyle is visible there.
@@ -8669,10 +8685,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86698685
continue;
86708686

86718687
RecurKind Kind = PhiR->getRecurrenceKind();
8672-
assert(
8673-
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
8674-
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
8675-
"AnyOf and FindIV reductions are not allowed for in-loop reductions");
8688+
assert(!RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) &&
8689+
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
8690+
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
8691+
"AnyOf, FindIV, and FindLast reductions are not allowed for in-loop "
8692+
"reductions");
86768693

86778694
bool IsFPRecurrence =
86788695
RecurrenceDescriptor::isFloatingPointRecurrenceKind(Kind);
@@ -8976,7 +8993,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
89768993
RecurKind RK = RdxDesc.getRecurrenceKind();
89778994
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
89788995
!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
8979-
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
8996+
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) &&
8997+
!RecurrenceDescriptor::isFindLastRecurrenceKind(RK))) {
89808998
VPBuilder PHBuilder(Plan->getVectorPreheader());
89818999
VPValue *Iden = Plan->getOrAddLiveIn(
89829000
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
@@ -9389,7 +9407,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
93899407
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
93909408
for (VPRecipeBase &R :
93919409
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9392-
if (isa<VPCanonicalIVPHIRecipe>(&R))
9410+
if (isa<VPCanonicalIVPHIRecipe, VPLastActiveMaskPHIRecipe>(&R))
93939411
continue;
93949412
EpiWidenedPhis.insert(
93959413
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
@@ -9586,6 +9604,10 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
95869604
continue;
95879605
}
95889606
}
9607+
} else if (isa<VPLastActiveMaskPHIRecipe>(R)) {
9608+
// LastActiveMasks are only used as part of FindLast reductions,
9609+
// and aren't passed to the scalar loop.
9610+
continue;
95899611
} else {
95909612
// Retrieve the induction resume values for wide inductions from
95919613
// their original phi nodes in the scalar loop.
@@ -10107,6 +10129,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1010710129
// Override IC if user provided an interleave count.
1010810130
IC = UserIC > 0 ? UserIC : IC;
1010910131

10132+
// FIXME: Enable interleaving for last_active reductions.
10133+
if (any_of(make_second_range(LVL.getReductionVars()), [&](auto &RdxDesc) {
10134+
return RecurrenceDescriptor::isFindLastRecurrenceKind(
10135+
RdxDesc.getRecurrenceKind());
10136+
})) {
10137+
LLVM_DEBUG(dbgs() << "LV: Not interleaving without vectorization due "
10138+
<< "to conditional scalar assignments.\n");
10139+
IntDiagMsg = {
10140+
"ConditionalAssignmentPreventsScalarInterleaving",
10141+
"Unable to interleave without vectorization due to conditional "
10142+
"assignments"};
10143+
InterleaveLoop = false;
10144+
IC = 1;
10145+
}
10146+
1011010147
// Emit diagnostic messages, if any.
1011110148
const char *VAPassName = Hints.vectorizeAnalysisPassName();
1011210149
if (!VectorizeLoop && !InterleaveLoop) {

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25381,6 +25381,7 @@ class HorizontalReduction {
2538125381
case RecurKind::FindFirstIVUMin:
2538225382
case RecurKind::FindLastIVSMax:
2538325383
case RecurKind::FindLastIVUMax:
25384+
case RecurKind::FindLast:
2538425385
case RecurKind::FMaxNum:
2538525386
case RecurKind::FMinNum:
2538625387
case RecurKind::FMaximumNum:
@@ -25522,6 +25523,7 @@ class HorizontalReduction {
2552225523
case RecurKind::FindFirstIVUMin:
2552325524
case RecurKind::FindLastIVSMax:
2552425525
case RecurKind::FindLastIVUMax:
25526+
case RecurKind::FindLast:
2552525527
case RecurKind::FMaxNum:
2552625528
case RecurKind::FMinNum:
2552725529
case RecurKind::FMaximumNum:
@@ -25628,6 +25630,7 @@ class HorizontalReduction {
2562825630
case RecurKind::FindFirstIVUMin:
2562925631
case RecurKind::FindLastIVSMax:
2563025632
case RecurKind::FindLastIVUMax:
25633+
case RecurKind::FindLast:
2563125634
case RecurKind::FMaxNum:
2563225635
case RecurKind::FMinNum:
2563325636
case RecurKind::FMaximumNum:

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
561561
case VPRecipeBase::VPPredInstPHISC:
562562
case VPRecipeBase::VPCanonicalIVPHISC:
563563
case VPRecipeBase::VPActiveLaneMaskPHISC:
564+
case VPRecipeBase::VPLastActiveMaskPHISC:
564565
case VPRecipeBase::VPFirstOrderRecurrencePHISC:
565566
case VPRecipeBase::VPWidenPHISC:
566567
case VPRecipeBase::VPWidenIntOrFpInductionSC:
@@ -1121,6 +1122,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
11211122
/// Returns the value for vscale.
11221123
VScale,
11231124
OpsEnd = VScale,
1125+
/// Extracts the last active lane based on a predicate vector operand.
1126+
ExtractLastActive,
11241127
};
11251128

11261129
/// Returns true if this VPInstruction generates scalar values for all lanes.
@@ -3639,6 +3642,40 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
36393642
#endif
36403643
};
36413644

3645+
// TODO: Can we unify the PHI recipe hierarchy a bit? VPPredInstPHISC is close
3646+
// to this (just a PHI of a predicate), but isn't a header phi so can't
3647+
// be used for the mask of FindLastActive reductions.
3648+
//
3649+
// This is basically a clone of VPActiveLaneMaskPHIRecipe, but won't run into
3650+
// problems with transforms that expect there to only be a single ALM PHI, and
3651+
// can be ignored by other code looking for a (non-existent) underlying value.
3652+
class VPLastActiveMaskPHIRecipe : public VPHeaderPHIRecipe {
3653+
public:
3654+
VPLastActiveMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
3655+
: VPHeaderPHIRecipe(VPDef::VPLastActiveMaskPHISC, nullptr, StartMask,
3656+
DL) {}
3657+
3658+
~VPLastActiveMaskPHIRecipe() override = default;
3659+
3660+
VPLastActiveMaskPHIRecipe *clone() override {
3661+
auto *R = new VPLastActiveMaskPHIRecipe(getOperand(0), getDebugLoc());
3662+
if (getNumOperands() == 2)
3663+
R->addOperand(getOperand(1));
3664+
return R;
3665+
}
3666+
3667+
VP_CLASSOF_IMPL(VPDef::VPLastActiveMaskPHISC);
3668+
3669+
/// Generate the mask phi
3670+
void execute(VPTransformState &State) override;
3671+
3672+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3673+
/// Print the recipe
3674+
void print(raw_ostream &O, const Twine &Indent,
3675+
VPSlotTracker &SlotTracker) const override;
3676+
#endif
3677+
};
3678+
36423679
/// A recipe for generating the phi node for the current index of elements,
36433680
/// adjusted in accordance with EVL value. It starts at the start value of the
36443681
/// canonical induction and gets incremented by EVL in each iteration of the

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
118118
return Type::getIntNTy(Ctx, 64);
119119
case VPInstruction::ExtractLastElement:
120120
case VPInstruction::ExtractLastLanePerPart:
121-
case VPInstruction::ExtractPenultimateElement: {
121+
case VPInstruction::ExtractPenultimateElement:
122+
case VPInstruction::ExtractLastActive: {
122123
Type *BaseTy = inferScalarType(R->getOperand(0));
123124
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
124125
return VecTy->getElementType();
@@ -276,14 +277,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
276277
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
277278
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
278279
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
279-
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
280-
[this](const auto *R) {
281-
// Handle header phi recipes, except VPWidenIntOrFpInduction
282-
// which needs special handling due it being possibly truncated.
283-
// TODO: consider inferring/caching type of siblings, e.g.,
284-
// backedge value, here and in cases below.
285-
return inferScalarType(R->getStartValue());
286-
})
280+
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
281+
VPLastActiveMaskPHIRecipe>([this](const auto *R) {
282+
// Handle header phi recipes, except VPWidenIntOrFpInduction
283+
// which needs special handling due to it being possibly truncated.
284+
// TODO: consider inferring/caching type of siblings, e.g.,
285+
// backedge value, here and in cases below.
286+
return inferScalarType(R->getStartValue());
287+
})
287288
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
288289
[](const auto *R) { return R->getScalarType(); })
289290
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,

0 commit comments

Comments
 (0)