Skip to content

Commit c89644c

Browse files
committed
[LV] Vectorize conditional scalar assignments
Based on Michael Maitland's previous work: llvm#121222. This PR uses the existing recurrences code instead of introducing a new pass just for CSA autovec. I've also made recipes that are more generic. I've enabled it by default to see the impact on tests; if there are regressions we can put it behind a CLI option.
1 parent c2b4e48 commit c89644c

20 files changed

+2238
-300
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ enum class RecurKind {
7070
FindLastIVUMax, ///< FindLast reduction with select(cmp(),x,y) where one of
7171
///< (x,y) is increasing loop induction, and both x and y
7272
///< are integer type, producing a UMax reduction.
73+
FindLast, ///< FindLast reduction with select(cmp(),x,y) where x and y
74+
///< are an integer type, one is the current recurrence value,
75+
///< and the other is an arbitrary value.
7376
// clang-format on
7477
// TODO: Any_of and FindLast reduction need not be restricted to integer type
7578
// only.
@@ -175,13 +178,12 @@ class RecurrenceDescriptor {
175178
/// Returns a struct describing whether the instruction is either a
176179
/// Select(ICmp(A, B), X, Y), or
177180
/// Select(FCmp(A, B), X, Y)
178-
/// where one of (X, Y) is an increasing (FindLast) or decreasing (FindFirst)
179-
/// loop induction variable, and the other is a PHI value.
180-
// TODO: Support non-monotonic variable. FindLast does not need be restricted
181-
// to increasing loop induction variables.
182-
LLVM_ABI static InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop,
183-
PHINode *OrigPhi, Instruction *I,
184-
ScalarEvolution &SE);
181+
/// where one of (X, Y) is an increasing (FindLastIV) or decreasing
182+
/// (FindFirstIV) loop induction variable, or an arbitrary integer value
183+
/// (FindLast), and the other is a PHI value.
184+
LLVM_ABI static InstDesc isFindPattern(RecurKind Kind, Loop *TheLoop,
185+
PHINode *OrigPhi, Instruction *I,
186+
ScalarEvolution &SE);
185187

186188
/// Returns a struct describing if the instruction is a
187189
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
@@ -305,6 +307,13 @@ class RecurrenceDescriptor {
305307
isFindLastIVRecurrenceKind(Kind);
306308
}
307309

310+
/// Returns true if the recurrence kind is of the form
311+
/// select(cmp(),x,y) where one of (x,y) is an arbitrary value and the
312+
/// other is a recurrence.
313+
static bool isFindLastRecurrenceKind(RecurKind Kind) {
314+
return Kind == RecurKind::FindLast;
315+
}
316+
308317
/// Returns the type of the recurrence. This type can be narrower than the
309318
/// actual type of the Phi if the recurrence has been type-promoted.
310319
Type *getRecurrenceType() const { return RecurrenceType; }

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
5656
case RecurKind::FindFirstIVUMin:
5757
case RecurKind::FindLastIVSMax:
5858
case RecurKind::FindLastIVUMax:
59+
// TODO: Make type-agnostic.
60+
case RecurKind::FindLast:
5961
return true;
6062
}
6163
return false;
@@ -691,9 +693,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
691693
// value of the data type or a non-constant value by using mask and multiple
692694
// reduction operations.
693695
RecurrenceDescriptor::InstDesc
694-
RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
695-
PHINode *OrigPhi, Instruction *I,
696-
ScalarEvolution &SE) {
696+
RecurrenceDescriptor::isFindPattern(RecurKind Kind, Loop *TheLoop,
697+
PHINode *OrigPhi, Instruction *I,
698+
ScalarEvolution &SE) {
697699
// TODO: Support the vectorization of FindLastIV when the reduction phi is
698700
// used by more than one select instruction. This vectorization is only
699701
// performed when the SCEV of each increasing induction variable used by the
@@ -702,8 +704,10 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
702704
return InstDesc(false, I);
703705

704706
// We are looking for selects of the form:
705-
// select(cmp(), phi, loop_induction) or
706-
// select(cmp(), loop_induction, phi)
707+
// select(cmp(), phi, value) or
708+
// select(cmp(), value, phi)
709+
// where 'value' is a loop induction variable
710+
// (for FindFirstIV/FindLastIV) or an arbitrary value (for FindLast).
707711
// TODO: Match selects with multi-use cmp conditions.
708712
Value *NonRdxPhi = nullptr;
709713
if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
@@ -712,6 +716,25 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
712716
m_Value(NonRdxPhi)))))
713717
return InstDesc(false, I);
714718

719+
if (isFindLastRecurrenceKind(Kind)) {
720+
// Must be an integer scalar.
721+
Type *Type = OrigPhi->getType();
722+
if (!Type->isIntegerTy())
723+
return InstDesc(false, I);
724+
725+
// FIXME: Support more complex patterns, including multiple selects.
726+
// The Select must be used only outside the loop and by the PHI.
727+
for (User *U : I->users()) {
728+
if (U == OrigPhi)
729+
continue;
730+
if (auto *UI = dyn_cast<Instruction>(U); UI && !TheLoop->contains(UI))
731+
continue;
732+
return InstDesc(false, I);
733+
}
734+
735+
return InstDesc(I, RecurKind::FindLast);
736+
}
737+
715738
// Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
716739
// std::nullopt.
717740
auto GetRecurKind = [&](Value *V) -> std::optional<RecurKind> {
@@ -920,8 +943,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
920943
Kind == RecurKind::Add || Kind == RecurKind::Mul ||
921944
Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs)
922945
return isConditionalRdxPattern(I);
923-
if (isFindIVRecurrenceKind(Kind) && SE)
924-
return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
946+
if ((isFindIVRecurrenceKind(Kind) || isFindLastRecurrenceKind(Kind)) && SE)
947+
return isFindPattern(Kind, L, OrigPhi, I, *SE);
925948
[[fallthrough]];
926949
case Instruction::FCmp:
927950
case Instruction::ICmp:
@@ -1118,7 +1141,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
11181141
<< "\n");
11191142
return true;
11201143
}
1121-
1144+
if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC,
1145+
DT, SE)) {
1146+
LLVM_DEBUG(dbgs() << "Found a FindLast reduction PHI." << *Phi << "\n");
1147+
return true;
1148+
}
11221149
// Not a reduction of known type.
11231150
return false;
11241151
}
@@ -1243,6 +1270,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
12431270
case RecurKind::FMaximumNum:
12441271
case RecurKind::FMinimumNum:
12451272
return Instruction::FCmp;
1273+
case RecurKind::FindLast:
1274+
return Instruction::Select;
12461275
case RecurKind::AnyOf:
12471276
case RecurKind::FindFirstIVSMin:
12481277
case RecurKind::FindFirstIVUMin:

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5471,6 +5471,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
54715471
case RecurKind::FMax:
54725472
case RecurKind::FMulAdd:
54735473
case RecurKind::AnyOf:
5474+
case RecurKind::FindLast:
54745475
return true;
54755476
default:
54765477
return false;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,6 +1293,7 @@ class LoopVectorizationCostModel {
12931293
"from latch block\n");
12941294
return true;
12951295
}
1296+
12961297
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
12971298
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
12981299
"interleaved group requires scalar epilogue\n");
@@ -4084,6 +4085,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40844085
continue;
40854086
case VPDef::VPReductionSC:
40864087
case VPDef::VPActiveLaneMaskPHISC:
4088+
case VPDef::VPLastActiveMaskPHISC:
40874089
case VPDef::VPWidenCallSC:
40884090
case VPDef::VPWidenCanonicalIVSC:
40894091
case VPDef::VPWidenCastSC:
@@ -4302,11 +4304,15 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
43024304
ElementCount VF) const {
43034305
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
43044306
// reductions need special handling and are currently unsupported.
4307+
// FindLast reductions also require special handling for the synthesized
4308+
// mask PHI.
43054309
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
43064310
if (!Legal->isReductionVariable(&Phi))
43074311
return Legal->isFixedOrderRecurrence(&Phi);
4308-
return RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
4309-
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind());
4312+
RecurKind Kind =
4313+
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
4314+
return RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) ||
4315+
RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(Kind);
43104316
}))
43114317
return false;
43124318

@@ -4612,6 +4618,12 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
46124618
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
46134619
IsaPred<VPReductionPHIRecipe>);
46144620

4621+
// FIXME: implement interleaving for FindLast transform correctly.
4622+
for (auto &[_, RdxDesc] : Legal->getReductionVars())
4623+
if (RecurrenceDescriptor::isFindLastRecurrenceKind(
4624+
RdxDesc.getRecurrenceKind()))
4625+
return 1;
4626+
46154627
// If we did not calculate the cost for VF (because the user selected the VF)
46164628
// then we calculate the cost of VF here.
46174629
if (LoopCost == 0) {
@@ -8566,6 +8578,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85668578
*Plan, Builder))
85678579
return nullptr;
85688580

8581+
// Create whole-vector selects for find-last recurrences.
8582+
VPlanTransforms::runPass(VPlanTransforms::convertFindLastRecurrences, *Plan,
8583+
RecipeBuilder, Legal);
8584+
85698585
if (useActiveLaneMask(Style)) {
85708586
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
85718587
// TailFoldingStyle is visible there.
@@ -8660,10 +8676,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86608676
continue;
86618677

86628678
RecurKind Kind = PhiR->getRecurrenceKind();
8663-
assert(
8664-
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
8665-
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
8666-
"AnyOf and FindIV reductions are not allowed for in-loop reductions");
8679+
assert(!RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) &&
8680+
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
8681+
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
8682+
"AnyOf, FindIV, and FindLast reductions are not allowed for in-loop "
8683+
"reductions");
86678684

86688685
// Collect the chain of "link" recipes for the reduction starting at PhiR.
86698686
SetVector<VPSingleDefRecipe *> Worklist;
@@ -8960,7 +8977,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
89608977
RecurKind RK = RdxDesc.getRecurrenceKind();
89618978
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
89628979
!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
8963-
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
8980+
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) &&
8981+
!RecurrenceDescriptor::isFindLastRecurrenceKind(RK))) {
89648982
VPBuilder PHBuilder(Plan->getVectorPreheader());
89658983
VPValue *Iden = Plan->getOrAddLiveIn(
89668984
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
@@ -9362,7 +9380,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
93629380
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
93639381
for (VPRecipeBase &R :
93649382
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9365-
if (isa<VPCanonicalIVPHIRecipe>(&R))
9383+
if (isa<VPCanonicalIVPHIRecipe, VPLastActiveMaskPHIRecipe>(&R))
93669384
continue;
93679385
EpiWidenedPhis.insert(
93689386
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
@@ -9559,6 +9577,10 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
95599577
continue;
95609578
}
95619579
}
9580+
} else if (isa<VPLastActiveMaskPHIRecipe>(R)) {
9581+
// LastActiveMasks are only used as part of FindLast reductions,
9582+
// and aren't passed to the scalar loop.
9583+
continue;
95629584
} else {
95639585
// Retrieve the induction resume values for wide inductions from
95649586
// their original phi nodes in the scalar loop.
@@ -10080,6 +10102,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1008010102
// Override IC if user provided an interleave count.
1008110103
IC = UserIC > 0 ? UserIC : IC;
1008210104

10105+
// FIXME: Enable interleaving for last_active reductions.
10106+
if (any_of(make_second_range(LVL.getReductionVars()), [&](auto &RdxDesc) {
10107+
return RecurrenceDescriptor::isFindLastRecurrenceKind(
10108+
RdxDesc.getRecurrenceKind());
10109+
})) {
10110+
LLVM_DEBUG(dbgs() << "LV: Not interleaving without vectorization due "
10111+
<< "to conditional scalar assignments.\n");
10112+
IntDiagMsg = {
10113+
"ConditionalAssignmentPreventsScalarInterleaving",
10114+
"Unable to interleave without vectorization due to conditional "
10115+
"assignments"};
10116+
InterleaveLoop = false;
10117+
IC = 1;
10118+
}
10119+
1008310120
// Emit diagnostic messages, if any.
1008410121
const char *VAPassName = Hints.vectorizeAnalysisPassName();
1008510122
if (!VectorizeLoop && !InterleaveLoop) {

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25284,6 +25284,7 @@ class HorizontalReduction {
2528425284
case RecurKind::FindFirstIVUMin:
2528525285
case RecurKind::FindLastIVSMax:
2528625286
case RecurKind::FindLastIVUMax:
25287+
case RecurKind::FindLast:
2528725288
case RecurKind::FMaxNum:
2528825289
case RecurKind::FMinNum:
2528925290
case RecurKind::FMaximumNum:
@@ -25425,6 +25426,7 @@ class HorizontalReduction {
2542525426
case RecurKind::FindFirstIVUMin:
2542625427
case RecurKind::FindLastIVSMax:
2542725428
case RecurKind::FindLastIVUMax:
25429+
case RecurKind::FindLast:
2542825430
case RecurKind::FMaxNum:
2542925431
case RecurKind::FMinNum:
2543025432
case RecurKind::FMaximumNum:
@@ -25531,6 +25533,7 @@ class HorizontalReduction {
2553125533
case RecurKind::FindFirstIVUMin:
2553225534
case RecurKind::FindLastIVSMax:
2553325535
case RecurKind::FindLastIVUMax:
25536+
case RecurKind::FindLast:
2553425537
case RecurKind::FMaxNum:
2553525538
case RecurKind::FMinNum:
2553625539
case RecurKind::FMaximumNum:

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
561561
case VPRecipeBase::VPPredInstPHISC:
562562
case VPRecipeBase::VPCanonicalIVPHISC:
563563
case VPRecipeBase::VPActiveLaneMaskPHISC:
564+
case VPRecipeBase::VPLastActiveMaskPHISC:
564565
case VPRecipeBase::VPFirstOrderRecurrencePHISC:
565566
case VPRecipeBase::VPWidenPHISC:
566567
case VPRecipeBase::VPWidenIntOrFpInductionSC:
@@ -1119,6 +1120,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
11191120
/// Returns the value for vscale.
11201121
VScale,
11211122
OpsEnd = VScale,
1123+
/// Extracts the last active lane based on a predicate vector operand.
1124+
ExtractLastActive,
11221125
};
11231126

11241127
/// Returns true if this VPInstruction generates scalar values for all lanes.
@@ -3652,6 +3655,40 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
36523655
#endif
36533656
};
36543657

3658+
// TODO: Can we unify the PHI recipe hierarchy a bit? VPPredInstPHISC is close
3659+
// to this (just a PHI of a predicate), but isn't a header phi so can't
3660+
// be used for the mask of FindLastActive reductions.
3661+
//
3662+
// This is basically a clone of VPActiveLaneMaskPHIRecipe, but won't run into
3663+
// problems with transforms that expect there to only be a single ALM PHI, and
3664+
// can be ignored by other code looking for a (non-existent) underlying value.
3665+
class VPLastActiveMaskPHIRecipe : public VPHeaderPHIRecipe {
3666+
public:
3667+
VPLastActiveMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
3668+
: VPHeaderPHIRecipe(VPDef::VPLastActiveMaskPHISC, nullptr, StartMask,
3669+
DL) {}
3670+
3671+
~VPLastActiveMaskPHIRecipe() override = default;
3672+
3673+
VPLastActiveMaskPHIRecipe *clone() override {
3674+
auto *R = new VPLastActiveMaskPHIRecipe(getOperand(0), getDebugLoc());
3675+
if (getNumOperands() == 2)
3676+
R->addOperand(getOperand(1));
3677+
return R;
3678+
}
3679+
3680+
VP_CLASSOF_IMPL(VPDef::VPLastActiveMaskPHISC);
3681+
3682+
/// Generate the mask phi
3683+
void execute(VPTransformState &State) override;
3684+
3685+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3686+
/// Print the recipe
3687+
void print(raw_ostream &O, const Twine &Indent,
3688+
VPSlotTracker &SlotTracker) const override;
3689+
#endif
3690+
};
3691+
36553692
/// A recipe for generating the phi node for the current index of elements,
36563693
/// adjusted in accordance with EVL value. It starts at the start value of the
36573694
/// canonical induction and gets incremented by EVL in each iteration of the

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
118118
return Type::getIntNTy(Ctx, 64);
119119
case VPInstruction::ExtractLastElement:
120120
case VPInstruction::ExtractLastLanePerPart:
121-
case VPInstruction::ExtractPenultimateElement: {
121+
case VPInstruction::ExtractPenultimateElement:
122+
case VPInstruction::ExtractLastActive: {
122123
Type *BaseTy = inferScalarType(R->getOperand(0));
123124
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
124125
return VecTy->getElementType();
@@ -276,14 +277,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
276277
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
277278
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
278279
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
279-
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
280-
[this](const auto *R) {
281-
// Handle header phi recipes, except VPWidenIntOrFpInduction
282-
// which needs special handling due it being possibly truncated.
283-
// TODO: consider inferring/caching type of siblings, e.g.,
284-
// backedge value, here and in cases below.
285-
return inferScalarType(R->getStartValue());
286-
})
280+
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
281+
VPLastActiveMaskPHIRecipe>([this](const auto *R) {
282+
// Handle header phi recipes, except VPWidenIntOrFpInduction
283+
// which needs special handling due to it being possibly truncated.
284+
// TODO: consider inferring/caching type of siblings, e.g.,
285+
// backedge value, here and in cases below.
286+
return inferScalarType(R->getStartValue());
287+
})
287288
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
288289
[](const auto *R) { return R->getScalarType(); })
289290
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,

0 commit comments

Comments
 (0)