Skip to content

Commit e08017c

Browse files
committed
[LV] Vectorize conditional scalar assignments
Based on Michael Maitland's previous work: llvm#121222. This PR uses the existing recurrences code instead of introducing a new pass just for conditional scalar assignment (CSA) autovectorization. I've also made the recipes more generic. I've enabled it by default to see the impact on tests; if there are regressions, we can put it behind a CLI option.
1 parent d6f9205 commit e08017c

20 files changed

+2238
-300
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ enum class RecurKind {
7070
FindLastIVUMax, ///< FindLast reduction with select(cmp(),x,y) where one of
7171
///< (x,y) is increasing loop induction, and both x and y
7272
///< are integer type, producing a UMax reduction.
73+
FindLast, ///< FindLast reduction with select(cmp(),x,y) where x and y
74+
///< are of integer type, one is the current recurrence value,
75+
///< and the other is an arbitrary value.
7376
// clang-format on
7477
// TODO: Any_of and FindLast reduction need not be restricted to integer type
7578
// only.
@@ -180,13 +183,12 @@ class RecurrenceDescriptor {
180183
/// Returns a struct describing whether the instruction is either a
181184
/// Select(ICmp(A, B), X, Y), or
182185
/// Select(FCmp(A, B), X, Y)
183-
/// where one of (X, Y) is an increasing (FindLast) or decreasing (FindFirst)
184-
/// loop induction variable, and the other is a PHI value.
185-
// TODO: Support non-monotonic variable. FindLast does not need be restricted
186-
// to increasing loop induction variables.
187-
LLVM_ABI static InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop,
188-
PHINode *OrigPhi, Instruction *I,
189-
ScalarEvolution &SE);
186+
/// where one of (X, Y) is an increasing (FindLastIV) or decreasing
187+
/// (FindFirstIV) loop induction variable, or an arbitrary integer value
188+
/// (FindLast), and the other is a PHI value.
189+
LLVM_ABI static InstDesc isFindPattern(RecurKind Kind, Loop *TheLoop,
190+
PHINode *OrigPhi, Instruction *I,
191+
ScalarEvolution &SE);
190192

191193
/// Returns a struct describing if the instruction is a
192194
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
@@ -310,6 +312,13 @@ class RecurrenceDescriptor {
310312
isFindLastIVRecurrenceKind(Kind);
311313
}
312314

315+
/// Returns true if the recurrence kind is of the form
316+
/// select(cmp(),x,y) where one of (x,y) is an arbitrary value and the
317+
/// other is a recurrence.
318+
static bool isFindLastRecurrenceKind(RecurKind Kind) {
319+
return Kind == RecurKind::FindLast;
320+
}
321+
313322
/// Returns the type of the recurrence. This type can be narrower than the
314323
/// actual type of the Phi if the recurrence has been type-promoted.
315324
Type *getRecurrenceType() const { return RecurrenceType; }

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
5858
case RecurKind::FindFirstIVUMin:
5959
case RecurKind::FindLastIVSMax:
6060
case RecurKind::FindLastIVUMax:
61+
// TODO: Make type-agnostic.
62+
case RecurKind::FindLast:
6163
return true;
6264
}
6365
return false;
@@ -746,9 +748,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
746748
// value of the data type or a non-constant value by using mask and multiple
747749
// reduction operations.
748750
RecurrenceDescriptor::InstDesc
749-
RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
750-
PHINode *OrigPhi, Instruction *I,
751-
ScalarEvolution &SE) {
751+
RecurrenceDescriptor::isFindPattern(RecurKind Kind, Loop *TheLoop,
752+
PHINode *OrigPhi, Instruction *I,
753+
ScalarEvolution &SE) {
752754
// TODO: Support the vectorization of FindLastIV when the reduction phi is
753755
// used by more than one select instruction. This vectorization is only
754756
// performed when the SCEV of each increasing induction variable used by the
@@ -757,8 +759,10 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
757759
return InstDesc(false, I);
758760

759761
// We are looking for selects of the form:
760-
// select(cmp(), phi, loop_induction) or
761-
// select(cmp(), loop_induction, phi)
762+
// select(cmp(), phi, value) or
763+
// select(cmp(), value, phi)
764+
// where 'value' is a loop induction variable
765+
// (for FindFirstIV/FindLastIV) or an arbitrary value (for FindLast).
762766
// TODO: Match selects with multi-use cmp conditions.
763767
Value *NonRdxPhi = nullptr;
764768
if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
@@ -767,6 +771,25 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
767771
m_Value(NonRdxPhi)))))
768772
return InstDesc(false, I);
769773

774+
if (isFindLastRecurrenceKind(Kind)) {
775+
// Must be an integer scalar.
776+
Type *Type = OrigPhi->getType();
777+
if (!Type->isIntegerTy())
778+
return InstDesc(false, I);
779+
780+
// FIXME: Support more complex patterns, including multiple selects.
781+
// The Select must be used only outside the loop and by the PHI.
782+
for (User *U : I->users()) {
783+
if (U == OrigPhi)
784+
continue;
785+
if (auto *UI = dyn_cast<Instruction>(U); UI && !TheLoop->contains(UI))
786+
continue;
787+
return InstDesc(false, I);
788+
}
789+
790+
return InstDesc(I, RecurKind::FindLast);
791+
}
792+
770793
// Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
771794
// std::nullopt.
772795
auto GetRecurKind = [&](Value *V) -> std::optional<RecurKind> {
@@ -976,8 +999,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
976999
Kind == RecurKind::Add || Kind == RecurKind::Mul ||
9771000
Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs)
9781001
return isConditionalRdxPattern(I);
979-
if (isFindIVRecurrenceKind(Kind) && SE)
980-
return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
1002+
if ((isFindIVRecurrenceKind(Kind) || isFindLastRecurrenceKind(Kind)) && SE)
1003+
return isFindPattern(Kind, L, OrigPhi, I, *SE);
9811004
[[fallthrough]];
9821005
case Instruction::FCmp:
9831006
case Instruction::ICmp:
@@ -1174,7 +1197,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
11741197
<< "\n");
11751198
return true;
11761199
}
1177-
1200+
if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC,
1201+
DT, SE)) {
1202+
LLVM_DEBUG(dbgs() << "Found a FindLast reduction PHI." << *Phi << "\n");
1203+
return true;
1204+
}
11781205
// Not a reduction of known type.
11791206
return false;
11801207
}
@@ -1299,6 +1326,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
12991326
case RecurKind::FMaximumNum:
13001327
case RecurKind::FMinimumNum:
13011328
return Instruction::FCmp;
1329+
case RecurKind::FindLast:
1330+
return Instruction::Select;
13021331
case RecurKind::AnyOf:
13031332
case RecurKind::FindFirstIVSMin:
13041333
case RecurKind::FindFirstIVUMin:

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5488,6 +5488,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
54885488
case RecurKind::FMax:
54895489
case RecurKind::FMulAdd:
54905490
case RecurKind::AnyOf:
5491+
case RecurKind::FindLast:
54915492
return true;
54925493
default:
54935494
return false;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,6 +1293,7 @@ class LoopVectorizationCostModel {
12931293
"from latch block\n");
12941294
return true;
12951295
}
1296+
12961297
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
12971298
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
12981299
"interleaved group requires scalar epilogue\n");
@@ -4084,6 +4085,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40844085
continue;
40854086
case VPDef::VPReductionSC:
40864087
case VPDef::VPActiveLaneMaskPHISC:
4088+
case VPDef::VPLastActiveMaskPHISC:
40874089
case VPDef::VPWidenCallSC:
40884090
case VPDef::VPWidenCanonicalIVSC:
40894091
case VPDef::VPWidenCastSC:
@@ -4302,11 +4304,15 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
43024304
ElementCount VF) const {
43034305
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
43044306
// reductions need special handling and are currently unsupported.
4307+
// FindLast reductions also require special handling for the synthesized
4308+
// mask PHI.
43054309
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
43064310
if (!Legal->isReductionVariable(&Phi))
43074311
return Legal->isFixedOrderRecurrence(&Phi);
4308-
return RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
4309-
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind());
4312+
RecurKind Kind =
4313+
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
4314+
return RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) ||
4315+
RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(Kind);
43104316
}))
43114317
return false;
43124318

@@ -4612,6 +4618,12 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
46124618
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
46134619
IsaPred<VPReductionPHIRecipe>);
46144620

4621+
// FIXME: implement interleaving for FindLast transform correctly.
4622+
for (auto &[_, RdxDesc] : Legal->getReductionVars())
4623+
if (RecurrenceDescriptor::isFindLastRecurrenceKind(
4624+
RdxDesc.getRecurrenceKind()))
4625+
return 1;
4626+
46154627
// If we did not calculate the cost for VF (because the user selected the VF)
46164628
// then we calculate the cost of VF here.
46174629
if (LoopCost == 0) {
@@ -8624,6 +8636,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
86248636
*Plan, Builder))
86258637
return nullptr;
86268638

8639+
// Create whole-vector selects for find-last recurrences.
8640+
VPlanTransforms::runPass(VPlanTransforms::convertFindLastRecurrences, *Plan,
8641+
RecipeBuilder, Legal);
8642+
86278643
if (useActiveLaneMask(Style)) {
86288644
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
86298645
// TailFoldingStyle is visible there.
@@ -8707,10 +8723,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87078723
continue;
87088724

87098725
RecurKind Kind = PhiR->getRecurrenceKind();
8710-
assert(
8711-
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
8712-
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
8713-
"AnyOf and FindIV reductions are not allowed for in-loop reductions");
8726+
assert(!RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) &&
8727+
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
8728+
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
8729+
"AnyOf, FindIV, and FindLast reductions are not allowed for in-loop "
8730+
"reductions");
87148731

87158732
bool IsFPRecurrence =
87168733
RecurrenceDescriptor::isFloatingPointRecurrenceKind(Kind);
@@ -9017,7 +9034,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90179034
RecurKind RK = RdxDesc.getRecurrenceKind();
90189035
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
90199036
!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
9020-
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
9037+
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) &&
9038+
!RecurrenceDescriptor::isFindLastRecurrenceKind(RK))) {
90219039
VPBuilder PHBuilder(Plan->getVectorPreheader());
90229040
VPValue *Iden = Plan->getOrAddLiveIn(
90239041
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
@@ -9430,7 +9448,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
94309448
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
94319449
for (VPRecipeBase &R :
94329450
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9433-
if (isa<VPCanonicalIVPHIRecipe>(&R))
9451+
if (isa<VPCanonicalIVPHIRecipe, VPLastActiveMaskPHIRecipe>(&R))
94349452
continue;
94359453
EpiWidenedPhis.insert(
94369454
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
@@ -9627,6 +9645,10 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
96279645
continue;
96289646
}
96299647
}
9648+
} else if (isa<VPLastActiveMaskPHIRecipe>(R)) {
9649+
// LastActiveMasks are only used as part of FindLast reductions,
9650+
// and aren't passed to the scalar loop.
9651+
continue;
96309652
} else {
96319653
// Retrieve the induction resume values for wide inductions from
96329654
// their original phi nodes in the scalar loop.
@@ -10148,6 +10170,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1014810170
// Override IC if user provided an interleave count.
1014910171
IC = UserIC > 0 ? UserIC : IC;
1015010172

10173+
// FIXME: Enable interleaving for FindLast reductions.
10174+
if (any_of(make_second_range(LVL.getReductionVars()), [&](auto &RdxDesc) {
10175+
return RecurrenceDescriptor::isFindLastRecurrenceKind(
10176+
RdxDesc.getRecurrenceKind());
10177+
})) {
10178+
LLVM_DEBUG(dbgs() << "LV: Not interleaving without vectorization due "
10179+
<< "to conditional scalar assignments.\n");
10180+
IntDiagMsg = {
10181+
"ConditionalAssignmentPreventsScalarInterleaving",
10182+
"Unable to interleave without vectorization due to conditional "
10183+
"assignments"};
10184+
InterleaveLoop = false;
10185+
IC = 1;
10186+
}
10187+
1015110188
// Emit diagnostic messages, if any.
1015210189
const char *VAPassName = Hints.vectorizeAnalysisPassName();
1015310190
if (!VectorizeLoop && !InterleaveLoop) {

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25401,6 +25401,7 @@ class HorizontalReduction {
2540125401
case RecurKind::FindFirstIVUMin:
2540225402
case RecurKind::FindLastIVSMax:
2540325403
case RecurKind::FindLastIVUMax:
25404+
case RecurKind::FindLast:
2540425405
case RecurKind::FMaxNum:
2540525406
case RecurKind::FMinNum:
2540625407
case RecurKind::FMaximumNum:
@@ -25542,6 +25543,7 @@ class HorizontalReduction {
2554225543
case RecurKind::FindFirstIVUMin:
2554325544
case RecurKind::FindLastIVSMax:
2554425545
case RecurKind::FindLastIVUMax:
25546+
case RecurKind::FindLast:
2554525547
case RecurKind::FMaxNum:
2554625548
case RecurKind::FMinNum:
2554725549
case RecurKind::FMaximumNum:
@@ -25648,6 +25650,7 @@ class HorizontalReduction {
2564825650
case RecurKind::FindFirstIVUMin:
2564925651
case RecurKind::FindLastIVSMax:
2565025652
case RecurKind::FindLastIVUMax:
25653+
case RecurKind::FindLast:
2565125654
case RecurKind::FMaxNum:
2565225655
case RecurKind::FMinNum:
2565325656
case RecurKind::FMaximumNum:

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
562562
case VPRecipeBase::VPPredInstPHISC:
563563
case VPRecipeBase::VPCanonicalIVPHISC:
564564
case VPRecipeBase::VPActiveLaneMaskPHISC:
565+
case VPRecipeBase::VPLastActiveMaskPHISC:
565566
case VPRecipeBase::VPFirstOrderRecurrencePHISC:
566567
case VPRecipeBase::VPWidenPHISC:
567568
case VPRecipeBase::VPWidenIntOrFpInductionSC:
@@ -1128,6 +1129,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
11281129
/// Returns the value for vscale.
11291130
VScale,
11301131
OpsEnd = VScale,
1132+
/// Extracts the last active lane based on a predicate vector operand.
1133+
ExtractLastActive,
11311134
};
11321135

11331136
/// Returns true if this VPInstruction generates scalar values for all lanes.
@@ -3635,6 +3638,40 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
36353638
#endif
36363639
};
36373640

3641+
// TODO: Can we unify the PHI recipe hierarchy a bit? VPPredInstPHISC is close
3642+
// to this (just a PHI of a predicate), but isn't a header phi so can't
3643+
// be used for the mask of FindLastActive reductions.
3644+
//
3645+
// This is basically a clone of VPActiveLaneMaskPHIRecipe, but won't run into
3646+
// problems with transforms that expect there to only be a single ALM PHI, and
3647+
// can be ignored by other code looking for a (non-existent) underlying value.
3648+
class VPLastActiveMaskPHIRecipe : public VPHeaderPHIRecipe {
3649+
public:
3650+
VPLastActiveMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
3651+
: VPHeaderPHIRecipe(VPDef::VPLastActiveMaskPHISC, nullptr, StartMask,
3652+
DL) {}
3653+
3654+
~VPLastActiveMaskPHIRecipe() override = default;
3655+
3656+
VPLastActiveMaskPHIRecipe *clone() override {
3657+
auto *R = new VPLastActiveMaskPHIRecipe(getOperand(0), getDebugLoc());
3658+
if (getNumOperands() == 2)
3659+
R->addOperand(getOperand(1));
3660+
return R;
3661+
}
3662+
3663+
VP_CLASSOF_IMPL(VPDef::VPLastActiveMaskPHISC);
3664+
3665+
/// Generate the mask phi.
3666+
void execute(VPTransformState &State) override;
3667+
3668+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3669+
/// Print the recipe.
3670+
void print(raw_ostream &O, const Twine &Indent,
3671+
VPSlotTracker &SlotTracker) const override;
3672+
#endif
3673+
};
3674+
36383675
/// A recipe for generating the phi node for the current index of elements,
36393676
/// adjusted in accordance with EVL value. It starts at the start value of the
36403677
/// canonical induction and gets incremented by EVL in each iteration of the

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
121121
return Type::getIntNTy(Ctx, 64);
122122
case VPInstruction::ExtractLastElement:
123123
case VPInstruction::ExtractLastLanePerPart:
124-
case VPInstruction::ExtractPenultimateElement: {
124+
case VPInstruction::ExtractPenultimateElement:
125+
case VPInstruction::ExtractLastActive: {
125126
Type *BaseTy = inferScalarType(R->getOperand(0));
126127
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
127128
return VecTy->getElementType();
@@ -279,14 +280,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
279280
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
280281
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
281282
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
282-
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
283-
[this](const auto *R) {
284-
// Handle header phi recipes, except VPWidenIntOrFpInduction
285-
// which needs special handling due it being possibly truncated.
286-
// TODO: consider inferring/caching type of siblings, e.g.,
287-
// backedge value, here and in cases below.
288-
return inferScalarType(R->getStartValue());
289-
})
283+
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
284+
VPLastActiveMaskPHIRecipe>([this](const auto *R) {
285+
// Handle header phi recipes, except VPWidenIntOrFpInduction
286+
// which needs special handling due to it being possibly truncated.
287+
// TODO: consider inferring/caching type of siblings, e.g.,
288+
// backedge value, here and in cases below.
289+
return inferScalarType(R->getStartValue());
290+
})
290291
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
291292
[](const auto *R) { return R->getScalarType(); })
292293
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,

0 commit comments

Comments
 (0)