Skip to content

Commit 049428d

Browse files
committed
[LV] Vectorize conditional scalar assignments
Based on Michael Maitland's previous work: llvm#121222. This PR uses the existing recurrences code instead of introducing a new pass just for CSA autovec. I've also made recipes that are more generic. I've enabled it by default to see the impact on tests; if there are regressions we can put it behind a CLI option.
1 parent ac047f2 commit 049428d

20 files changed

+2239
-300
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ enum class RecurKind {
7070
FindLastIVUMax, ///< FindLast reduction with select(cmp(),x,y) where one of
7171
///< (x,y) is increasing loop induction, and both x and y
7272
///< are integer type, producing a UMax reduction.
73+
FindLast, ///< FindLast reduction with select(cmp(),x,y) where x and y
74+
///< are an integer type, one is the current recurrence value,
75+
///< and the other is an arbitrary value.
7376
// clang-format on
7477
// TODO: Any_of and FindLast reduction need not be restricted to integer type
7578
// only.
@@ -175,13 +178,12 @@ class RecurrenceDescriptor {
175178
/// Returns a struct describing whether the instruction is either a
176179
/// Select(ICmp(A, B), X, Y), or
177180
/// Select(FCmp(A, B), X, Y)
178-
/// where one of (X, Y) is an increasing (FindLast) or decreasing (FindFirst)
179-
/// loop induction variable, and the other is a PHI value.
180-
// TODO: Support non-monotonic variable. FindLast does not need be restricted
181-
// to increasing loop induction variables.
182-
LLVM_ABI static InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop,
183-
PHINode *OrigPhi, Instruction *I,
184-
ScalarEvolution &SE);
181+
/// where one of (X, Y) is an increasing (FindLastIV) or decreasing
182+
/// (FindFirstIV) loop induction variable, or an arbitrary integer value
183+
/// (FindLast), and the other is a PHI value.
184+
LLVM_ABI static InstDesc isFindPattern(RecurKind Kind, Loop *TheLoop,
185+
PHINode *OrigPhi, Instruction *I,
186+
ScalarEvolution &SE);
185187

186188
/// Returns a struct describing if the instruction is a
187189
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
@@ -305,6 +307,13 @@ class RecurrenceDescriptor {
305307
isFindLastIVRecurrenceKind(Kind);
306308
}
307309

310+
/// Returns true if the recurrence kind is of the form
311+
/// select(cmp(),x,y) where one of (x,y) is an arbitrary value and the
312+
/// other is a recurrence.
313+
static bool isFindLastRecurrenceKind(RecurKind Kind) {
314+
return Kind == RecurKind::FindLast;
315+
}
316+
308317
/// Returns the type of the recurrence. This type can be narrower than the
309318
/// actual type of the Phi if the recurrence has been type-promoted.
310319
Type *getRecurrenceType() const { return RecurrenceType; }

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
5656
case RecurKind::FindFirstIVUMin:
5757
case RecurKind::FindLastIVSMax:
5858
case RecurKind::FindLastIVUMax:
59+
// TODO: Make type-agnostic.
60+
case RecurKind::FindLast:
5961
return true;
6062
}
6163
return false;
@@ -691,9 +693,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
691693
// value of the data type or a non-constant value by using mask and multiple
692694
// reduction operations.
693695
RecurrenceDescriptor::InstDesc
694-
RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
695-
PHINode *OrigPhi, Instruction *I,
696-
ScalarEvolution &SE) {
696+
RecurrenceDescriptor::isFindPattern(RecurKind Kind, Loop *TheLoop,
697+
PHINode *OrigPhi, Instruction *I,
698+
ScalarEvolution &SE) {
697699
// TODO: Support the vectorization of FindLastIV when the reduction phi is
698700
// used by more than one select instruction. This vectorization is only
699701
// performed when the SCEV of each increasing induction variable used by the
@@ -702,8 +704,10 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
702704
return InstDesc(false, I);
703705

704706
// We are looking for selects of the form:
705-
// select(cmp(), phi, loop_induction) or
706-
// select(cmp(), loop_induction, phi)
707+
// select(cmp(), phi, value) or
708+
// select(cmp(), value, phi)
709+
// where 'value' is a loop induction variable
710+
// (for FindFirstIV/FindLastIV) or an arbitrary value (for FindLast).
707711
// TODO: Match selects with multi-use cmp conditions.
708712
Value *NonRdxPhi = nullptr;
709713
if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
@@ -712,6 +716,25 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
712716
m_Value(NonRdxPhi)))))
713717
return InstDesc(false, I);
714718

719+
if (isFindLastRecurrenceKind(Kind)) {
720+
// Must be an integer scalar.
721+
Type *Type = OrigPhi->getType();
722+
if (!Type->isIntegerTy())
723+
return InstDesc(false, I);
724+
725+
// FIXME: Support more complex patterns, including multiple selects.
726+
// The Select must be used only outside the loop and by the PHI.
727+
for (User *U : I->users()) {
728+
if (U == OrigPhi)
729+
continue;
730+
if (auto *UI = dyn_cast<Instruction>(U); UI && !TheLoop->contains(UI))
731+
continue;
732+
return InstDesc(false, I);
733+
}
734+
735+
return InstDesc(I, RecurKind::FindLast);
736+
}
737+
715738
// Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
716739
// std::nullopt.
717740
auto GetRecurKind = [&](Value *V) -> std::optional<RecurKind> {
@@ -920,8 +943,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
920943
Kind == RecurKind::Add || Kind == RecurKind::Mul ||
921944
Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs)
922945
return isConditionalRdxPattern(I);
923-
if (isFindIVRecurrenceKind(Kind) && SE)
924-
return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
946+
if ((isFindIVRecurrenceKind(Kind) || isFindLastRecurrenceKind(Kind)) && SE)
947+
return isFindPattern(Kind, L, OrigPhi, I, *SE);
925948
[[fallthrough]];
926949
case Instruction::FCmp:
927950
case Instruction::ICmp:
@@ -1118,7 +1141,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
11181141
<< "\n");
11191142
return true;
11201143
}
1121-
1144+
if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC,
1145+
DT, SE)) {
1146+
LLVM_DEBUG(dbgs() << "Found a FindLast reduction PHI." << *Phi << "\n");
1147+
return true;
1148+
}
11221149
// Not a reduction of known type.
11231150
return false;
11241151
}
@@ -1248,6 +1275,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
12481275
case RecurKind::FMaximumNum:
12491276
case RecurKind::FMinimumNum:
12501277
return Instruction::FCmp;
1278+
case RecurKind::FindLast:
1279+
return Instruction::Select;
12511280
default:
12521281
llvm_unreachable("Unknown recurrence operation");
12531282
}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5451,6 +5451,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
54515451
case RecurKind::FMax:
54525452
case RecurKind::FMulAdd:
54535453
case RecurKind::AnyOf:
5454+
case RecurKind::FindLast:
54545455
return true;
54555456
default:
54565457
return false;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,6 +1275,7 @@ class LoopVectorizationCostModel {
12751275
"from latch block\n");
12761276
return true;
12771277
}
1278+
12781279
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
12791280
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
12801281
"interleaved group requires scalar epilogue\n");
@@ -4045,6 +4046,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40454046
continue;
40464047
case VPDef::VPReductionSC:
40474048
case VPDef::VPActiveLaneMaskPHISC:
4049+
case VPDef::VPLastActiveMaskPHISC:
40484050
case VPDef::VPWidenCallSC:
40494051
case VPDef::VPWidenCanonicalIVSC:
40504052
case VPDef::VPWidenCastSC:
@@ -4265,11 +4267,15 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
42654267
ElementCount VF) const {
42664268
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
42674269
// reductions need special handling and are currently unsupported.
4270+
// FindLast reductions also require special handling for the synthesized
4271+
// mask PHI.
42684272
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
42694273
if (!Legal->isReductionVariable(&Phi))
42704274
return Legal->isFixedOrderRecurrence(&Phi);
4271-
return RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
4272-
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind());
4275+
RecurKind Kind =
4276+
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
4277+
return RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) ||
4278+
RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(Kind);
42734279
}))
42744280
return false;
42754281

@@ -4559,6 +4565,12 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
45594565
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
45604566
IsaPred<VPReductionPHIRecipe>);
45614567

4568+
// FIXME: implement interleaving for FindLast transform correctly.
4569+
for (auto &[_, RdxDesc] : Legal->getReductionVars())
4570+
if (RecurrenceDescriptor::isFindLastRecurrenceKind(
4571+
RdxDesc.getRecurrenceKind()))
4572+
return 1;
4573+
45624574
// If we did not calculate the cost for VF (because the user selected the VF)
45634575
// then we calculate the cost of VF here.
45644576
if (LoopCost == 0) {
@@ -8488,6 +8500,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84888500
*Plan, Builder))
84898501
return nullptr;
84908502

8503+
// Create whole-vector selects for find-last recurrences.
8504+
VPlanTransforms::runPass(VPlanTransforms::convertFindLastRecurrences, *Plan,
8505+
RecipeBuilder, Legal);
8506+
84918507
if (useActiveLaneMask(Style)) {
84928508
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
84938509
// TailFoldingStyle is visible there.
@@ -8581,10 +8597,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
85818597
continue;
85828598

85838599
RecurKind Kind = PhiR->getRecurrenceKind();
8584-
assert(
8585-
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
8586-
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
8587-
"AnyOf and FindIV reductions are not allowed for in-loop reductions");
8600+
assert(!RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) &&
8601+
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
8602+
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
8603+
"AnyOf, FindIV, and FindLast reductions are not allowed for in-loop "
8604+
"reductions");
85888605

85898606
// Collect the chain of "link" recipes for the reduction starting at PhiR.
85908607
SetVector<VPSingleDefRecipe *> Worklist;
@@ -8884,7 +8901,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
88848901
RecurKind RK = RdxDesc.getRecurrenceKind();
88858902
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
88868903
!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
8887-
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
8904+
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) &&
8905+
!RecurrenceDescriptor::isFindLastRecurrenceKind(RK))) {
88888906
VPBuilder PHBuilder(Plan->getVectorPreheader());
88898907
VPValue *Iden = Plan->getOrAddLiveIn(
88908908
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
@@ -9294,7 +9312,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
92949312
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
92959313
for (VPRecipeBase &R :
92969314
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9297-
if (isa<VPCanonicalIVPHIRecipe>(&R))
9315+
if (isa<VPCanonicalIVPHIRecipe, VPLastActiveMaskPHIRecipe>(&R))
92989316
continue;
92999317
EpiWidenedPhis.insert(
93009318
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
@@ -9491,6 +9509,10 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
94919509
continue;
94929510
}
94939511
}
9512+
} else if (isa<VPLastActiveMaskPHIRecipe>(R)) {
9513+
// LastActiveMasks are only used as part of FindLast reductions,
9514+
// and aren't passed to the scalar loop.
9515+
continue;
94949516
} else {
94959517
// Retrieve the induction resume values for wide inductions from
94969518
// their original phi nodes in the scalar loop.
@@ -10006,6 +10028,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1000610028
// Override IC if user provided an interleave count.
1000710029
IC = UserIC > 0 ? UserIC : IC;
1000810030

10031+
// FIXME: Enable interleaving for last_active reductions.
10032+
if (any_of(make_second_range(LVL.getReductionVars()), [&](auto &RdxDesc) {
10033+
return RecurrenceDescriptor::isFindLastRecurrenceKind(
10034+
RdxDesc.getRecurrenceKind());
10035+
})) {
10036+
LLVM_DEBUG(dbgs() << "LV: Not interleaving without vectorization due "
10037+
<< "to conditional scalar assignments.\n");
10038+
IntDiagMsg = {
10039+
"ConditionalAssignmentPreventsScalarInterleaving",
10040+
"Unable to interleave without vectorization due to conditional "
10041+
"assignments"};
10042+
InterleaveLoop = false;
10043+
IC = 1;
10044+
}
10045+
1000910046
// Emit diagnostic messages, if any.
1001010047
const char *VAPassName = Hints.vectorizeAnalysisPassName();
1001110048
if (!VectorizeLoop && !InterleaveLoop) {

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25174,6 +25174,7 @@ class HorizontalReduction {
2517425174
case RecurKind::FindFirstIVUMin:
2517525175
case RecurKind::FindLastIVSMax:
2517625176
case RecurKind::FindLastIVUMax:
25177+
case RecurKind::FindLast:
2517725178
case RecurKind::FMaxNum:
2517825179
case RecurKind::FMinNum:
2517925180
case RecurKind::FMaximumNum:
@@ -25315,6 +25316,7 @@ class HorizontalReduction {
2531525316
case RecurKind::FindFirstIVUMin:
2531625317
case RecurKind::FindLastIVSMax:
2531725318
case RecurKind::FindLastIVUMax:
25319+
case RecurKind::FindLast:
2531825320
case RecurKind::FMaxNum:
2531925321
case RecurKind::FMinNum:
2532025322
case RecurKind::FMaximumNum:
@@ -25421,6 +25423,7 @@ class HorizontalReduction {
2542125423
case RecurKind::FindFirstIVUMin:
2542225424
case RecurKind::FindLastIVSMax:
2542325425
case RecurKind::FindLastIVUMax:
25426+
case RecurKind::FindLast:
2542425427
case RecurKind::FMaxNum:
2542525428
case RecurKind::FMinNum:
2542625429
case RecurKind::FMaximumNum:

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
548548
case VPRecipeBase::VPPredInstPHISC:
549549
case VPRecipeBase::VPCanonicalIVPHISC:
550550
case VPRecipeBase::VPActiveLaneMaskPHISC:
551+
case VPRecipeBase::VPLastActiveMaskPHISC:
551552
case VPRecipeBase::VPFirstOrderRecurrencePHISC:
552553
case VPRecipeBase::VPWidenPHISC:
553554
case VPRecipeBase::VPWidenIntOrFpInductionSC:
@@ -1064,6 +1065,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
10641065
/// Returns the value for vscale.
10651066
VScale,
10661067
OpsEnd = VScale,
1068+
/// Extracts the last active lane based on a predicate vector operand.
1069+
ExtractLastActive,
10671070
};
10681071

10691072
/// Returns true if this VPInstruction generates scalar values for all lanes.
@@ -3556,6 +3559,40 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
35563559
#endif
35573560
};
35583561

3562+
// TODO: Can we unify the PHI recipe hierarchy a bit? VPPredInstPHISC is close
3563+
// to this (just a PHI of a predicate), but isn't a header phi so can't
3564+
// be used for the mask of FindLastActive reductions.
3565+
//
3566+
// This is basically a clone of VPActiveLaneMaskPHIRecipe, but won't run into
3567+
// problems with transforms that expect there to only be a single ALM PHI, and
3568+
// can be ignored by other code looking for a (non-existent) underlying value.
3569+
class VPLastActiveMaskPHIRecipe : public VPHeaderPHIRecipe {
3570+
public:
3571+
VPLastActiveMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
3572+
: VPHeaderPHIRecipe(VPDef::VPLastActiveMaskPHISC, nullptr, StartMask,
3573+
DL) {}
3574+
3575+
~VPLastActiveMaskPHIRecipe() override = default;
3576+
3577+
VPLastActiveMaskPHIRecipe *clone() override {
3578+
auto *R = new VPLastActiveMaskPHIRecipe(getOperand(0), getDebugLoc());
3579+
if (getNumOperands() == 2)
3580+
R->addOperand(getOperand(1));
3581+
return R;
3582+
}
3583+
3584+
VP_CLASSOF_IMPL(VPDef::VPLastActiveMaskPHISC);
3585+
3586+
/// Generate the mask phi
3587+
void execute(VPTransformState &State) override;
3588+
3589+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3590+
/// Print the recipe
3591+
void print(raw_ostream &O, const Twine &Indent,
3592+
VPSlotTracker &SlotTracker) const override;
3593+
#endif
3594+
};
3595+
35593596
/// A recipe for generating the phi node for the current index of elements,
35603597
/// adjusted in accordance with EVL value. It starts at the start value of the
35613598
/// canonical induction and gets incremented by EVL in each iteration of the

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
118118
return Type::getIntNTy(Ctx, 64);
119119
case VPInstruction::ExtractLastElement:
120120
case VPInstruction::ExtractLastLanePerPart:
121-
case VPInstruction::ExtractPenultimateElement: {
121+
case VPInstruction::ExtractPenultimateElement:
122+
case VPInstruction::ExtractLastActive: {
122123
Type *BaseTy = inferScalarType(R->getOperand(0));
123124
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
124125
return VecTy->getElementType();
@@ -276,14 +277,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
276277
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
277278
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
278279
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
279-
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
280-
[this](const auto *R) {
281-
// Handle header phi recipes, except VPWidenIntOrFpInduction
282-
// which needs special handling due it being possibly truncated.
283-
// TODO: consider inferring/caching type of siblings, e.g.,
284-
// backedge value, here and in cases below.
285-
return inferScalarType(R->getStartValue());
286-
})
280+
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
281+
VPLastActiveMaskPHIRecipe>([this](const auto *R) {
282+
// Handle header phi recipes, except VPWidenIntOrFpInduction
283+
// which needs special handling due to it being possibly truncated.
284+
// TODO: consider inferring/caching type of siblings, e.g.,
285+
// backedge value, here and in cases below.
286+
return inferScalarType(R->getStartValue());
287+
})
287288
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
288289
[](const auto *R) { return R->getScalarType(); })
289290
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,

0 commit comments

Comments
 (0)