Skip to content

Commit 935a644

Browse files
committed
[LoopVectorize] Support vectorization of compressing patterns in VPlan
1 parent 931833c commit 935a644

File tree

10 files changed

+405
-35
lines changed

10 files changed

+405
-35
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,10 @@ class LoopVectorizationLegality {
272272
/// induction descriptor.
273273
using InductionList = MapVector<PHINode *, InductionDescriptor>;
274274

275+
/// MonotonicPHIList records the monotonic phi variables of the loop and maps
276+
/// each phi node to the MonotonicDescriptor that describes it.
277+
using MonotonicPHIList = MapVector<PHINode *, MonotonicDescriptor>;
278+
275279
/// RecurrenceSet contains the phi nodes that are recurrences other than
276280
/// inductions and reductions.
277281
using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
@@ -315,6 +319,11 @@ class LoopVectorizationLegality {
315319
/// Returns the induction variables found in the loop.
316320
const InductionList &getInductionVars() const { return Inductions; }
317321

322+
/// Returns the monotonic phi variables found in the loop, each mapped to its
/// monotonic descriptor.
323+
const MonotonicPHIList &getMonotonicPHIs() const { return MonotonicPHIs; }
324+
325+
/// Returns true if at least one monotonic phi variable was found in the loop.
bool hasMonotonicPHIs() const { return !MonotonicPHIs.empty(); }
326+
318327
/// Return the fixed-order recurrences found in the loop.
319328
RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
320329

@@ -372,6 +381,12 @@ class LoopVectorizationLegality {
372381
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
373382
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
374383

384+
/// Returns true if \p Phi has been recorded as a monotonic phi variable.
385+
bool isMonotonicPHI(PHINode *Phi) const;
386+
387+
/// Check if the memory access of type \p AccessTy through \p Ptr, located in
/// block \p BB, is compressed when vectorizing. This requires \p Ptr to be a
/// monotonic value whose predicate edge is the edge from \p BB to its unique
/// successor and whose constant step equals the alloc size of \p AccessTy.
388+
bool isCompressedPtr(Type *AccessTy, Value *Ptr, BasicBlock *BB) const;
389+
375390
/// Returns true if \p V is invariant across all loop iterations according to
376391
/// SCEV.
377392
bool isInvariant(Value *V) const;
@@ -677,6 +692,9 @@ class LoopVectorizationLegality {
677692
/// variables can be pointers.
678693
InductionList Inductions;
679694

695+
/// Holds all of the monotonic phi variables that we found in the loop, each
/// mapped to its monotonic descriptor.
696+
MonotonicPHIList MonotonicPHIs;
697+
680698
/// Holds all the casts that participate in the update chain of the induction
681699
/// variables, and that have been proven to be redundant (possibly under a
682700
/// runtime guard). These casts can be ignored when creating the vectorized

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
4545
cl::desc("Enable recognition of non-constant strided "
4646
"pointer induction variables."));
4747

48+
// Hidden option, enabled by default, that gates the recognition of monotonic
// phis (MonotonicDescriptor::isMonotonicPHI) when checking whether a phi can
// be vectorized.
static cl::opt<bool> EnableMonotonicPatterns(
49+
"lv-monotonic-patterns", cl::init(true), cl::Hidden,
50+
cl::desc("Enable recognition of monotonic patterns."));
51+
4852
static cl::opt<bool>
4953
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
5054
cl::desc("Allow enabling loop hints to reorder "
@@ -470,6 +474,30 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
470474
return 0;
471475
}
472476

477+
/// Returns true if \p Phi is one of the phis stored in the MonotonicPHIs map.
bool LoopVectorizationLegality::isMonotonicPHI(PHINode *Phi) const {
  return MonotonicPHIs.find(Phi) != MonotonicPHIs.end();
}
480+
481+
/// Returns true if the access of type \p AccessTy through \p Ptr in block
/// \p BB qualifies as a compressed access. All of the following must hold:
///  - \p Ptr is a monotonic value of TheLoop;
///  - its predicate edge is the edge from \p BB to BB's unique successor;
///  - its step is a constant equal to the alloc size of \p AccessTy.
bool LoopVectorizationLegality::isCompressedPtr(Type *AccessTy, Value *Ptr,
                                                BasicBlock *BB) const {
  auto &SE = *PSE.getSE();

  MonotonicDescriptor PtrDesc;
  if (!MonotonicDescriptor::isMonotonicVal(Ptr, TheLoop, PtrDesc, SE))
    return false;

  // The memory operation must use the same mask as the monotonic phi, i.e. the
  // predicate edge has to be the one leaving the block of the access.
  // TODO: relax restrictions of current implementation.
  const MonotonicDescriptor::Edge AccessEdge(BB, BB->getUniqueSuccessor());
  if (PtrDesc.getPredicateEdge() != AccessEdge)
    return false;

  // The pointer has to advance by exactly one element per update, so the step
  // must be a known constant matching the access size.
  auto *ConstStep =
      dyn_cast<SCEVConstant>(PtrDesc.getExpr()->getStepRecurrence(SE));
  return ConstStep && ConstStep->getAPInt() ==
                          BB->getDataLayout().getTypeAllocSize(AccessTy);
}
500+
473501
bool LoopVectorizationLegality::isInvariant(Value *V) const {
474502
return LAI->isInvariant(V);
475503
}
@@ -916,6 +944,13 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
916944
return true;
917945
}
918946

947+
MonotonicDescriptor MD;
948+
if (EnableMonotonicPatterns &&
949+
MonotonicDescriptor::isMonotonicPHI(Phi, TheLoop, MD, *PSE.getSE())) {
950+
MonotonicPHIs[Phi] = MD;
951+
return true;
952+
}
953+
919954
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
920955
AllowedExit.insert(Phi);
921956
FixedOrderRecurrences.insert(Phi);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 104 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,9 +1241,9 @@ class LoopVectorizationCostModel {
12411241
getDivRemSpeculationCost(Instruction *I,
12421242
ElementCount VF) const;
12431243

1244-
/// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
1245-
/// memory instruction with consecutive access that can be widened, or
1246-
/// CM_Unknown otherwise.
1244+
/// Returns widening decision (CM_Widen, CM_Widen_Reverse or CM_Compressed) if
1245+
/// \p I is a memory instruction with consecutive or compressed access that can
1246+
/// be widened, or CM_Unknown otherwise.
12471247
InstWidening memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
12481248

12491249
/// Returns true if \p I is a memory instruction in an interleaved-group
@@ -3000,6 +3000,9 @@ LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
30003000
auto *Ptr = getLoadStorePointerOperand(I);
30013001
auto *ScalarTy = getLoadStoreType(I);
30023002

3003+
if (Legal->isCompressedPtr(ScalarTy, Ptr, I->getParent()))
3004+
return CM_Compressed;
3005+
30033006
// In order to be widened, the pointer should be consecutive, first of all.
30043007
auto Stride = Legal->isConsecutivePtr(ScalarTy, Ptr);
30053008
if (!Stride)
@@ -3257,6 +3260,39 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
32573260
AddToWorklistIfAllowed(IndUpdate);
32583261
}
32593262

3263+
// Handle monotonic phis (similarly to induction vars).
3264+
for (const auto &MonotonicPHI : Legal->getMonotonicPHIs()) {
3265+
auto *Phi = MonotonicPHI.first;
3266+
auto *PhiUpdate = cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
3267+
const auto &Desc = MonotonicPHI.second;
3268+
3269+
auto UniformPhi = llvm::all_of(Phi->users(), [&](User *U) -> bool {
3270+
auto *I = cast<Instruction>(U);
3271+
if (I == Desc.getStepInst())
3272+
return true;
3273+
if (auto *PN = dyn_cast<PHINode>(I); PN && Desc.getChain().contains(PN))
3274+
return true;
3275+
return !TheLoop->contains(I) || Worklist.count(I) ||
3276+
IsVectorizedMemAccessUse(I, Phi);
3277+
});
3278+
if (!UniformPhi)
3279+
continue;
3280+
3281+
auto UniformPhiUpdate =
3282+
llvm::all_of(PhiUpdate->users(), [&](User *U) -> bool {
3283+
auto *I = cast<Instruction>(U);
3284+
if (I == Phi)
3285+
return true;
3286+
return !TheLoop->contains(I) || Worklist.count(I) ||
3287+
IsVectorizedMemAccessUse(I, Phi);
3288+
});
3289+
if (!UniformPhiUpdate)
3290+
continue;
3291+
3292+
AddToWorklistIfAllowed(Phi);
3293+
AddToWorklistIfAllowed(PhiUpdate);
3294+
}
3295+
32603296
Uniforms[VF].insert_range(Worklist);
32613297
}
32623298

@@ -4048,6 +4084,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40484084
case VPDef::VPEVLBasedIVPHISC:
40494085
case VPDef::VPPredInstPHISC:
40504086
case VPDef::VPBranchOnMaskSC:
4087+
case VPDef::VPMonotonicPHISC:
40514088
continue;
40524089
case VPDef::VPReductionSC:
40534090
case VPDef::VPActiveLaneMaskPHISC:
@@ -4561,6 +4598,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
45614598
if (Plan.hasEarlyExit())
45624599
return 1;
45634600

4601+
// Monotonic vars don't support interleaving.
4602+
if (Legal->hasMonotonicPHIs())
4603+
return 1;
4604+
45644605
const bool HasReductions =
45654606
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
45664607
IsaPred<VPReductionPHIRecipe>);
@@ -8075,11 +8116,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
80758116
return Recipe;
80768117

80778118
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8078-
assert((Legal->isReductionVariable(Phi) ||
8119+
assert((Legal->isMonotonicPHI(Phi) || Legal->isReductionVariable(Phi) ||
80798120
Legal->isFixedOrderRecurrence(Phi)) &&
8080-
"can only widen reductions and fixed-order recurrences here");
8121+
"can only widen monotonic phis, reductions and fixed-order "
8122+
"recurrences here");
80818123
VPValue *StartV = Operands[0];
8082-
if (Legal->isReductionVariable(Phi)) {
8124+
Value *IncomingVal =
8125+
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader());
8126+
if (Legal->isMonotonicPHI(Phi)) {
8127+
const MonotonicDescriptor &Desc =
8128+
Legal->getMonotonicPHIs().find(Phi)->second;
8129+
assert(Desc.getExpr()->getStart() == PSE.getSCEV(IncomingVal));
8130+
PhiRecipe = new VPMonotonicPHIRecipe(Phi, Desc, StartV);
8131+
} else if (Legal->isReductionVariable(Phi)) {
80838132
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
80848133
assert(RdxDesc.getRecurrenceStartValue() ==
80858134
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
@@ -8430,6 +8479,46 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84308479
// bring the VPlan to its final state.
84318480
// ---------------------------------------------------------------------------
84328481

8482+
// Adjust the recipes for any monotonic phis.
8483+
for (VPRecipeBase &R : HeaderVPBB->phis()) {
8484+
auto *MonotonicPhi = dyn_cast<VPMonotonicPHIRecipe>(&R);
8485+
if (!MonotonicPhi)
8486+
continue;
8487+
8488+
// Prohibit scalarization of monotonic phis.
8489+
if (!all_of(Range, [&](ElementCount VF) {
8490+
return CM.isUniformAfterVectorization(
8491+
MonotonicPhi->getUnderlyingInstr(), VF);
8492+
}))
8493+
return nullptr;
8494+
8495+
// Obtain mask value for the predicate edge from the last VPBlendRecipe in
8496+
// chain.
8497+
VPValue *Chain = MonotonicPhi->getBackedgeValue();
8498+
VPValue *Mask = nullptr;
8499+
while (auto *BlendR = dyn_cast<VPBlendRecipe>(Chain))
8500+
for (unsigned I = 0, E = BlendR->getNumIncomingValues(); I != E; ++I)
8501+
if (auto *IncomingVal = BlendR->getIncomingValue(I);
8502+
IncomingVal != MonotonicPhi) {
8503+
Chain = IncomingVal;
8504+
Mask = BlendR->getMask(I);
8505+
break;
8506+
}
8507+
assert(Mask);
8508+
8509+
auto &Desc = MonotonicPhi->getDescriptor();
8510+
auto &SE = *PSE.getSE();
8511+
auto *Step = vputils::getOrCreateVPValueForSCEVExpr(
8512+
*Plan, Desc.getExpr()->getStepRecurrence(SE));
8513+
8514+
auto *MonotonicI =
8515+
new VPInstruction(VPInstruction::ComputeMonotonicResult,
8516+
{MonotonicPhi, Mask, Step}, *Desc.getStepInst());
8517+
auto *InsertBlock = MonotonicPhi->getBackedgeRecipe().getParent();
8518+
InsertBlock->insert(MonotonicI, InsertBlock->getFirstNonPhi());
8519+
MonotonicPhi->getBackedgeValue()->replaceAllUsesWith(MonotonicI);
8520+
}
8521+
84338522
// Adjust the recipes for any inloop reductions.
84348523
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
84358524

@@ -9892,6 +9981,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98929981
IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
98939982

98949983
unsigned SelectedIC = std::max(IC, UserIC);
9984+
9985+
if (LVL.hasMonotonicPHIs() && SelectedIC > 1) {
9986+
reportVectorizationFailure(
9987+
"Interleaving of loop with monotonic vars",
9988+
"Interleaving of loops with monotonic vars is not supported",
9989+
"CantInterleaveWithMonotonicVars", ORE, L);
9990+
return false;
9991+
}
9992+
98959993
// Optimistically generate runtime checks if they are needed. Drop them if
98969994
// they turn out to not be profitable.
98979995
if (VF.Width.isVector() || SelectedIC > 1) {

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,7 @@ void VPlan::execute(VPTransformState *State) {
984984
auto *PhiR = cast<VPSingleDefRecipe>(&R);
985985
// VPInstructions currently model scalar Phis only.
986986
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
987+
isa<VPMonotonicPHIRecipe>(PhiR) ||
987988
(isa<VPReductionPHIRecipe>(PhiR) &&
988989
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
989990

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
553553
case VPRecipeBase::VPWidenIntOrFpInductionSC:
554554
case VPRecipeBase::VPWidenPointerInductionSC:
555555
case VPRecipeBase::VPReductionPHISC:
556+
case VPRecipeBase::VPMonotonicPHISC:
556557
case VPRecipeBase::VPPartialReductionSC:
557558
return true;
558559
case VPRecipeBase::VPBranchOnMaskSC:
@@ -1014,6 +1015,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
10141015
ComputeAnyOfResult,
10151016
ComputeFindIVResult,
10161017
ComputeReductionResult,
1018+
ComputeMonotonicResult,
10171019
// Extracts the last lane from its operand if it is a vector, or the last
10181020
// part if scalar. In the latter case, the recipe will be removed during
10191021
// unrolling.
@@ -2406,6 +2408,50 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
24062408
}
24072409
};
24082410

2411+
/// A recipe for handling monotonic phis. The start value is the first operand
2412+
/// of the recipe and the incoming value from the backedge is the second
2413+
/// operand.
2414+
class VPMonotonicPHIRecipe : public VPHeaderPHIRecipe {
2415+
MonotonicDescriptor Desc;
2416+
2417+
public:
2418+
VPMonotonicPHIRecipe(PHINode *Phi, const MonotonicDescriptor &Desc,
2419+
VPValue *Start)
2420+
: VPHeaderPHIRecipe(VPDef::VPMonotonicPHISC, Phi, Start), Desc(Desc) {}
2421+
2422+
~VPMonotonicPHIRecipe() override = default;
2423+
2424+
VPMonotonicPHIRecipe *clone() override {
2425+
auto *R = new VPMonotonicPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
2426+
Desc, getStartValue());
2427+
R->addOperand(getBackedgeValue());
2428+
return R;
2429+
}
2430+
2431+
VP_CLASSOF_IMPL(VPDef::VPMonotonicPHISC)
2432+
2433+
static inline bool classof(const VPHeaderPHIRecipe *R) {
2434+
return R->getVPDefID() == VPDef::VPMonotonicPHISC;
2435+
}
2436+
2437+
void execute(VPTransformState &State) override;
2438+
2439+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2440+
/// Print the recipe.
2441+
void print(raw_ostream &O, const Twine &Indent,
2442+
VPSlotTracker &SlotTracker) const override;
2443+
#endif
2444+
2445+
const MonotonicDescriptor &getDescriptor() const { return Desc; }
2446+
2447+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2448+
bool usesFirstLaneOnly(const VPValue *Op) const override {
2449+
assert(is_contained(operands(), Op) &&
2450+
"Op must be an operand of the recipe");
2451+
return true;
2452+
}
2453+
};
2454+
24092455
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
24102456
/// instructions.
24112457
class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
9797
case VPInstruction::ComputeReductionResult: {
9898
return inferScalarType(R->getOperand(0));
9999
}
100+
case VPInstruction::ComputeMonotonicResult: {
101+
auto *PhiR = cast<VPMonotonicPHIRecipe>(R->getOperand(0));
102+
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
103+
return OrigPhi->getType();
104+
}
100105
case VPInstruction::ExplicitVectorLength:
101106
return Type::getIntNTy(Ctx, 32);
102107
case Instruction::PHI:
@@ -276,14 +281,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
276281
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
277282
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
278283
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
279-
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
280-
[this](const auto *R) {
281-
// Handle header phi recipes, except VPWidenIntOrFpInduction
282-
// which needs special handling due it being possibly truncated.
283-
// TODO: consider inferring/caching type of siblings, e.g.,
284-
// backedge value, here and in cases below.
285-
return inferScalarType(R->getStartValue());
286-
})
284+
VPMonotonicPHIRecipe, VPWidenPointerInductionRecipe,
285+
VPEVLBasedIVPHIRecipe>([this](const auto *R) {
286+
// Handle header phi recipes, except VPWidenIntOrFpInduction
287+
// which needs special handling due it being possibly truncated.
288+
// TODO: consider inferring/caching type of siblings, e.g.,
289+
// backedge value, here and in cases below.
290+
return inferScalarType(R->getStartValue());
291+
})
287292
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
288293
[](const auto *R) { return R->getScalarType(); })
289294
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,

0 commit comments

Comments
 (0)