Skip to content

Commit 888e1ea

Browse files
committed
[LoopVectorize] Support vectorization of compressing patterns in VPlan
1 parent d4ba68f commit 888e1ea

File tree

12 files changed

+591
-74
lines changed

12 files changed

+591
-74
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1403,6 +1403,7 @@ class TargetTransformInfo {
14031403
Normal, ///< The cast is used with a normal load/store.
14041404
Masked, ///< The cast is used with a masked load/store.
14051405
GatherScatter, ///< The cast is used with a gather/scatter.
1406+
Compressed, ///< The cast is used with an expand load/compress store.
14061407
Interleave, ///< The cast is used with an interleaved load/store.
14071408
Reversed, ///< The cast is used with a reversed load/store.
14081409
};

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,10 @@ class LoopVectorizationLegality {
269269
/// induction descriptor.
270270
using InductionList = MapVector<PHINode *, InductionDescriptor>;
271271

272+
/// MonotonicPHIList saves monotonic phi variables and maps them to the
273+
/// monotonic phi descriptor.
274+
using MonotonicPHIList = MapVector<PHINode *, MonotonicDescriptor>;
275+
272276
/// RecurrenceSet contains the phi nodes that are recurrences other than
273277
/// inductions and reductions.
274278
using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
@@ -304,6 +308,11 @@ class LoopVectorizationLegality {
304308
/// Returns the induction variables found in the loop.
305309
const InductionList &getInductionVars() const { return Inductions; }
306310

311+
/// Returns the monotonic phi variables found in the loop.
312+
const MonotonicPHIList &getMonotonicPHIs() const { return MonotonicPHIs; }
313+
314+
bool hasMonotonicPHIs() const { return !MonotonicPHIs.empty(); }
315+
307316
/// Return the fixed-order recurrences found in the loop.
308317
RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
309318

@@ -361,6 +370,12 @@ class LoopVectorizationLegality {
361370
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
362371
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
363372

373+
/// Returns true if \p Phi is a monotonic variable.
374+
bool isMonotonicPHI(PHINode *Phi) const;
375+
376+
/// Check if the memory access is compressed when vectorizing.
377+
bool isCompressedPtr(Type *AccessTy, Value *Ptr, BasicBlock *BB) const;
378+
364379
/// Returns true if \p V is invariant across all loop iterations according to
365380
/// SCEV.
366381
bool isInvariant(Value *V) const;
@@ -597,6 +612,9 @@ class LoopVectorizationLegality {
597612
/// variables can be pointers.
598613
InductionList Inductions;
599614

615+
/// Holds all of the monotonic phi variables that we found in the loop.
616+
MonotonicPHIList MonotonicPHIs;
617+
600618
/// Holds all the casts that participate in the update chain of the induction
601619
/// variables, and that have been proven to be redundant (possibly under a
602620
/// runtime guard). These casts can be ignored when creating the vectorized

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
4343
cl::desc("Enable recognition of non-constant strided "
4444
"pointer induction variables."));
4545

46+
static cl::opt<bool> EnableMonotonicPatterns(
47+
"lv-monotonic-patterns", cl::init(true), cl::Hidden,
48+
cl::desc("Enable recognition of monotonic patterns."));
49+
4650
static cl::opt<bool>
4751
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
4852
cl::desc("Allow enabling loop hints to reorder "
@@ -468,6 +472,30 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
468472
return 0;
469473
}
470474

475+
bool LoopVectorizationLegality::isMonotonicPHI(PHINode *Phi) const {
476+
return MonotonicPHIs.count(Phi);
477+
}
478+
479+
bool LoopVectorizationLegality::isCompressedPtr(Type *AccessTy, Value *Ptr,
480+
BasicBlock *BB) const {
481+
MonotonicDescriptor Desc;
482+
if (!MonotonicDescriptor::isMonotonicVal(Ptr, TheLoop, Desc, *PSE.getSE()))
483+
return false;
484+
485+
// Check if the memory operation will use the same mask as the monotonic phi.
486+
// TODO: relax restrictions of current implementation.
487+
if (Desc.getPredicateEdge() !=
488+
MonotonicDescriptor::Edge(BB, BB->getUniqueSuccessor()))
489+
return false;
490+
491+
// Check if the pointer step equals the access size.
492+
auto *Step =
493+
dyn_cast<SCEVConstant>(Desc.getExpr()->getStepRecurrence(*PSE.getSE()));
494+
if (!Step)
495+
return false;
496+
return Step->getAPInt() == BB->getDataLayout().getTypeAllocSize(AccessTy);
497+
}
498+
471499
bool LoopVectorizationLegality::isInvariant(Value *V) const {
472500
return LAI->isInvariant(V);
473501
}
@@ -874,6 +902,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
874902
continue;
875903
}
876904

905+
MonotonicDescriptor MD;
906+
if (EnableMonotonicPatterns && MonotonicDescriptor::isMonotonicPHI(
907+
Phi, TheLoop, MD, *PSE.getSE())) {
908+
MonotonicPHIs[Phi] = MD;
909+
continue;
910+
}
911+
877912
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
878913
AllowedExit.insert(Phi);
879914
FixedOrderRecurrences.insert(Phi);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 117 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,7 @@ class LoopVectorizationCostModel {
10951095
CM_Widen_Reverse, // For consecutive accesses with stride -1.
10961096
CM_Interleave,
10971097
CM_GatherScatter,
1098+
CM_Compressed,
10981099
CM_Scalarize,
10991100
CM_VectorCall,
11001101
CM_IntrinsicCall
@@ -1308,9 +1309,9 @@ class LoopVectorizationCostModel {
13081309
getDivRemSpeculationCost(Instruction *I,
13091310
ElementCount VF) const;
13101311

1311-
/// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
1312-
/// memory instruction with consecutive access that can be widened, or
1313-
/// CM_Unknown otherwise.
1312+
/// Returns widening decision (CM_Widen, CM_Widen_Reverse or CM_Compressed) if
1313+
/// \p I is a memory instruction with consecutive access that can be widened,
1314+
/// or CM_Unknown otherwise.
13141315
InstWidening memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
13151316

13161317
/// Returns true if \p I is a memory instruction in an interleaved-group
@@ -3263,6 +3264,9 @@ LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
32633264
auto *Ptr = getLoadStorePointerOperand(I);
32643265
auto *ScalarTy = getLoadStoreType(I);
32653266

3267+
if (Legal->isCompressedPtr(ScalarTy, Ptr, I->getParent()))
3268+
return CM_Compressed;
3269+
32663270
// In order to be widened, the pointer should be consecutive, first of all.
32673271
auto Stride = Legal->isConsecutivePtr(ScalarTy, Ptr);
32683272
if (!Stride)
@@ -3372,9 +3376,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
33723376
if (IsUniformMemOpUse(I))
33733377
return true;
33743378

3375-
return (WideningDecision == CM_Widen ||
3376-
WideningDecision == CM_Widen_Reverse ||
3377-
WideningDecision == CM_Interleave);
3379+
return (
3380+
WideningDecision == CM_Widen || WideningDecision == CM_Widen_Reverse ||
3381+
WideningDecision == CM_Interleave || WideningDecision == CM_Compressed);
33783382
};
33793383

33803384
// Returns true if Ptr is the pointer operand of a memory access instruction
@@ -3514,6 +3518,39 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
35143518
AddToWorklistIfAllowed(IndUpdate);
35153519
}
35163520

3521+
// Handle monotonic phis (similarly to induction vars).
3522+
for (const auto &MonotonicPHI : Legal->getMonotonicPHIs()) {
3523+
auto *Phi = MonotonicPHI.first;
3524+
auto *PhiUpdate = cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
3525+
const auto &Desc = MonotonicPHI.second;
3526+
3527+
auto UniformPhi = llvm::all_of(Phi->users(), [&](User *U) -> bool {
3528+
auto *I = cast<Instruction>(U);
3529+
if (I == Desc.getStepInst())
3530+
return true;
3531+
if (auto *PN = dyn_cast<PHINode>(I); PN && Desc.getChain().contains(PN))
3532+
return true;
3533+
return !TheLoop->contains(I) || Worklist.count(I) ||
3534+
IsVectorizedMemAccessUse(I, Phi);
3535+
});
3536+
if (!UniformPhi)
3537+
continue;
3538+
3539+
auto UniformPhiUpdate =
3540+
llvm::all_of(PhiUpdate->users(), [&](User *U) -> bool {
3541+
auto *I = cast<Instruction>(U);
3542+
if (I == Phi)
3543+
return true;
3544+
return !TheLoop->contains(I) || Worklist.count(I) ||
3545+
IsVectorizedMemAccessUse(I, Phi);
3546+
});
3547+
if (!UniformPhiUpdate)
3548+
continue;
3549+
3550+
AddToWorklistIfAllowed(Phi);
3551+
AddToWorklistIfAllowed(PhiUpdate);
3552+
}
3553+
35173554
Uniforms[VF].insert_range(Worklist);
35183555
}
35193556

@@ -4272,6 +4309,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
42724309
case VPDef::VPEVLBasedIVPHISC:
42734310
case VPDef::VPPredInstPHISC:
42744311
case VPDef::VPBranchOnMaskSC:
4312+
case VPDef::VPMonotonicPHISC:
42754313
continue;
42764314
case VPDef::VPReductionSC:
42774315
case VPDef::VPActiveLaneMaskPHISC:
@@ -4992,6 +5030,10 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
49925030
if (Legal->hasUncountableEarlyExit())
49935031
return 1;
49945032

5033+
// Monotonic vars don't support interleaving.
5034+
if (Legal->hasMonotonicPHIs())
5035+
return 1;
5036+
49955037
const bool HasReductions = !Legal->getReductionVars().empty();
49965038

49975039
// If we did not calculate the cost for VF (because the user selected the VF)
@@ -5577,12 +5619,17 @@ InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost(
55775619
Instruction *I, ElementCount VF, InstWidening Decision) {
55785620
Type *ValTy = getLoadStoreType(I);
55795621
auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
5622+
const Align Alignment = getLoadStoreAlignment(I);
55805623
unsigned AS = getLoadStoreAddressSpace(I);
55815624
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
55825625

5626+
if (Decision == CM_Compressed)
5627+
return TTI.getExpandCompressMemoryOpCost(I->getOpcode(), VectorTy,
5628+
/*VariableMask*/ true, Alignment,
5629+
CostKind, I);
5630+
55835631
assert((Decision == CM_Widen || Decision == CM_Widen_Reverse) &&
55845632
"Expected widen decision.");
5585-
const Align Alignment = getLoadStoreAlignment(I);
55865633
InstructionCost Cost = 0;
55875634
if (Legal->isMaskRequired(I)) {
55885635
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
@@ -6292,6 +6339,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
62926339
// the scalar version.
62936340
if (isUniformAfterVectorization(I, VF))
62946341
VF = ElementCount::getFixed(1);
6342+
else if (auto *Phi = dyn_cast<PHINode>(I)) {
6343+
// Prohibit scalarization of monotonic phis.
6344+
if (Legal->isMonotonicPHI(Phi))
6345+
return InstructionCost::getInvalid();
6346+
}
62956347

62966348
if (VF.isVector() && isProfitableToScalarize(I, VF))
62976349
return InstsToScalarize[VF][I];
@@ -6647,6 +6699,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
66476699
switch (getWideningDecision(I, VF)) {
66486700
case LoopVectorizationCostModel::CM_GatherScatter:
66496701
return TTI::CastContextHint::GatherScatter;
6702+
case LoopVectorizationCostModel::CM_Compressed:
6703+
return TTI::CastContextHint::Compressed;
66506704
case LoopVectorizationCostModel::CM_Interleave:
66516705
return TTI::CastContextHint::Interleave;
66526706
case LoopVectorizationCostModel::CM_Scalarize:
@@ -7238,6 +7292,16 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
72387292
}
72397293
}
72407294

7295+
for (const auto &[MonotonicPhi, MonotonicDesc] : Legal->getMonotonicPHIs()) {
7296+
// TODO: currently, we restrict vectorization of non-uniform monotonic phis
7297+
// by reporting an Invalid cost for them. This can be relaxed in the future.
7298+
if (VF.isVector() && !CM.isUniformAfterVectorization(MonotonicPhi, VF))
7299+
Cost = InstructionCost::getInvalid();
7300+
else
7301+
Cost += TTI.getCFInstrCost(Instruction::PHI, CostCtx.CostKind);
7302+
CostCtx.SkipCostComputation.insert(MonotonicPhi);
7303+
}
7304+
72417305
// Pre-compute the costs for branches except for the backedge, as the number
72427306
// of replicate regions in a VPlan may not directly match the number of
72437307
// branches, which would lead to different decisions.
@@ -8229,8 +8293,9 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
82298293
LoopVectorizationCostModel::InstWidening Decision =
82308294
CM.getWideningDecision(I, Range.Start);
82318295
bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
8296+
bool Compressed = Decision == LoopVectorizationCostModel::CM_Compressed;
82328297
bool Consecutive =
8233-
Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
8298+
Reverse || Compressed || Decision == LoopVectorizationCostModel::CM_Widen;
82348299

82358300
VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
82368301
if (Consecutive) {
@@ -8258,11 +8323,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
82588323
}
82598324
if (LoadInst *Load = dyn_cast<LoadInst>(I))
82608325
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
8261-
VPIRMetadata(*Load, LVer), I->getDebugLoc());
8326+
Compressed, VPIRMetadata(*Load, LVer),
8327+
I->getDebugLoc());
82628328

82638329
StoreInst *Store = cast<StoreInst>(I);
82648330
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
8265-
Reverse, VPIRMetadata(*Store, LVer),
8331+
Reverse, Compressed, VPIRMetadata(*Store, LVer),
82668332
I->getDebugLoc());
82678333
}
82688334

@@ -8771,11 +8837,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
87718837
return Recipe;
87728838

87738839
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8774-
assert((Legal->isReductionVariable(Phi) ||
8840+
assert((Legal->isMonotonicPHI(Phi) || Legal->isReductionVariable(Phi) ||
87758841
Legal->isFixedOrderRecurrence(Phi)) &&
8776-
"can only widen reductions and fixed-order recurrences here");
8842+
"can only widen monotonic phis, reductions and fixed-order "
8843+
"recurrences here");
87778844
VPValue *StartV = Operands[0];
8778-
if (Legal->isReductionVariable(Phi)) {
8845+
Value *IncomingVal =
8846+
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader());
8847+
if (Legal->isMonotonicPHI(Phi)) {
8848+
const MonotonicDescriptor &Desc =
8849+
Legal->getMonotonicPHIs().find(Phi)->second;
8850+
assert(Desc.getExpr()->getStart() == PSE.getSCEV(IncomingVal));
8851+
PhiRecipe = new VPMonotonicPHIRecipe(Phi, Desc, StartV);
8852+
} else if (Legal->isReductionVariable(Phi)) {
87798853
const RecurrenceDescriptor &RdxDesc =
87808854
Legal->getReductionVars().find(Phi)->second;
87818855
assert(RdxDesc.getRecurrenceStartValue() ==
@@ -9397,6 +9471,27 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
93979471
// bring the VPlan to its final state.
93989472
// ---------------------------------------------------------------------------
93999473

9474+
// Adjust the recipes for any monotonic phis.
9475+
for (VPRecipeBase &R : HeaderVPBB->phis()) {
9476+
auto *MonotonicPhi = dyn_cast<VPMonotonicPHIRecipe>(&R);
9477+
if (!MonotonicPhi)
9478+
continue;
9479+
9480+
auto &Desc = MonotonicPhi->getDescriptor();
9481+
auto [EdgeSrc, EdgeDst] = Desc.getPredicateEdge();
9482+
auto &SE = *PSE.getSE();
9483+
auto *Step = vputils::getOrCreateVPValueForSCEVExpr(
9484+
*Plan, Desc.getExpr()->getStepRecurrence(SE), SE);
9485+
9486+
auto *MonotonicI = new VPInstruction(
9487+
VPInstruction::ComputeMonotonicResult,
9488+
{MonotonicPhi, RecipeBuilder.getEdgeMask(EdgeSrc, EdgeDst), Step},
9489+
*Desc.getStepInst());
9490+
auto *InsertBlock = MonotonicPhi->getBackedgeRecipe().getParent();
9491+
InsertBlock->insert(MonotonicI, InsertBlock->getFirstNonPhi());
9492+
MonotonicPhi->getBackedgeValue()->replaceAllUsesWith(MonotonicI);
9493+
}
9494+
94009495
// Adjust the recipes for any inloop reductions.
94019496
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
94029497

@@ -10587,6 +10682,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1058710682
IC = CM.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
1058810683

1058910684
unsigned SelectedIC = std::max(IC, UserIC);
10685+
10686+
if (LVL.hasMonotonicPHIs() && SelectedIC > 1) {
10687+
reportVectorizationFailure(
10688+
"Interleaving of loop with monotonic vars",
10689+
"Interleaving of loops with monotonic vars is not supported",
10690+
"CantInterleaveWithMonotonicVars", ORE, L);
10691+
return false;
10692+
}
10693+
1059010694
// Optimistically generate runtime checks if they are needed. Drop them if
1059110695
// they turn out to not be profitable.
1059210696
if (VF.Width.isVector() || SelectedIC > 1)

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,11 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
308308
VPLane LastLane(IsSingleScalar ? 0 : VF.getKnownMinValue() - 1);
309309
// Check if there is a scalar value for the selected lane.
310310
if (!hasScalarValue(Def, LastLane)) {
311-
// At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
312-
// VPExpandSCEVRecipes can also be a single scalar.
311+
// At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes,
312+
// VPMonotonicPHIRecipe and VPExpandSCEVRecipes can also be a single scalar.
313313
assert((isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe,
314-
VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
314+
VPMonotonicPHIRecipe, VPExpandSCEVRecipe>(
315+
Def->getDefiningRecipe())) &&
315316
"unexpected recipe found to be invariant");
316317
IsSingleScalar = true;
317318
LastLane = 0;
@@ -1005,6 +1006,7 @@ void VPlan::execute(VPTransformState *State) {
10051006
auto *PhiR = cast<VPSingleDefRecipe>(&R);
10061007
// VPInstructions currently model scalar Phis only.
10071008
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1009+
isa<VPMonotonicPHIRecipe>(PhiR) ||
10081010
(isa<VPReductionPHIRecipe>(PhiR) &&
10091011
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
10101012
Value *Phi = State->get(PhiR, NeedsScalar);

0 commit comments

Comments
 (0)