Skip to content

Commit e38379d

Browse files
committed
[LoopVectorize] Support vectorization of compressing patterns in VPlan
1 parent 2f9baaf commit e38379d

File tree

12 files changed

+579
-73
lines changed

12 files changed

+579
-73
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,7 @@ class TargetTransformInfo {
14421442
Normal, ///< The cast is used with a normal load/store.
14431443
Masked, ///< The cast is used with a masked load/store.
14441444
GatherScatter, ///< The cast is used with a gather/scatter.
1445+
Compressed, ///< The cast is used with an expand load/compress store.
14451446
Interleave, ///< The cast is used with an interleaved load/store.
14461447
Reversed, ///< The cast is used with a reversed load/store.
14471448
};

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,10 @@ class LoopVectorizationLegality {
272272
/// induction descriptor.
273273
using InductionList = MapVector<PHINode *, InductionDescriptor>;
274274

275+
/// MonotonicPHIList saves monotonic phi variables and maps them to the
276+
/// monotonic phi descriptor.
277+
using MonotonicPHIList = MapVector<PHINode *, MonotonicDescriptor>;
278+
275279
/// RecurrenceSet contains the phi nodes that are recurrences other than
276280
/// inductions and reductions.
277281
using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
@@ -315,6 +319,11 @@ class LoopVectorizationLegality {
315319
/// Returns the induction variables found in the loop.
316320
const InductionList &getInductionVars() const { return Inductions; }
317321

322+
/// Returns the monotonic phi variables found in the loop.
323+
const MonotonicPHIList &getMonotonicPHIs() const { return MonotonicPHIs; }
324+
325+
/// Returns true if at least one monotonic phi was found in the loop.
bool hasMonotonicPHIs() const { return !MonotonicPHIs.empty(); }
326+
318327
/// Return the fixed-order recurrences found in the loop.
319328
RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
320329

@@ -372,6 +381,12 @@ class LoopVectorizationLegality {
372381
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
373382
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
374383

384+
/// Returns true if \p Phi is a monotonic variable.
385+
bool isMonotonicPHI(PHINode *Phi) const;
386+
387+
/// Returns true if the memory access of type \p AccessTy through \p Ptr in
/// block \p BB is compressed when vectorizing.
388+
bool isCompressedPtr(Type *AccessTy, Value *Ptr, BasicBlock *BB) const;
389+
375390
/// Returns true if \p V is invariant across all loop iterations according to
376391
/// SCEV.
377392
bool isInvariant(Value *V) const;
@@ -677,6 +692,9 @@ class LoopVectorizationLegality {
677692
/// variables can be pointers.
678693
InductionList Inductions;
679694

695+
/// Holds all of the monotonic phi variables that we found in the loop.
696+
MonotonicPHIList MonotonicPHIs;
697+
680698
/// Holds all the casts that participate in the update chain of the induction
681699
/// variables, and that have been proven to be redundant (possibly under a
682700
/// runtime guard). These casts can be ignored when creating the vectorized

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
4545
cl::desc("Enable recognition of non-constant strided "
4646
"pointer induction variables."));
4747

48+
static cl::opt<bool> EnableMonotonicPatterns(
49+
"lv-monotonic-patterns", cl::init(true), cl::Hidden,
50+
cl::desc("Enable recognition of monotonic patterns."));
51+
4852
static cl::opt<bool>
4953
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
5054
cl::desc("Allow enabling loop hints to reorder "
@@ -470,6 +474,30 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
470474
return 0;
471475
}
472476

477+
/// Returns true if \p Phi has an entry in MonotonicPHIs, i.e. it was recorded
/// as a monotonic phi.
bool LoopVectorizationLegality::isMonotonicPHI(PHINode *Phi) const {
478+
// The lookup count converts implicitly to bool.
return MonotonicPHIs.count(Phi);
479+
}
480+
481+
/// Decides whether the access of type \p AccessTy through \p Ptr, located in
/// block \p BB, qualifies as a compressed access. All of the following must
/// hold, otherwise false is returned:
///  1. MonotonicDescriptor::isMonotonicVal recognizes \p Ptr in TheLoop.
///  2. The descriptor's predicate edge is the edge from \p BB to its unique
///     successor.
///  3. The step of the descriptor's expression is a constant equal to the
///     alloc size of \p AccessTy.
bool LoopVectorizationLegality::isCompressedPtr(Type *AccessTy, Value *Ptr,
482+
BasicBlock *BB) const {
483+
MonotonicDescriptor Desc;
484+
if (!MonotonicDescriptor::isMonotonicVal(Ptr, TheLoop, Desc, *PSE.getSE()))
485+
return false;
486+
487+
// Check that the memory operation will use the same mask as the monotonic
// phi: the predicate edge must be exactly BB -> BB's unique successor.
488+
// TODO: relax restrictions of current implementation.
489+
if (Desc.getPredicateEdge() !=
490+
MonotonicDescriptor::Edge(BB, BB->getUniqueSuccessor()))
491+
return false;
492+
493+
// Check that the pointer step is a constant equal to the alloc size of the
// accessed type. A non-constant step is rejected.
494+
auto *Step =
495+
dyn_cast<SCEVConstant>(Desc.getExpr()->getStepRecurrence(*PSE.getSE()));
496+
if (!Step)
497+
return false;
498+
return Step->getAPInt() == BB->getDataLayout().getTypeAllocSize(AccessTy);
499+
}
500+
473501
/// Returns whatever LAI->isInvariant reports for \p V; no additional checks
/// are performed here.
bool LoopVectorizationLegality::isInvariant(Value *V) const {
474502
return LAI->isInvariant(V);
475503
}
@@ -916,6 +944,13 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
916944
return true;
917945
}
918946

947+
MonotonicDescriptor MD;
948+
if (EnableMonotonicPatterns &&
949+
MonotonicDescriptor::isMonotonicPHI(Phi, TheLoop, MD, *PSE.getSE())) {
950+
MonotonicPHIs[Phi] = MD;
951+
return true;
952+
}
953+
919954
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
920955
AllowedExit.insert(Phi);
921956
FixedOrderRecurrences.insert(Phi);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 121 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,7 @@ class LoopVectorizationCostModel {
10271027
CM_Widen_Reverse, // For consecutive accesses with stride -1.
10281028
CM_Interleave,
10291029
CM_GatherScatter,
1030+
CM_Compressed,
10301031
CM_Scalarize,
10311032
CM_VectorCall,
10321033
CM_IntrinsicCall
@@ -1240,9 +1241,9 @@ class LoopVectorizationCostModel {
12401241
getDivRemSpeculationCost(Instruction *I,
12411242
ElementCount VF) const;
12421243

1243-
/// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
1244-
/// memory instruction with consecutive access that can be widened, or
1245-
/// CM_Unknown otherwise.
1244+
/// Returns widening decision (CM_Widen, CM_Widen_Reverse or CM_Compressed) if
1245+
/// \p I is a memory instruction with consecutive access that can be widened,
1246+
/// or CM_Unknown otherwise.
12461247
InstWidening memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
12471248

12481249
/// Returns true if \p I is a memory instruction in an interleaved-group
@@ -2999,6 +3000,9 @@ LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
29993000
auto *Ptr = getLoadStorePointerOperand(I);
30003001
auto *ScalarTy = getLoadStoreType(I);
30013002

3003+
if (Legal->isCompressedPtr(ScalarTy, Ptr, I->getParent()))
3004+
return CM_Compressed;
3005+
30023006
// In order to be widened, the pointer should be consecutive, first of all.
30033007
auto Stride = Legal->isConsecutivePtr(ScalarTy, Ptr);
30043008
if (!Stride)
@@ -3108,9 +3112,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
31083112
if (IsUniformMemOpUse(I))
31093113
return true;
31103114

3111-
return (WideningDecision == CM_Widen ||
3112-
WideningDecision == CM_Widen_Reverse ||
3113-
WideningDecision == CM_Interleave);
3115+
return (
3116+
WideningDecision == CM_Widen || WideningDecision == CM_Widen_Reverse ||
3117+
WideningDecision == CM_Interleave || WideningDecision == CM_Compressed);
31143118
};
31153119

31163120
// Returns true if Ptr is the pointer operand of a memory access instruction
@@ -3255,6 +3259,39 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
32553259
AddToWorklistIfAllowed(IndUpdate);
32563260
}
32573261

3262+
// Handle monotonic phis (similarly to induction vars). A monotonic phi and
// its latch update are added to the worklist only if every user of each of
// them is outside the loop, already in the worklist, part of the monotonic
// update cycle, or a vectorized memory access using that value as pointer.
3263+
for (const auto &MonotonicPHI : Legal->getMonotonicPHIs()) {
3264+
auto *Phi = MonotonicPHI.first;
3265+
auto *PhiUpdate = cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
3266+
const auto &Desc = MonotonicPHI.second;
3267+
3268+
// Check all users of the phi itself.
auto UniformPhi = llvm::all_of(Phi->users(), [&](User *U) -> bool {
3269+
auto *I = cast<Instruction>(U);
3270+
// The step instruction and the phis of the descriptor's chain belong to the
// update cycle and do not block uniformity.
if (I == Desc.getStepInst())
3271+
return true;
3272+
if (auto *PN = dyn_cast<PHINode>(I); PN && Desc.getChain().contains(PN))
3273+
return true;
3274+
return !TheLoop->contains(I) || Worklist.count(I) ||
3275+
IsVectorizedMemAccessUse(I, Phi);
3276+
});
3277+
if (!UniformPhi)
3278+
continue;
3279+
3280+
// Check all users of the latch update. The memory-access query must be made
// for PhiUpdate (the value whose users are inspected), not for Phi: querying
// Phi would accept a store that writes PhiUpdate through Phi as a uniform
// use of PhiUpdate.
auto UniformPhiUpdate =
3281+
llvm::all_of(PhiUpdate->users(), [&](User *U) -> bool {
3282+
auto *I = cast<Instruction>(U);
3283+
if (I == Phi)
3284+
return true;
3285+
return !TheLoop->contains(I) || Worklist.count(I) ||
3286+
IsVectorizedMemAccessUse(I, PhiUpdate);
3287+
});
3288+
if (!UniformPhiUpdate)
3289+
continue;
3290+
3291+
AddToWorklistIfAllowed(Phi);
3292+
AddToWorklistIfAllowed(PhiUpdate);
3293+
}
3294+
32583295
Uniforms[VF].insert_range(Worklist);
32593296
}
32603297

@@ -4046,6 +4083,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40464083
case VPDef::VPEVLBasedIVPHISC:
40474084
case VPDef::VPPredInstPHISC:
40484085
case VPDef::VPBranchOnMaskSC:
4086+
case VPDef::VPMonotonicPHISC:
40494087
continue;
40504088
case VPDef::VPReductionSC:
40514089
case VPDef::VPActiveLaneMaskPHISC:
@@ -4559,6 +4597,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
45594597
if (Plan.hasEarlyExit())
45604598
return 1;
45614599

4600+
// Monotonic vars don't support interleaving.
4601+
if (Legal->hasMonotonicPHIs())
4602+
return 1;
4603+
45624604
const bool HasReductions =
45634605
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
45644606
IsaPred<VPReductionPHIRecipe>);
@@ -5191,12 +5233,17 @@ InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost(
51915233
Instruction *I, ElementCount VF, InstWidening Decision) {
51925234
Type *ValTy = getLoadStoreType(I);
51935235
auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
5236+
const Align Alignment = getLoadStoreAlignment(I);
51945237
unsigned AS = getLoadStoreAddressSpace(I);
51955238
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
51965239

5240+
if (Decision == CM_Compressed)
5241+
return TTI.getExpandCompressMemoryOpCost(I->getOpcode(), VectorTy,
5242+
/*VariableMask*/ true, Alignment,
5243+
CostKind, I);
5244+
51975245
assert((Decision == CM_Widen || Decision == CM_Widen_Reverse) &&
51985246
"Expected widen decision.");
5199-
const Align Alignment = getLoadStoreAlignment(I);
52005247
InstructionCost Cost = 0;
52015248
if (Legal->isMaskRequired(I)) {
52025249
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
@@ -6299,6 +6346,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
62996346
switch (getWideningDecision(I, VF)) {
63006347
case LoopVectorizationCostModel::CM_GatherScatter:
63016348
return TTI::CastContextHint::GatherScatter;
6349+
case LoopVectorizationCostModel::CM_Compressed:
6350+
return TTI::CastContextHint::Compressed;
63026351
case LoopVectorizationCostModel::CM_Interleave:
63036352
return TTI::CastContextHint::Interleave;
63046353
case LoopVectorizationCostModel::CM_Scalarize:
@@ -7514,8 +7563,9 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75147563
LoopVectorizationCostModel::InstWidening Decision =
75157564
CM.getWideningDecision(I, Range.Start);
75167565
bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
7566+
bool Compressed = Decision == LoopVectorizationCostModel::CM_Compressed;
75177567
bool Consecutive =
7518-
Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
7568+
Reverse || Compressed || Decision == LoopVectorizationCostModel::CM_Widen;
75197569

75207570
VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
75217571
if (Consecutive) {
@@ -7545,11 +7595,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75457595
}
75467596
if (LoadInst *Load = dyn_cast<LoadInst>(I))
75477597
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
7548-
VPIRMetadata(*Load, LVer), I->getDebugLoc());
7598+
Compressed, VPIRMetadata(*Load, LVer),
7599+
I->getDebugLoc());
75497600

75507601
StoreInst *Store = cast<StoreInst>(I);
75517602
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
7552-
Reverse, VPIRMetadata(*Store, LVer),
7603+
Reverse, Compressed, VPIRMetadata(*Store, LVer),
75537604
I->getDebugLoc());
75547605
}
75557606

@@ -8064,11 +8115,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
80648115
return Recipe;
80658116

80668117
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8067-
assert((Legal->isReductionVariable(Phi) ||
8118+
assert((Legal->isMonotonicPHI(Phi) || Legal->isReductionVariable(Phi) ||
80688119
Legal->isFixedOrderRecurrence(Phi)) &&
8069-
"can only widen reductions and fixed-order recurrences here");
8120+
"can only widen monotonic phis, reductions and fixed-order "
8121+
"recurrences here");
80708122
VPValue *StartV = Operands[0];
8071-
if (Legal->isReductionVariable(Phi)) {
8123+
Value *IncomingVal =
8124+
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader());
8125+
if (Legal->isMonotonicPHI(Phi)) {
8126+
const MonotonicDescriptor &Desc =
8127+
Legal->getMonotonicPHIs().find(Phi)->second;
8128+
assert(Desc.getExpr()->getStart() == PSE.getSCEV(IncomingVal));
8129+
PhiRecipe = new VPMonotonicPHIRecipe(Phi, Desc, StartV);
8130+
} else if (Legal->isReductionVariable(Phi)) {
80728131
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
80738132
assert(RdxDesc.getRecurrenceStartValue() ==
80748133
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
@@ -8419,6 +8478,46 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84198478
// bring the VPlan to its final state.
84208479
// ---------------------------------------------------------------------------
84218480

8481+
// Adjust the recipes for any monotonic phis: for each VPMonotonicPHIRecipe in
// the header, create a ComputeMonotonicResult VPInstruction from the phi, the
// mask of its predicate edge and its step, and make it replace the phi's
// previous backedge value.
8482+
for (VPRecipeBase &R : HeaderVPBB->phis()) {
8483+
auto *MonotonicPhi = dyn_cast<VPMonotonicPHIRecipe>(&R);
8484+
if (!MonotonicPhi)
8485+
continue;
8486+
8487+
// Prohibit scalarization of monotonic phis: bail out by returning nullptr
// unless the phi is uniform after vectorization for every VF in Range.
8488+
if (!all_of(Range, [&](ElementCount VF) {
8489+
return CM.isUniformAfterVectorization(
8490+
MonotonicPhi->getUnderlyingInstr(), VF);
8491+
}))
8492+
return nullptr;
8493+
8494+
// Obtain the mask value for the predicate edge from the last VPBlendRecipe
// in the
8495+
// chain. Starting at the backedge value, follow in each blend the first
// incoming value that is not the phi itself and remember that value's mask.
8496+
VPValue *Chain = MonotonicPhi->getBackedgeValue();
8497+
VPValue *Mask = nullptr;
8498+
// NOTE(review): if a blend has no incoming value other than MonotonicPhi,
// Chain is never advanced and this loop does not terminate - confirm such
// blends cannot occur here.
while (auto *BlendR = dyn_cast<VPBlendRecipe>(Chain))
8499+
for (unsigned I = 0, E = BlendR->getNumIncomingValues(); I != E; ++I)
8500+
if (auto *IncomingVal = BlendR->getIncomingValue(I);
8501+
IncomingVal != MonotonicPhi) {
8502+
Chain = IncomingVal;
8503+
Mask = BlendR->getMask(I);
8504+
break;
8505+
}
8506+
// NOTE(review): Mask stays null when the backedge value is not a blend; in
// builds without assertions the null Mask would be passed to the
// VPInstruction below - confirm the backedge value is always a blend.
assert(Mask);
8507+
8508+
auto &Desc = MonotonicPhi->getDescriptor();
8509+
auto &SE = *PSE.getSE();
8510+
// Materialize the step of the descriptor's expression as a VPValue.
auto *Step = vputils::getOrCreateVPValueForSCEVExpr(
8511+
*Plan, Desc.getExpr()->getStepRecurrence(SE));
8512+
8513+
auto *MonotonicI =
8514+
new VPInstruction(VPInstruction::ComputeMonotonicResult,
8515+
{MonotonicPhi, Mask, Step}, *Desc.getStepInst());
8516+
// Insert the new instruction at the first non-phi position of the block
// holding the backedge recipe, then redirect all users of the old backedge
// value to it.
auto *InsertBlock = MonotonicPhi->getBackedgeRecipe().getParent();
8517+
InsertBlock->insert(MonotonicI, InsertBlock->getFirstNonPhi());
8518+
MonotonicPhi->getBackedgeValue()->replaceAllUsesWith(MonotonicI);
8519+
}
8520+
84228521
// Adjust the recipes for any inloop reductions.
84238522
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
84248523

@@ -9881,6 +9980,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98819980
IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
98829981

98839982
unsigned SelectedIC = std::max(IC, UserIC);
9983+
9984+
if (LVL.hasMonotonicPHIs() && SelectedIC > 1) {
9985+
reportVectorizationFailure(
9986+
"Interleaving of loop with monotonic vars",
9987+
"Interleaving of loops with monotonic vars is not supported",
9988+
"CantInterleaveWithMonotonicVars", ORE, L);
9989+
return false;
9990+
}
9991+
98849992
// Optimistically generate runtime checks if they are needed. Drop them if
98859993
// they turn out to not be profitable.
98869994
if (VF.Width.isVector() || SelectedIC > 1) {

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,7 @@ void VPlan::execute(VPTransformState *State) {
984984
auto *PhiR = cast<VPSingleDefRecipe>(&R);
985985
// VPInstructions currently model scalar Phis only.
986986
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
987+
isa<VPMonotonicPHIRecipe>(PhiR) ||
987988
(isa<VPReductionPHIRecipe>(PhiR) &&
988989
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
989990

0 commit comments

Comments
 (0)