
Commit a3f75cf
[LV] Fold isPredicatedInst into isMaskRequired
Fold LoopVectorizationCostModel::isPredicatedInst into LoopVectorizationLegality::isMaskRequired, fixing a pending TODO item. Note that we still need to pass whether we're tail-folding by masking from the cost model into isMaskRequired.
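In practical terms the change splits the old query in two; a minimal before/after sketch of the call-site migration, drawn from the hunks below:

  // Before: set-membership and predication were separate queries.
  if (Legal->isMaskRequired(I)) ...   // plain MaskedOp lookup
  if (CM.isPredicatedInst(I)) ...     // cost-model predication query

  // After: the lookup is renamed isMasked, and the predication query
  // moves to legality, taking the tail-folding decision as a parameter.
  if (Legal->isMasked(I)) ...
  if (Legal->isMaskRequired(I, CM.foldTailByMasking())) ...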
Parent: c6f7fa7

2 files changed: +37 -42 lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 7 additions & 5 deletions
@@ -426,11 +426,13 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
   }
 
-  /// Returns true if vector representation of the instruction \p I
-  /// requires mask.
-  bool isMaskRequired(const Instruction *I) const {
-    return MaskedOp.contains(I);
-  }
+  /// Returns true if MaskedOp contains \p I.
+  bool isMasked(Instruction *I) const { return MaskedOp.contains(I); }
+
+  /// Returns true if \p I is an instruction that needs to be predicated
+  /// at runtime. The result is independent of the predication mechanism.
+  /// Superset of instructions that return true for isScalarWithPredication.
+  bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
 
   /// Returns true if there is at least one function call in the loop which
   /// has a vectorized variant available.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 30 additions & 37 deletions
@@ -1283,11 +1283,6 @@ class LoopVectorizationCostModel {
   /// \p VF is the vectorization factor that will be used to vectorize \p I.
   bool isScalarWithPredication(Instruction *I, ElementCount VF) const;
 
-  /// Returns true if \p I is an instruction that needs to be predicated
-  /// at runtime. The result is independent of the predication mechanism.
-  /// Superset of instructions that return true for isScalarWithPredication.
-  bool isPredicatedInst(Instruction *I) const;
-
   /// Return the costs for our two available strategies for lowering a
   /// div/rem operation which requires speculating at least one lane.
   /// First result is for scalarization (will be invalid for scalable
@@ -2941,7 +2936,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
 
 bool LoopVectorizationCostModel::isScalarWithPredication(
     Instruction *I, ElementCount VF) const {
-  if (!isPredicatedInst(I))
+  if (!Legal->isMaskRequired(I, foldTailByMasking()))
     return false;
 
   // Do we have a non-scalar lowering for this predicated
@@ -2980,56 +2975,56 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
   }
 }
 
-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+                                               bool FoldTailByMasking) const {
   // TODO: We can use the loop-preheader as context point here and get
   // context sensitive reasoning for isSafeToSpeculativelyExecute.
   if (isSafeToSpeculativelyExecute(I) ||
-      (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
+      (isa<LoadInst, StoreInst, CallInst>(I) && !isMasked(I)) ||
       isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
     return false;
 
   // If the instruction was executed conditionally in the original scalar loop,
   // predication is needed with a mask whose lanes are all possibly inactive.
-  if (Legal->blockNeedsPredication(I->getParent()))
+  if (blockNeedsPredication(I->getParent()))
     return true;
 
   // If we're not folding the tail by masking, predication is unnecessary.
-  if (!foldTailByMasking())
+  if (!FoldTailByMasking)
     return false;
 
   // All that remain are instructions with side-effects originally executed in
   // the loop unconditionally, but now execute under a tail-fold mask (only)
   // having at least one active lane (the first). If the side-effects of the
   // instruction are invariant, executing it w/o (the tail-folding) mask is safe
   // - it will cause the same side-effects as when masked.
-  switch(I->getOpcode()) {
+  switch (I->getOpcode()) {
  default:
    llvm_unreachable(
        "instruction should have been considered by earlier checks");
  case Instruction::Call:
    // Side-effects of a Call are assumed to be non-invariant, needing a
    // (fold-tail) mask.
-    assert(Legal->isMaskRequired(I) &&
+    assert(isMasked(I) &&
           "should have returned earlier for calls not needing a mask");
    return true;
  case Instruction::Load:
    // If the address is loop invariant no predication is needed.
-    return !Legal->isInvariant(getLoadStorePointerOperand(I));
+    return !isInvariant(getLoadStorePointerOperand(I));
  case Instruction::Store: {
    // For stores, we need to prove both speculation safety (which follows from
    // the same argument as loads), but also must prove the value being stored
    // is correct. The easiest form of the later is to require that all values
    // stored are the same.
-    return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
+    return !(isInvariant(getLoadStorePointerOperand(I)) &&
             TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
  }
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
    // If the divisor is loop-invariant no predication is needed.
-    return !Legal->isInvariant(I->getOperand(1));
+    return !isInvariant(I->getOperand(1));
  }
 }
 
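For intuition, here is a standalone C++ sketch of the decision tree the folded function implements. It is a hypothetical, simplified model — plain booleans stand in for the LLVM analyses the real function queries — not the LLVM API:

#include <cstdio>

// Hypothetical, simplified model of isMaskRequired's decision tree; the
// real function queries LLVM analyses instead of taking plain booleans.
static bool needsPredication(bool SafeToSpeculate, bool BlockIsConditional,
                             bool FoldTailByMasking,
                             bool SideEffectsInvariant) {
  if (SafeToSpeculate)
    return false; // safe to run unmasked, even on inactive lanes
  if (BlockIsConditional)
    return true;  // conditional in the scalar loop: mask lanes may be off
  if (!FoldTailByMasking)
    return false; // unconditional block, no tail mask: nothing to predicate
  // Unconditional op under a tail-fold mask: only invariant side-effects
  // (e.g. a loop-invariant divisor, or a store of an invariant value to an
  // invariant address) may safely execute unmasked.
  return !SideEffectsInvariant;
}

int main() {
  // A udiv with a loop-invariant divisor under tail folding: no mask needed.
  std::printf("%d\n", needsPredication(false, false, true, true));  // 0
  // A store to a varying address under tail folding: mask required.
  std::printf("%d\n", needsPredication(false, false, true, false)); // 1
}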
@@ -3144,8 +3139,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // (either a gap at the end of a load-access that may result in a speculative
   // load, or any gaps in a store-access).
   bool PredicatedAccessRequiresMasking =
-      blockNeedsPredicationForAnyReason(I->getParent()) &&
-      Legal->isMaskRequired(I);
+      blockNeedsPredicationForAnyReason(I->getParent()) && Legal->isMasked(I);
   bool LoadAccessWithGapsRequiresEpilogMasking =
       isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
       !isScalarEpilogueAllowed();
@@ -3235,7 +3229,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
                  << *I << "\n");
       return;
     }
-    if (isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, foldTailByMasking())) {
       LLVM_DEBUG(
           dbgs() << "LV: Found not uniform due to requiring predication: " << *I
                  << "\n");
@@ -4952,7 +4946,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
   // from moving "masked load/store" check from legality to cost model.
   // Masked Load/Gather emulation was previously never allowed.
   // Limited number of Masked Store/Scatter emulation was allowed.
-  assert((isPredicatedInst(I)) &&
+  assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
          "Expecting a scalar emulated instruction");
   return isa<LoadInst>(I) ||
          (isa<StoreInst>(I) &&
@@ -5254,7 +5248,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // If we have a predicated load/store, it will need extra i1 extracts and
   // conditional branches, but may not be executed for each vector lane. Scale
   // the cost by the probability of executing the predicated block.
-  if (isPredicatedInst(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost /= getPredBlockCostDivisor(CostKind);
 
     // Add the cost of an i1 extract and a branch
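As a hypothetical worked example of this scaling: if scalarizing a predicated load costs 8 and getPredBlockCostDivisor returns 2 (the actual divisor depends on the target and cost kind), the scaled cost is 8 / 2 = 4, to which the i1 extract and branch costs are then added.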
@@ -5287,7 +5281,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
          "Stride should be 1 or -1 for consecutive memory access");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
-  if (Legal->isMaskRequired(I)) {
+  if (Legal->isMasked(I)) {
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                       CostKind);
   } else {
@@ -5345,8 +5339,7 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
 
   return TTI.getAddressComputationCost(VectorTy) +
          TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment,
-                                    CostKind, I);
+                                    Legal->isMasked(I), Alignment, CostKind, I);
 }
 
 InstructionCost
@@ -5375,12 +5368,11 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
       InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
-      UseMaskForGaps);
+      Group->getAlign(), AS, CostKind, Legal->isMasked(I), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
+    assert(!Legal->isMasked(I) &&
            "Reverse masked interleaved access not supported.");
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
@@ -5873,7 +5865,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
       continue;
     }
 
-    bool MaskRequired = Legal->isMaskRequired(CI);
+    bool MaskRequired = Legal->isMasked(CI);
     // Compute corresponding vector type for return value and arguments.
     Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
     for (Type *ScalarTy : ScalarTys)
@@ -5993,7 +5985,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
   // instruction in the loop. In that case, it is not trivially hoistable.
   auto *OpI = dyn_cast<Instruction>(Op);
   return !OpI || !TheLoop->contains(OpI) ||
-         (!isPredicatedInst(OpI) &&
+         (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
           (!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
           all_of(OpI->operands(),
                  [this](Value *Op) { return shouldConsiderInvariant(Op); }));
@@ -6176,7 +6168,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-    if (VF.isVector() && isPredicatedInst(I)) {
+    if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
       const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
       return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
         ScalarCost : SafeDivisorCost;
@@ -6360,8 +6352,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       return TTI::CastContextHint::Interleave;
     case LoopVectorizationCostModel::CM_Scalarize:
     case LoopVectorizationCostModel::CM_Widen:
-      return isPredicatedInst(I) ? TTI::CastContextHint::Masked
-                                 : TTI::CastContextHint::Normal;
+      return Legal->isMaskRequired(I, foldTailByMasking())
+                 ? TTI::CastContextHint::Masked
+                 : TTI::CastContextHint::Normal;
     case LoopVectorizationCostModel::CM_Widen_Reverse:
       return TTI::CastContextHint::Reversed;
     case LoopVectorizationCostModel::CM_Unknown:
@@ -7720,7 +7713,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     return nullptr;
 
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(I))
+  if (Legal->isMasked(I))
     Mask = getBlockInMask(Builder.getInsertBlock());
 
   // Determine if the pointer operand of the access is either consecutive or
@@ -7916,7 +7909,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
   // vector variant at this VF requires a mask, so we synthesize an
   // all-true mask.
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(CI))
+  if (Legal->isMasked(CI))
     Mask = getBlockInMask(Builder.getInsertBlock());
   else
     Mask = Plan.getOrAddLiveIn(
@@ -7957,7 +7950,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::URem: {
     // If not provably safe, use a select to form a safe divisor before widening the
     // div/rem operation itself. Otherwise fall through to general handling below.
-    if (CM.isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
       SmallVector<VPValue *> Ops(Operands);
       VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
       VPValue *One =
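The comment in this hunk refers to the safe-divisor strategy: rather than scalarizing the predicated div/rem, the divisor is replaced with a select over the mask so that inactive lanes divide by 1 and cannot trap. A minimal standalone C++ sketch of the lane-wise effect (illustrative only, not VPlan code):

#include <array>
#include <cstdio>

int main() {
  std::array<int, 4> A{10, 20, 30, 40}, B{2, 0, 5, 0};
  std::array<bool, 4> Mask{true, false, true, false}; // active lanes
  for (int i = 0; i < 4; ++i) {
    // Form a safe divisor: inactive lanes divide by 1 instead of a
    // possibly-zero B[i], so the widened division cannot fault.
    int Divisor = Mask[i] ? B[i] : 1;
    std::printf("%d ", A[i] / Divisor); // inactive-lane results are discarded
  }
  std::printf("\n");
}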
@@ -8040,7 +8033,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
 
   // In case of predicated execution (due to tail-folding, or conditional
   // execution, or both), pass the relevant mask.
-  if (Legal->isMaskRequired(HI->Store))
+  if (Legal->isMasked(HI->Store))
     HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
 
   return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
@@ -8053,7 +8046,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
       [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
       Range);
 
-  bool IsPredicated = CM.isPredicatedInst(I);
+  bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());
 
   // Even if the instruction is not marked as uniform, there are certain
   // intrinsic calls that can be effectively treated as such, so we check for
