Commit d8bc3a7

[LV] Fold isPredicatedInst into isMaskRequired
Fold LoopVectorizationCostModel::isPredicatedInst into LoopVectorizationLegality::isMaskRequired, fixing a pending TODO item. Note that we still need to pass whether or not we're tail-folding by masking from the cost-model into isMaskRequired.
1 parent 47ce75e · commit d8bc3a7
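The essence of the refactor, as a before/after sketch drawn from the diff below (illustrative only, not a compilable excerpt; bodies are elided):

  // Before: the cost model owned the predication query.
  if (isPredicatedInst(I)) {
    ...
  }

  // After: LoopVectorizationLegality answers it, with the tail-folding
  // decision passed in explicitly by each caller.
  if (Legal->isMaskRequired(I, foldTailByMasking())) {      // from the cost model
    ...
  }
  if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {   // from VPRecipeBuilder
    ...
  }

  // Call sites that only need the raw MaskedOp membership test now use:
  bool MaskRequired = Legal->isMasked(CI);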

File tree

3 files changed (+107, -111 lines)


llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 4 additions & 3 deletions
@@ -418,11 +418,12 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
   }
 
+  /// Returns true if MaskedOp contains \p I.
+  bool isMasked(Instruction *I) const { return MaskedOp.contains(I); }
+
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
-  bool isMaskRequired(const Instruction *I) const {
-    return MaskedOp.contains(I);
-  }
+  bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
 
   /// Returns true if there is at least one function call in the loop which
   /// has a vectorized variant available.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 35 additions & 40 deletions
@@ -1292,11 +1292,6 @@ class LoopVectorizationCostModel {
   /// \p VF is the vectorization factor that will be used to vectorize \p I.
   bool isScalarWithPredication(Instruction *I, ElementCount VF) const;
 
-  /// Returns true if \p I is an instruction that needs to be predicated
-  /// at runtime. The result is independent of the predication mechanism.
-  /// Superset of instructions that return true for isScalarWithPredication.
-  bool isPredicatedInst(Instruction *I) const;
-
   /// Return the costs for our two available strategies for lowering a
   /// div/rem operation which requires speculating at least one lane.
   /// First result is for scalarization (will be invalid for scalable
@@ -3018,7 +3013,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
 
 bool LoopVectorizationCostModel::isScalarWithPredication(
     Instruction *I, ElementCount VF) const {
-  if (!isPredicatedInst(I))
+  if (!Legal->isMaskRequired(I, foldTailByMasking()))
     return false;
 
   // Do we have a non-scalar lowering for this predicated
@@ -3057,48 +3052,46 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
   }
 }
 
-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
-  // TODO: We can use the loop-preheader as context point here and get
-  // context sensitive reasoning for isSafeToSpeculativelyExecute.
-  if (isSafeToSpeculativelyExecute(I) ||
-      (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+                                               bool FoldTailByMasking) const {
+  if (isSafeToSpeculativelyExecute(I, TheLoop->getLatchCmpInst()) ||
+      (isa<LoadInst, StoreInst, CallInst>(I) && !MaskedOp.contains(I)) ||
       isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
     return false;
 
   // If the instruction was executed conditionally in the original scalar loop,
   // predication is needed with a mask whose lanes are all possibly inactive.
-  if (Legal->blockNeedsPredication(I->getParent()))
+  if (blockNeedsPredication(I->getParent()))
     return true;
 
-  // If we're not folding the tail by masking, predication is unnecessary.
-  if (!foldTailByMasking())
+  // If we're not folding tail by masking, bail out now.
+  if (!FoldTailByMasking)
     return false;
 
   // All that remain are instructions with side-effects originally executed in
   // the loop unconditionally, but now execute under a tail-fold mask (only)
   // having at least one active lane (the first). If the side-effects of the
   // instruction are invariant, executing it w/o (the tail-folding) mask is safe
   // - it will cause the same side-effects as when masked.
-  switch(I->getOpcode()) {
+  switch (I->getOpcode()) {
   default:
     llvm_unreachable(
         "instruction should have been considered by earlier checks");
   case Instruction::Call:
     // Side-effects of a Call are assumed to be non-invariant, needing a
     // (fold-tail) mask.
-    assert(Legal->isMaskRequired(I) &&
+    assert(MaskedOp.contains(I) &&
            "should have returned earlier for calls not needing a mask");
     return true;
   case Instruction::Load:
     // If the address is loop invariant no predication is needed.
-    return !Legal->isInvariant(getLoadStorePointerOperand(I));
+    return !isInvariant(getLoadStorePointerOperand(I));
   case Instruction::Store: {
     // For stores, we need to prove both speculation safety (which follows from
     // the same argument as loads), but also must prove the value being stored
     // is correct. The easiest form of the later is to require that all values
     // stored are the same.
-    return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
+    return !(isInvariant(getLoadStorePointerOperand(I)) &&
              TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
   }
   case Instruction::UDiv:
@@ -3222,7 +3215,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // load, or any gaps in a store-access).
   bool PredicatedAccessRequiresMasking =
       blockNeedsPredicationForAnyReason(I->getParent()) &&
-      Legal->isMaskRequired(I);
+      Legal->isMaskRequired(I, foldTailByMasking());
   bool LoadAccessWithGapsRequiresEpilogMasking =
       isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
       !isScalarEpilogueAllowed();
@@ -3312,7 +3305,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
                         << *I << "\n");
       return;
     }
-    if (isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, foldTailByMasking())) {
       LLVM_DEBUG(
           dbgs() << "LV: Found not uniform due to requiring predication: " << *I
                  << "\n");
@@ -5450,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
   // from moving "masked load/store" check from legality to cost model.
   // Masked Load/Gather emulation was previously never allowed.
   // Limited number of Masked Store/Scatter emulation was allowed.
-  assert((isPredicatedInst(I)) &&
+  assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
          "Expecting a scalar emulated instruction");
   return isa<LoadInst>(I) ||
          (isa<StoreInst>(I) &&
@@ -5752,7 +5745,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // If we have a predicated load/store, it will need extra i1 extracts and
   // conditional branches, but may not be executed for each vector lane. Scale
   // the cost by the probability of executing the predicated block.
-  if (isPredicatedInst(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost /= getPredBlockCostDivisor(CostKind);
 
     // Add the cost of an i1 extract and a branch
@@ -5785,7 +5778,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
          "Stride should be 1 or -1 for consecutive memory access");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
-  if (Legal->isMaskRequired(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                       CostKind);
   } else {
@@ -5838,9 +5831,10 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
   const Value *Ptr = getLoadStorePointerOperand(I);
 
   return TTI.getAddressComputationCost(VectorTy) +
-         TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment,
-                                    CostKind, I);
+         TTI.getGatherScatterOpCost(
+             I->getOpcode(), VectorTy, Ptr,
+             Legal->isMaskRequired(I, foldTailByMasking()), Alignment, CostKind,
+             I);
 }
 
 InstructionCost
@@ -5869,12 +5863,12 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
       InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
-      UseMaskForGaps);
+      Group->getAlign(), AS, CostKind,
+      Legal->isMaskRequired(I, foldTailByMasking()), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
+    assert(!Legal->isMaskRequired(I, foldTailByMasking()) &&
            "Reverse masked interleaved access not supported.");
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, {},
@@ -6367,7 +6361,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
         continue;
       }
 
-      bool MaskRequired = Legal->isMaskRequired(CI);
+      bool MaskRequired = Legal->isMasked(CI);
       // Compute corresponding vector type for return value and arguments.
       Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
       for (Type *ScalarTy : ScalarTys)
@@ -6487,7 +6481,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
   // instruction in the loop. In that case, it is not trivially hoistable.
   auto *OpI = dyn_cast<Instruction>(Op);
   return !OpI || !TheLoop->contains(OpI) ||
-         (!isPredicatedInst(OpI) &&
+         (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
           (!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
           all_of(OpI->operands(),
                  [this](Value *Op) { return shouldConsiderInvariant(Op); }));
@@ -6675,7 +6669,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-    if (VF.isVector() && isPredicatedInst(I)) {
+    if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
       const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
       return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
         ScalarCost : SafeDivisorCost;
@@ -6859,8 +6853,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       return TTI::CastContextHint::Interleave;
     case LoopVectorizationCostModel::CM_Scalarize:
     case LoopVectorizationCostModel::CM_Widen:
-      return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
-                                      : TTI::CastContextHint::Normal;
+      return Legal->isMaskRequired(I, foldTailByMasking())
+                 ? TTI::CastContextHint::Masked
+                 : TTI::CastContextHint::Normal;
     case LoopVectorizationCostModel::CM_Widen_Reverse:
      return TTI::CastContextHint::Reversed;
    case LoopVectorizationCostModel::CM_Unknown:
@@ -8417,7 +8412,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     return nullptr;
 
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(I))
+  if (Legal->isMaskRequired(I, CM.foldTailByMasking()))
     Mask = getBlockInMask(Builder.getInsertBlock());
 
   // Determine if the pointer operand of the access is either consecutive or
@@ -8644,7 +8639,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
       // vector variant at this VF requires a mask, so we synthesize an
       // all-true mask.
       VPValue *Mask = nullptr;
-      if (Legal->isMaskRequired(CI))
+      if (Legal->isMaskRequired(CI, CM.foldTailByMasking()))
         Mask = getBlockInMask(Builder.getInsertBlock());
       else
         Mask = Plan.getOrAddLiveIn(
@@ -8685,7 +8680,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::URem: {
     // If not provably safe, use a select to form a safe divisor before widening the
     // div/rem operation itself. Otherwise fall through to general handling below.
-    if (CM.isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
       SmallVector<VPValue *> Ops(Operands);
       VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
       VPValue *One =
@@ -8768,7 +8763,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
 
   // In case of predicated execution (due to tail-folding, or conditional
   // execution, or both), pass the relevant mask.
-  if (Legal->isMaskRequired(HI->Store))
+  if (Legal->isMaskRequired(HI->Store, CM.foldTailByMasking()))
     HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
 
   return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
@@ -8781,7 +8776,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
87818776
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
87828777
Range);
87838778

8784-
bool IsPredicated = CM.isPredicatedInst(I);
8779+
bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());
87858780

87868781
// Even if the instruction is not marked as uniform, there are certain
87878782
// intrinsic calls that can be effectively treated as such, so we check for

0 commit comments
