@@ -1283,11 +1283,6 @@ class LoopVectorizationCostModel {
   /// \p VF is the vectorization factor that will be used to vectorize \p I.
   bool isScalarWithPredication(Instruction *I, ElementCount VF) const;
 
-  /// Returns true if \p I is an instruction that needs to be predicated
-  /// at runtime. The result is independent of the predication mechanism.
-  /// Superset of instructions that return true for isScalarWithPredication.
-  bool isPredicatedInst(Instruction *I) const;
-
   /// Return the costs for our two available strategies for lowering a
   /// div/rem operation which requires speculating at least one lane.
   /// First result is for scalarization (will be invalid for scalable
@@ -2941,7 +2936,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
 
 bool LoopVectorizationCostModel::isScalarWithPredication(
     Instruction *I, ElementCount VF) const {
-  if (!isPredicatedInst(I))
+  if (!Legal->isMaskRequired(I, foldTailByMasking()))
     return false;
 
   // Do we have a non-scalar lowering for this predicated
@@ -2980,56 +2975,56 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
   }
 }
 
-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+                                               bool FoldTailByMasking) const {
   // TODO: We can use the loop-preheader as context point here and get
   // context sensitive reasoning for isSafeToSpeculativelyExecute.
   if (isSafeToSpeculativelyExecute(I) ||
-      (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
+      (isa<LoadInst, StoreInst, CallInst>(I) && !isMasked(I)) ||
       isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
     return false;
 
   // If the instruction was executed conditionally in the original scalar loop,
   // predication is needed with a mask whose lanes are all possibly inactive.
-  if (Legal->blockNeedsPredication(I->getParent()))
+  if (blockNeedsPredication(I->getParent()))
     return true;
 
   // If we're not folding the tail by masking, predication is unnecessary.
-  if (!foldTailByMasking())
+  if (!FoldTailByMasking)
     return false;
 
   // All that remain are instructions with side-effects originally executed in
   // the loop unconditionally, but now execute under a tail-fold mask (only)
   // having at least one active lane (the first). If the side-effects of the
   // instruction are invariant, executing it w/o (the tail-folding) mask is safe
   // - it will cause the same side-effects as when masked.
   switch (I->getOpcode()) {
   default:
     llvm_unreachable(
         "instruction should have been considered by earlier checks");
   case Instruction::Call:
     // Side-effects of a Call are assumed to be non-invariant, needing a
     // (fold-tail) mask.
-    assert(Legal->isMaskRequired(I) &&
+    assert(isMasked(I) &&
            "should have returned earlier for calls not needing a mask");
     return true;
   case Instruction::Load:
     // If the address is loop invariant no predication is needed.
-    return !Legal->isInvariant(getLoadStorePointerOperand(I));
+    return !isInvariant(getLoadStorePointerOperand(I));
   case Instruction::Store: {
     // For stores, we need to prove both speculation safety (which follows from
     // the same argument as loads), but also must prove the value being stored
     // is correct. The easiest form of the latter is to require that all values
     // stored are the same.
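     // For example (illustrative, not part of the original comment): an
     // unconditional `A[0] = 42;` in the loop body stores an invariant value
     // to an invariant address, so executing it without the tail-fold mask
     // leaves memory in exactly the state the masked execution would.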
-    return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
+    return !(isInvariant(getLoadStorePointerOperand(I)) &&
              TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
   }
   case Instruction::UDiv:
   case Instruction::SDiv:
   case Instruction::SRem:
   case Instruction::URem:
     // If the divisor is loop-invariant no predication is needed.
-    return !Legal->isInvariant(I->getOperand(1));
+    return !isInvariant(I->getOperand(1));
   }
 }
 
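For context: the declarations behind this rename live in LoopVectorizationLegality.h, whose hunk is not part of this excerpt. Presumably the header changes along these lines (a sketch under that assumption, with `isMasked` as the renamed one-argument `isMaskRequired` and the two-argument form defined above):

    /// Returns true if the vector representation of instruction \p I
    /// requires a mask (assumed rename of the old isMaskRequired(I)).
    bool isMasked(Instruction *I) const;

    /// Returns true if \p I needs to be predicated at runtime: either its
    /// block was conditionally executed in the original scalar loop, or
    /// \p FoldTailByMasking places its side-effects under a tail-fold mask.
    bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
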
@@ -3144,8 +3139,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // (either a gap at the end of a load-access that may result in a speculative
   // load, or any gaps in a store-access).
   bool PredicatedAccessRequiresMasking =
-      blockNeedsPredicationForAnyReason(I->getParent()) &&
-      Legal->isMaskRequired(I);
+      blockNeedsPredicationForAnyReason(I->getParent()) && Legal->isMasked(I);
   bool LoadAccessWithGapsRequiresEpilogMasking =
       isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
       !isScalarEpilogueAllowed();
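To picture the gap problem described above (hypothetical layout): in an interleave group of factor 3 whose members only cover indices 0 and 1, each wide load also reads the unused index-2 slots, and the slot after the last group is a speculative read that may fault at the end of the buffer; a gap in a store group is worse, since writing it would clobber memory the scalar loop never touched:

    wide load, VF = 4:  a0 b0 __ a1 b1 __ a2 b2 __ a3 b3 __
                              ^ interior gaps            ^ trailing gap:
                                                           speculative read
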
@@ -3235,7 +3229,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
                << *I << "\n");
       return;
     }
-    if (isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, foldTailByMasking())) {
       LLVM_DEBUG(
           dbgs() << "LV: Found not uniform due to requiring predication: " << *I
                  << "\n");
@@ -4952,7 +4946,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
   // from moving "masked load/store" check from legality to cost model.
   // Masked Load/Gather emulation was previously never allowed.
   // Limited number of Masked Store/Scatter emulation was allowed.
-  assert((isPredicatedInst(I)) &&
+  assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
          "Expecting a scalar emulated instruction");
   return isa<LoadInst>(I) ||
          (isa<StoreInst>(I) &&
@@ -5254,7 +5248,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // If we have a predicated load/store, it will need extra i1 extracts and
   // conditional branches, but may not be executed for each vector lane. Scale
   // the cost by the probability of executing the predicated block.
-  if (isPredicatedInst(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost /= getPredBlockCostDivisor(CostKind);
 
     // Add the cost of an i1 extract and a branch
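As a worked illustration of this scaling (the numbers are hypothetical, and the divisor of 2 is an assumption, corresponding to the predicated block executing on roughly half the iterations):

    scalarized cost 8  ->  8 / getPredBlockCostDivisor(..) = 8 / 2 = 4
    total cost ~ 4 + cost(i1 extracts) + cost(branches)
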
@@ -5287,7 +5281,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
          "Stride should be 1 or -1 for consecutive memory access");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
-  if (Legal->isMaskRequired(I)) {
+  if (Legal->isMasked(I)) {
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                       CostKind);
   } else {
@@ -5345,8 +5339,7 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
 
   return TTI.getAddressComputationCost(VectorTy) +
          TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment,
-                                    CostKind, I);
+                                    Legal->isMasked(I), Alignment, CostKind, I);
 }
 
 InstructionCost
@@ -5375,12 +5368,11 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
       InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
-      UseMaskForGaps);
+      Group->getAlign(), AS, CostKind, Legal->isMasked(I), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
+    assert(!Legal->isMasked(I) &&
            "Reverse masked interleaved access not supported.");
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
@@ -5873,7 +5865,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
       continue;
     }
 
-    bool MaskRequired = Legal->isMaskRequired(CI);
+    bool MaskRequired = Legal->isMasked(CI);
     // Compute corresponding vector type for return value and arguments.
     Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
     for (Type *ScalarTy : ScalarTys)
@@ -5993,7 +5985,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
   // instruction in the loop. In that case, it is not trivially hoistable.
   auto *OpI = dyn_cast<Instruction>(Op);
   return !OpI || !TheLoop->contains(OpI) ||
-         (!isPredicatedInst(OpI) &&
+         (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
           (!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
           all_of(OpI->operands(),
                  [this](Value *Op) { return shouldConsiderInvariant(Op); }));
@@ -6176,7 +6168,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-    if (VF.isVector() && isPredicatedInst(I)) {
+    if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
       const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
       return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
           ScalarCost : SafeDivisorCost;
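The safe-divisor alternative weighed here is the select-based transform that tryToWiden builds further down, where the block mask and the constant One are materialized. A minimal scalar-form sketch of the idea (illustrative, not the vectorizer's actual code):

    // Masked-off lanes divide by 1 instead of the real divisor, so the
    // widened division cannot trap; their results are never consumed.
    int safeDivLane(int Dividend, int Divisor, bool LaneActive) {
      int Safe = LaneActive ? Divisor : 1; // select(mask, divisor, 1)
      return Dividend / Safe;              // ignored when !LaneActive
    }
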
@@ -6360,8 +6352,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       return TTI::CastContextHint::Interleave;
     case LoopVectorizationCostModel::CM_Scalarize:
     case LoopVectorizationCostModel::CM_Widen:
-      return isPredicatedInst(I) ? TTI::CastContextHint::Masked
-                                 : TTI::CastContextHint::Normal;
+      return Legal->isMaskRequired(I, foldTailByMasking())
+                 ? TTI::CastContextHint::Masked
+                 : TTI::CastContextHint::Normal;
     case LoopVectorizationCostModel::CM_Widen_Reverse:
       return TTI::CastContextHint::Reversed;
     case LoopVectorizationCostModel::CM_Unknown:
@@ -7720,7 +7713,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     return nullptr;
 
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(I))
+  if (Legal->isMasked(I))
     Mask = getBlockInMask(Builder.getInsertBlock());
 
   // Determine if the pointer operand of the access is either consecutive or
@@ -7916,7 +7909,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
   // vector variant at this VF requires a mask, so we synthesize an
   // all-true mask.
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(CI))
+  if (Legal->isMasked(CI))
     Mask = getBlockInMask(Builder.getInsertBlock());
   else
     Mask = Plan.getOrAddLiveIn(
@@ -7957,7 +7950,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::URem: {
     // If not provably safe, use a select to form a safe divisor before widening the
     // div/rem operation itself. Otherwise fall through to general handling below.
-    if (CM.isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
       SmallVector<VPValue *> Ops(Operands);
       VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
       VPValue *One =
@@ -8040,7 +8033,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
 
   // In case of predicated execution (due to tail-folding, or conditional
   // execution, or both), pass the relevant mask.
-  if (Legal->isMaskRequired(HI->Store))
+  if (Legal->isMasked(HI->Store))
     HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
 
   return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
@@ -8053,7 +8046,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
       [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
       Range);
 
-  bool IsPredicated = CM.isPredicatedInst(I);
+  bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());
 
   // Even if the instruction is not marked as uniform, there are certain
   // intrinsic calls that can be effectively treated as such, so we check for