@@ -1313,11 +1313,6 @@ class LoopVectorizationCostModel {
   /// \p VF is the vectorization factor that will be used to vectorize \p I.
   bool isScalarWithPredication(Instruction *I, ElementCount VF) const;

-  /// Returns true if \p I is an instruction that needs to be predicated
-  /// at runtime. The result is independent of the predication mechanism.
-  /// Superset of instructions that return true for isScalarWithPredication.
-  bool isPredicatedInst(Instruction *I) const;
-
   /// Return the costs for our two available strategies for lowering a
   /// div/rem operation which requires speculating at least one lane.
   /// First result is for scalarization (will be invalid for scalable
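This commit folds the cost model's isPredicatedInst into LoopVectorizationLegality::isMaskRequired: every caller below now passes the tail-folding decision in explicitly. Only the call sites are visible in this diff, so the following declaration is a sketch of the assumed new interface (parameter name and doc comment are guesses, not part of the patch):

```cpp
/// Returns true if vectorizing \p I requires a mask, accounting both for
/// blocks that were conditional in the original scalar loop and for
/// predication introduced by tail folding. FoldTailByMasking (name assumed)
/// carries the cost model's tail-folding decision, so Legality no longer
/// needs to call back into the cost model.
bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
```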
@@ -3203,7 +3198,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {

 bool LoopVectorizationCostModel::isScalarWithPredication(
     Instruction *I, ElementCount VF) const {
-  if (!isPredicatedInst(I))
+  if (!Legal->isMaskRequired(I, foldTailByMasking()))
     return false;

   // Do we have a non-scalar lowering for this predicated
@@ -3242,57 +3237,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
   }
 }

-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
-  // If predication is not needed, avoid it.
-  // TODO: We can use the loop-preheader as context point here and get
-  // context sensitive reasoning for isSafeToSpeculativelyExecute.
-  if (!blockNeedsPredicationForAnyReason(I->getParent()) ||
-      isSafeToSpeculativelyExecute(I) ||
-      (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
-      isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
-    return false;
-
-  // If the instruction was executed conditionally in the original scalar loop,
-  // predication is needed with a mask whose lanes are all possibly inactive.
-  if (Legal->blockNeedsPredication(I->getParent()))
-    return true;
-
-  // All that remain are instructions with side-effects originally executed in
-  // the loop unconditionally, but now execute under a tail-fold mask (only)
-  // having at least one active lane (the first). If the side-effects of the
-  // instruction are invariant, executing it w/o (the tail-folding) mask is safe
-  // - it will cause the same side-effects as when masked.
-  switch (I->getOpcode()) {
-  default:
-    llvm_unreachable(
-        "instruction should have been considered by earlier checks");
-  case Instruction::Call:
-    // Side-effects of a Call are assumed to be non-invariant, needing a
-    // (fold-tail) mask.
-    assert(Legal->isMaskRequired(I) &&
-           "should have returned earlier for calls not needing a mask");
-    return true;
-  case Instruction::Load:
-    // If the address is loop invariant no predication is needed.
-    return !Legal->isInvariant(getLoadStorePointerOperand(I));
-  case Instruction::Store: {
-    // For stores, we need to prove both speculation safety (which follows from
-    // the same argument as loads), but also must prove the value being stored
-    // is correct. The easiest form of the latter is to require that all values
-    // stored are the same.
-    return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
-             TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
-  }
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::SRem:
-  case Instruction::URem:
-    // If the divisor is loop-invariant no predication is needed.
-    return !TheLoop->isLoopInvariant(I->getOperand(1));
-  }
-}
-
 std::pair<InstructionCost, InstructionCost>
 LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
                                                      ElementCount VF) const {
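Per the TODO on the deleted function, its decision tree moves into LoopVectorizationLegality::isMaskRequired rather than disappearing. As a compilable stub with the LLVM queries replaced by booleans (all names illustrative, not the actual patch):

```cpp
// Shape of the reasoning the deleted isPredicatedInst encoded. The inputs
// stand in for isSafeToSpeculativelyExecute, blockNeedsPredication, and the
// per-opcode invariance checks from the switch above (invariant store
// value/address, invariant divisor).
bool needsRuntimePredication(bool SafeToSpeculate, bool BlockWasConditional,
                             bool FoldTailByMasking,
                             bool SideEffectsAreInvariant) {
  if (SafeToSpeculate)
    return false; // no side effects to guard
  if (BlockWasConditional)
    return true; // every mask lane may be inactive
  // Remaining case: unconditional in the scalar loop, masked only by tail
  // folding, where at least the first lane is active. Invariant side
  // effects are identical masked or unmasked, so no mask is needed.
  return FoldTailByMasking && !SideEffectsAreInvariant;
}
```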
@@ -3405,7 +3349,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // load, or any gaps in a store-access).
   bool PredicatedAccessRequiresMasking =
       blockNeedsPredicationForAnyReason(I->getParent()) &&
-      Legal->isMaskRequired(I);
+      Legal->isMaskRequired(I, foldTailByMasking());
   bool LoadAccessWithGapsRequiresEpilogMasking =
       isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
       !isScalarEpilogueAllowed();
@@ -3494,7 +3438,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
                << *I << "\n");
     return;
   }
-  if (isPredicatedInst(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     LLVM_DEBUG(
         dbgs() << "LV: Found not uniform due to requiring predication: " << *I
                << "\n");
@@ -5379,7 +5323,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
   // from moving "masked load/store" check from legality to cost model.
   // Masked Load/Gather emulation was previously never allowed.
   // Limited number of Masked Store/Scatter emulation was allowed.
-  assert((isPredicatedInst(I)) &&
+  assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
          "Expecting a scalar emulated instruction");
   return isa<LoadInst>(I) ||
          (isa<StoreInst>(I) &&
@@ -5677,7 +5621,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // If we have a predicated load/store, it will need extra i1 extracts and
   // conditional branches, but may not be executed for each vector lane. Scale
   // the cost by the probability of executing the predicated block.
-  if (isPredicatedInst(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost /= getPredBlockCostDivisor(CostKind);

     // Add the cost of an i1 extract and a branch
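To make the scaling concrete, here is a toy version of the computation with made-up constants; the real divisor comes from getPredBlockCostDivisor and the extract/branch costs from TTI:

```cpp
#include <cstdio>

// Illustrative cost of a scalarized predicated memory op: the per-lane cost
// is divided by an assumed block-execution-probability divisor, then each
// lane pays an i1 extract plus a conditional branch.
int predicatedMemOpCost(int Lanes, int LaneCost) {
  const int PredBlockCostDivisor = 2; // assumed ~50% execution probability
  const int ExtractCost = 1, BranchCost = 1;
  int Cost = Lanes * LaneCost / PredBlockCostDivisor;
  return Cost + Lanes * (ExtractCost + BranchCost);
}

int main() { std::printf("%d\n", predicatedMemOpCost(4, 10)); } // prints 28
```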
@@ -5710,7 +5654,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
          "Stride should be 1 or -1 for consecutive memory access");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
-  if (Legal->isMaskRequired(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                       CostKind);
   } else {
@@ -5763,9 +5707,10 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
   const Value *Ptr = getLoadStorePointerOperand(I);

   return TTI.getAddressComputationCost(VectorTy) +
-         TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment,
-                                    CostKind, I);
+         TTI.getGatherScatterOpCost(
+             I->getOpcode(), VectorTy, Ptr,
+             Legal->isMaskRequired(I, foldTailByMasking()), Alignment, CostKind,
+             I);
 }

 InstructionCost
@@ -5794,12 +5739,12 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
       InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
-      UseMaskForGaps);
+      Group->getAlign(), AS, CostKind,
+      Legal->isMaskRequired(I, foldTailByMasking()), UseMaskForGaps);

   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
+    assert(!Legal->isMaskRequired(I, foldTailByMasking()) &&
            "Reverse masked interleaved access not supported.");
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, {},
@@ -6287,7 +6232,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
       continue;
     }

-    bool MaskRequired = Legal->isMaskRequired(CI);
+    bool MaskRequired = Legal->isMaskRequired(CI, foldTailByMasking());
     // Compute corresponding vector type for return value and arguments.
     Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
     for (Type *ScalarTy : ScalarTys)
@@ -6407,7 +6352,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
   // instruction in the loop. In that case, it is not trivially hoistable.
   auto *OpI = dyn_cast<Instruction>(Op);
   return !OpI || !TheLoop->contains(OpI) ||
-         (!isPredicatedInst(OpI) &&
+         (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
           (!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
           all_of(OpI->operands(),
                  [this](Value *Op) { return shouldConsiderInvariant(Op); }));
@@ -6595,7 +6540,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-    if (VF.isVector() && isPredicatedInst(I)) {
+    if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
       const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
       return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
         ScalarCost : SafeDivisorCost;
@@ -6779,8 +6724,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
         return TTI::CastContextHint::Interleave;
       case LoopVectorizationCostModel::CM_Scalarize:
       case LoopVectorizationCostModel::CM_Widen:
-        return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
-                                        : TTI::CastContextHint::Normal;
+        return Legal->isMaskRequired(I, foldTailByMasking())
+                   ? TTI::CastContextHint::Masked
+                   : TTI::CastContextHint::Normal;
       case LoopVectorizationCostModel::CM_Widen_Reverse:
         return TTI::CastContextHint::Reversed;
       case LoopVectorizationCostModel::CM_Unknown:
@@ -8317,7 +8263,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     return nullptr;

   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(I))
+  if (Legal->isMaskRequired(I, CM.foldTailByMasking()))
     Mask = getBlockInMask(I->getParent());

   // Determine if the pointer operand of the access is either consecutive or
@@ -8543,7 +8489,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
   // vector variant at this VF requires a mask, so we synthesize an
   // all-true mask.
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(CI))
+  if (Legal->isMaskRequired(CI, CM.foldTailByMasking()))
     Mask = getBlockInMask(CI->getParent());
   else
     Mask = Plan.getOrAddLiveIn(
@@ -8584,7 +8530,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::URem: {
     // If not provably safe, use a select to form a safe divisor before widening the
     // div/rem operation itself. Otherwise fall through to general handling below.
-    if (CM.isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
       SmallVector<VPValue *> Ops(Operands);
       VPValue *Mask = getBlockInMask(I->getParent());
       VPValue *One =
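The transform guarded here, reduced to scalar C++ as a sketch of the lane semantics only (the real code builds VPlan select and div recipes):

```cpp
#include <cassert>

// Safe-divisor lowering: lanes whose mask bit is off get divisor 1 via
// select(Mask, B, 1), so the unconditionally executed wide divide cannot
// trap; results on inactive lanes are dead anyway.
unsigned safeUDivLane(unsigned A, unsigned B, bool MaskOn) {
  unsigned SafeB = MaskOn ? B : 1u;
  return A / SafeB;
}

int main() {
  assert(safeUDivLane(8, 2, true) == 4);  // active lane: real quotient
  assert(safeUDivLane(8, 0, false) == 8); // inactive lane: no divide-by-zero
}
```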
@@ -8667,7 +8613,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,

   // In case of predicated execution (due to tail-folding, or conditional
   // execution, or both), pass the relevant mask.
-  if (Legal->isMaskRequired(HI->Store))
+  if (Legal->isMaskRequired(HI->Store, CM.foldTailByMasking()))
     HGramOps.push_back(getBlockInMask(HI->Store->getParent()));

   return new VPHistogramRecipe(Opcode,
@@ -8682,7 +8628,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
       [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
       Range);

-  bool IsPredicated = CM.isPredicatedInst(I);
+  bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());

   // Even if the instruction is not marked as uniform, there are certain
   // intrinsic calls that can be effectively treated as such, so we check for