@@ -1292,11 +1292,6 @@ class LoopVectorizationCostModel {
12921292 // / \p VF is the vectorization factor that will be used to vectorize \p I.
12931293 bool isScalarWithPredication (Instruction *I, ElementCount VF) const ;
12941294
1295- // / Returns true if \p I is an instruction that needs to be predicated
1296- // / at runtime. The result is independent of the predication mechanism.
1297- // / Superset of instructions that return true for isScalarWithPredication.
1298- bool isPredicatedInst (Instruction *I) const ;
1299-
13001295 // / Return the costs for our two available strategies for lowering a
13011296 // / div/rem operation which requires speculating at least one lane.
13021297 // / First result is for scalarization (will be invalid for scalable
@@ -3018,7 +3013,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
30183013
30193014bool LoopVectorizationCostModel::isScalarWithPredication (
30203015 Instruction *I, ElementCount VF) const {
3021- if (!isPredicatedInst (I ))
3016+ if (!Legal-> isMaskRequired (I, foldTailByMasking () ))
30223017 return false ;
30233018
30243019 // Do we have a non-scalar lowering for this predicated
@@ -3057,59 +3052,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
30573052 }
30583053}
30593054
3060- // TODO: Fold into LoopVectorizationLegality::isMaskRequired.
3061- bool LoopVectorizationCostModel::isPredicatedInst (Instruction *I) const {
3062- // TODO: We can use the loop-preheader as context point here and get
3063- // context sensitive reasoning for isSafeToSpeculativelyExecute.
3064- if (isSafeToSpeculativelyExecute (I) ||
3065- (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired (I)) ||
3066- isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
3067- return false ;
3068-
3069- // If the instruction was executed conditionally in the original scalar loop,
3070- // predication is needed with a mask whose lanes are all possibly inactive.
3071- if (Legal->blockNeedsPredication (I->getParent ()))
3072- return true ;
3073-
3074- // If we're not folding the tail by masking, predication is unnecessary.
3075- if (!foldTailByMasking ())
3076- return false ;
3077-
3078- // All that remain are instructions with side-effects originally executed in
3079- // the loop unconditionally, but now execute under a tail-fold mask (only)
3080- // having at least one active lane (the first). If the side-effects of the
3081- // instruction are invariant, executing it w/o (the tail-folding) mask is safe
3082- // - it will cause the same side-effects as when masked.
3083- switch (I->getOpcode ()) {
3084- default :
3085- llvm_unreachable (
3086- " instruction should have been considered by earlier checks" );
3087- case Instruction::Call:
3088- // Side-effects of a Call are assumed to be non-invariant, needing a
3089- // (fold-tail) mask.
3090- assert (Legal->isMaskRequired (I) &&
3091- " should have returned earlier for calls not needing a mask" );
3092- return true ;
3093- case Instruction::Load:
3094- // If the address is loop invariant no predication is needed.
3095- return !Legal->isInvariant (getLoadStorePointerOperand (I));
3096- case Instruction::Store: {
3097- // For stores, we need to prove both speculation safety (which follows from
3098- // the same argument as loads), but also must prove the value being stored
3099- // is correct. The easiest form of the later is to require that all values
3100- // stored are the same.
3101- return !(Legal->isInvariant (getLoadStorePointerOperand (I)) &&
3102- TheLoop->isLoopInvariant (cast<StoreInst>(I)->getValueOperand ()));
3103- }
3104- case Instruction::UDiv:
3105- case Instruction::SDiv:
3106- case Instruction::SRem:
3107- case Instruction::URem:
3108- // If the divisor is loop-invariant no predication is needed.
3109- return !TheLoop->isLoopInvariant (I->getOperand (1 ));
3110- }
3111- }
3112-
31133055std::pair<InstructionCost, InstructionCost>
31143056LoopVectorizationCostModel::getDivRemSpeculationCost (Instruction *I,
31153057 ElementCount VF) const {
@@ -3222,7 +3164,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
32223164 // load, or any gaps in a store-access).
32233165 bool PredicatedAccessRequiresMasking =
32243166 blockNeedsPredicationForAnyReason (I->getParent ()) &&
3225- Legal->isMaskRequired (I);
3167+ Legal->isMaskRequired (I, foldTailByMasking () );
32263168 bool LoadAccessWithGapsRequiresEpilogMasking =
32273169 isa<LoadInst>(I) && Group->requiresScalarEpilogue () &&
32283170 !isScalarEpilogueAllowed ();
@@ -3312,7 +3254,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
33123254 << *I << " \n " );
33133255 return ;
33143256 }
3315- if (isPredicatedInst (I )) {
3257+ if (Legal-> isMaskRequired (I, foldTailByMasking () )) {
33163258 LLVM_DEBUG (
33173259 dbgs () << " LV: Found not uniform due to requiring predication: " << *I
33183260 << " \n " );
@@ -5450,7 +5392,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
54505392 // from moving "masked load/store" check from legality to cost model.
54515393 // Masked Load/Gather emulation was previously never allowed.
54525394 // Limited number of Masked Store/Scatter emulation was allowed.
5453- assert ((isPredicatedInst (I )) &&
5395+ assert ((Legal-> isMaskRequired (I, foldTailByMasking () )) &&
54545396 " Expecting a scalar emulated instruction" );
54555397 return isa<LoadInst>(I) ||
54565398 (isa<StoreInst>(I) &&
@@ -5752,7 +5694,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
57525694 // If we have a predicated load/store, it will need extra i1 extracts and
57535695 // conditional branches, but may not be executed for each vector lane. Scale
57545696 // the cost by the probability of executing the predicated block.
5755- if (isPredicatedInst (I )) {
5697+ if (Legal-> isMaskRequired (I, foldTailByMasking () )) {
57565698 Cost /= getPredBlockCostDivisor (CostKind);
57575699
57585700 // Add the cost of an i1 extract and a branch
@@ -5785,7 +5727,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
57855727 " Stride should be 1 or -1 for consecutive memory access" );
57865728 const Align Alignment = getLoadStoreAlignment (I);
57875729 InstructionCost Cost = 0 ;
5788- if (Legal->isMaskRequired (I)) {
5730+ if (Legal->isMaskRequired (I, foldTailByMasking () )) {
57895731 Cost += TTI.getMaskedMemoryOpCost (I->getOpcode (), VectorTy, Alignment, AS,
57905732 CostKind);
57915733 } else {
@@ -5838,9 +5780,10 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
58385780 const Value *Ptr = getLoadStorePointerOperand (I);
58395781
58405782 return TTI.getAddressComputationCost (VectorTy) +
5841- TTI.getGatherScatterOpCost (I->getOpcode (), VectorTy, Ptr,
5842- Legal->isMaskRequired (I), Alignment,
5843- CostKind, I);
5783+ TTI.getGatherScatterOpCost (
5784+ I->getOpcode (), VectorTy, Ptr,
5785+ Legal->isMaskRequired (I, foldTailByMasking ()), Alignment, CostKind,
5786+ I);
58445787}
58455788
58465789InstructionCost
@@ -5869,12 +5812,12 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
58695812 (isa<StoreInst>(I) && (Group->getNumMembers () < Group->getFactor ()));
58705813 InstructionCost Cost = TTI.getInterleavedMemoryOpCost (
58715814 InsertPos->getOpcode (), WideVecTy, Group->getFactor (), Indices,
5872- Group->getAlign (), AS, CostKind, Legal-> isMaskRequired (I),
5873- UseMaskForGaps);
5815+ Group->getAlign (), AS, CostKind,
5816+ Legal-> isMaskRequired (I, foldTailByMasking ()), UseMaskForGaps);
58745817
58755818 if (Group->isReverse ()) {
58765819 // TODO: Add support for reversed masked interleaved access.
5877- assert (!Legal->isMaskRequired (I) &&
5820+ assert (!Legal->isMaskRequired (I, foldTailByMasking () ) &&
58785821 " Reverse masked interleaved access not supported." );
58795822 Cost += Group->getNumMembers () *
58805823 TTI.getShuffleCost (TargetTransformInfo::SK_Reverse, VectorTy, {},
@@ -6367,7 +6310,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
63676310 continue ;
63686311 }
63696312
6370- bool MaskRequired = Legal->isMaskRequired (CI);
6313+ bool MaskRequired = Legal->isMaskRequired (CI, foldTailByMasking () );
63716314 // Compute corresponding vector type for return value and arguments.
63726315 Type *RetTy = toVectorizedTy (ScalarRetTy, VF);
63736316 for (Type *ScalarTy : ScalarTys)
@@ -6487,7 +6430,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
64876430 // instruction in the loop. In that case, it is not trivially hoistable.
64886431 auto *OpI = dyn_cast<Instruction>(Op);
64896432 return !OpI || !TheLoop->contains (OpI) ||
6490- (!isPredicatedInst (OpI) &&
6433+ (!Legal-> isMaskRequired (OpI, foldTailByMasking () ) &&
64916434 (!isa<PHINode>(OpI) || OpI->getParent () != TheLoop->getHeader ()) &&
64926435 all_of (OpI->operands (),
64936436 [this ](Value *Op) { return shouldConsiderInvariant (Op); }));
@@ -6675,7 +6618,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
66756618 case Instruction::SDiv:
66766619 case Instruction::URem:
66776620 case Instruction::SRem:
6678- if (VF.isVector () && isPredicatedInst (I )) {
6621+ if (VF.isVector () && Legal-> isMaskRequired (I, foldTailByMasking () )) {
66796622 const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost (I, VF);
66806623 return isDivRemScalarWithPredication (ScalarCost, SafeDivisorCost) ?
66816624 ScalarCost : SafeDivisorCost;
@@ -6859,8 +6802,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
68596802 return TTI::CastContextHint::Interleave;
68606803 case LoopVectorizationCostModel::CM_Scalarize:
68616804 case LoopVectorizationCostModel::CM_Widen:
6862- return Legal->isMaskRequired (I) ? TTI::CastContextHint::Masked
6863- : TTI::CastContextHint::Normal;
6805+ return Legal->isMaskRequired (I, foldTailByMasking ())
6806+ ? TTI::CastContextHint::Masked
6807+ : TTI::CastContextHint::Normal;
68646808 case LoopVectorizationCostModel::CM_Widen_Reverse:
68656809 return TTI::CastContextHint::Reversed;
68666810 case LoopVectorizationCostModel::CM_Unknown:
@@ -8417,7 +8361,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
84178361 return nullptr ;
84188362
84198363 VPValue *Mask = nullptr ;
8420- if (Legal->isMaskRequired (I))
8364+ if (Legal->isMaskRequired (I, CM. foldTailByMasking () ))
84218365 Mask = getBlockInMask (Builder.getInsertBlock ());
84228366
84238367 // Determine if the pointer operand of the access is either consecutive or
@@ -8644,7 +8588,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
86448588 // vector variant at this VF requires a mask, so we synthesize an
86458589 // all-true mask.
86468590 VPValue *Mask = nullptr ;
8647- if (Legal->isMaskRequired (CI))
8591+ if (Legal->isMaskRequired (CI, CM. foldTailByMasking () ))
86488592 Mask = getBlockInMask (Builder.getInsertBlock ());
86498593 else
86508594 Mask = Plan.getOrAddLiveIn (
@@ -8685,7 +8629,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
86858629 case Instruction::URem: {
86868630 // If not provably safe, use a select to form a safe divisor before widening the
86878631 // div/rem operation itself. Otherwise fall through to general handling below.
8688- if (CM.isPredicatedInst (I )) {
8632+ if (Legal-> isMaskRequired (I, CM.foldTailByMasking () )) {
86898633 SmallVector<VPValue *> Ops (Operands);
86908634 VPValue *Mask = getBlockInMask (Builder.getInsertBlock ());
86918635 VPValue *One =
@@ -8768,7 +8712,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
87688712
87698713 // In case of predicated execution (due to tail-folding, or conditional
87708714 // execution, or both), pass the relevant mask.
8771- if (Legal->isMaskRequired (HI->Store ))
8715+ if (Legal->isMaskRequired (HI->Store , CM. foldTailByMasking () ))
87728716 HGramOps.push_back (getBlockInMask (Builder.getInsertBlock ()));
87738717
87748718 return new VPHistogramRecipe (Opcode, HGramOps, HI->Store ->getDebugLoc ());
@@ -8781,7 +8725,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
87818725 [&](ElementCount VF) { return CM.isUniformAfterVectorization (I, VF); },
87828726 Range);
87838727
8784- bool IsPredicated = CM.isPredicatedInst (I );
8728+ bool IsPredicated = Legal-> isMaskRequired (I, CM.foldTailByMasking () );
87858729
87868730 // Even if the instruction is not marked as uniform, there are certain
87878731 // intrinsic calls that can be effectively treated as such, so we check for