@@ -1292,11 +1292,6 @@ class LoopVectorizationCostModel {
12921292 // / \p VF is the vectorization factor that will be used to vectorize \p I.
12931293 bool isScalarWithPredication (Instruction *I, ElementCount VF) const ;
12941294
1295- // / Returns true if \p I is an instruction that needs to be predicated
1296- // / at runtime. The result is independent of the predication mechanism.
1297- // / Superset of instructions that return true for isScalarWithPredication.
1298- bool isPredicatedInst (Instruction *I) const ;
1299-
13001295 // / Return the costs for our two available strategies for lowering a
13011296 // / div/rem operation which requires speculating at least one lane.
13021297 // / First result is for scalarization (will be invalid for scalable
@@ -3018,7 +3013,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
30183013
30193014bool LoopVectorizationCostModel::isScalarWithPredication (
30203015 Instruction *I, ElementCount VF) const {
3021- if (!isPredicatedInst (I ))
3016+ if (!Legal-> isMaskRequired (I, foldTailByMasking () ))
30223017 return false ;
30233018
30243019 // Do we have a non-scalar lowering for this predicated
@@ -3057,48 +3052,46 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
30573052 }
30583053}
30593054
3060- // TODO: Fold into LoopVectorizationLegality::isMaskRequired.
3061- bool LoopVectorizationCostModel::isPredicatedInst (Instruction *I) const {
3062- // TODO: We can use the loop-preheader as context point here and get
3063- // context sensitive reasoning for isSafeToSpeculativelyExecute.
3064- if (isSafeToSpeculativelyExecute (I) ||
3065- (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired (I)) ||
3055+ bool LoopVectorizationLegality::isMaskRequired (Instruction *I,
3056+ bool FoldTailByMasking) const {
3057+ if (isSafeToSpeculativelyExecute (I, TheLoop->getLatchCmpInst ()) ||
3058+ (isa<LoadInst, StoreInst, CallInst>(I) && !MaskedOp.contains (I)) ||
30663059 isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
30673060 return false ;
30683061
30693062 // If the instruction was executed conditionally in the original scalar loop,
30703063 // predication is needed with a mask whose lanes are all possibly inactive.
3071- if (Legal-> blockNeedsPredication (I->getParent ()))
3064+ if (blockNeedsPredication (I->getParent ()))
30723065 return true ;
30733066
3074- // If we're not folding the tail by masking, predication is unnecessary .
3075- if (!foldTailByMasking () )
3067+ // If we're not folding tail by masking, bail out now .
3068+ if (!FoldTailByMasking )
30763069 return false ;
30773070
30783071 // All that remain are instructions with side-effects originally executed in
30793072 // the loop unconditionally, but now execute under a tail-fold mask (only)
30803073 // having at least one active lane (the first). If the side-effects of the
30813074 // instruction are invariant, executing it w/o (the tail-folding) mask is safe
30823075 // - it will cause the same side-effects as when masked.
3083- switch (I->getOpcode ()) {
3076+ switch (I->getOpcode ()) {
30843077 default :
30853078 llvm_unreachable (
30863079 " instruction should have been considered by earlier checks" );
30873080 case Instruction::Call:
30883081 // Side-effects of a Call are assumed to be non-invariant, needing a
30893082 // (fold-tail) mask.
3090- assert (Legal-> isMaskRequired (I) &&
3083+ assert (MaskedOp. contains (I) &&
30913084 " should have returned earlier for calls not needing a mask" );
30923085 return true ;
30933086 case Instruction::Load:
30943087 // If the address is loop invariant no predication is needed.
3095- return !Legal-> isInvariant (getLoadStorePointerOperand (I));
3088+ return !isInvariant (getLoadStorePointerOperand (I));
30963089 case Instruction::Store: {
30973090 // For stores, we need to prove both speculation safety (which follows from
30983091 // the same argument as loads), but also must prove the value being stored
30993092 // is correct. The easiest form of the latter is to require that all values
31003093 // stored are the same.
3101- return !(Legal-> isInvariant (getLoadStorePointerOperand (I)) &&
3094+ return !(isInvariant (getLoadStorePointerOperand (I)) &&
31023095 TheLoop->isLoopInvariant (cast<StoreInst>(I)->getValueOperand ()));
31033096 }
31043097 case Instruction::UDiv:
@@ -3222,7 +3215,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
32223215 // load, or any gaps in a store-access).
32233216 bool PredicatedAccessRequiresMasking =
32243217 blockNeedsPredicationForAnyReason (I->getParent ()) &&
3225- Legal->isMaskRequired (I);
3218+ Legal->isMaskRequired (I, foldTailByMasking () );
32263219 bool LoadAccessWithGapsRequiresEpilogMasking =
32273220 isa<LoadInst>(I) && Group->requiresScalarEpilogue () &&
32283221 !isScalarEpilogueAllowed ();
@@ -3312,7 +3305,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
33123305 << *I << " \n " );
33133306 return ;
33143307 }
3315- if (isPredicatedInst (I )) {
3308+ if (Legal-> isMaskRequired (I, foldTailByMasking () )) {
33163309 LLVM_DEBUG (
33173310 dbgs () << " LV: Found not uniform due to requiring predication: " << *I
33183311 << " \n " );
@@ -5450,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
54505443 // from moving "masked load/store" check from legality to cost model.
54515444 // Masked Load/Gather emulation was previously never allowed.
54525445 // Limited number of Masked Store/Scatter emulation was allowed.
5453- assert ((isPredicatedInst (I )) &&
5446+ assert ((Legal-> isMaskRequired (I, foldTailByMasking () )) &&
54545447 " Expecting a scalar emulated instruction" );
54555448 return isa<LoadInst>(I) ||
54565449 (isa<StoreInst>(I) &&
@@ -5752,7 +5745,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
57525745 // If we have a predicated load/store, it will need extra i1 extracts and
57535746 // conditional branches, but may not be executed for each vector lane. Scale
57545747 // the cost by the probability of executing the predicated block.
5755- if (isPredicatedInst (I )) {
5748+ if (Legal-> isMaskRequired (I, foldTailByMasking () )) {
57565749 Cost /= getPredBlockCostDivisor (CostKind);
57575750
57585751 // Add the cost of an i1 extract and a branch
@@ -5785,7 +5778,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
57855778 " Stride should be 1 or -1 for consecutive memory access" );
57865779 const Align Alignment = getLoadStoreAlignment (I);
57875780 InstructionCost Cost = 0 ;
5788- if (Legal->isMaskRequired (I)) {
5781+ if (Legal->isMaskRequired (I, foldTailByMasking () )) {
57895782 Cost += TTI.getMaskedMemoryOpCost (I->getOpcode (), VectorTy, Alignment, AS,
57905783 CostKind);
57915784 } else {
@@ -5838,9 +5831,10 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
58385831 const Value *Ptr = getLoadStorePointerOperand (I);
58395832
58405833 return TTI.getAddressComputationCost (VectorTy) +
5841- TTI.getGatherScatterOpCost (I->getOpcode (), VectorTy, Ptr,
5842- Legal->isMaskRequired (I), Alignment,
5843- CostKind, I);
5834+ TTI.getGatherScatterOpCost (
5835+ I->getOpcode (), VectorTy, Ptr,
5836+ Legal->isMaskRequired (I, foldTailByMasking ()), Alignment, CostKind,
5837+ I);
58445838}
58455839
58465840InstructionCost
@@ -5869,12 +5863,12 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
58695863 (isa<StoreInst>(I) && (Group->getNumMembers () < Group->getFactor ()));
58705864 InstructionCost Cost = TTI.getInterleavedMemoryOpCost (
58715865 InsertPos->getOpcode (), WideVecTy, Group->getFactor (), Indices,
5872- Group->getAlign (), AS, CostKind, Legal-> isMaskRequired (I),
5873- UseMaskForGaps);
5866+ Group->getAlign (), AS, CostKind,
5867+ Legal-> isMaskRequired (I, foldTailByMasking ()), UseMaskForGaps);
58745868
58755869 if (Group->isReverse ()) {
58765870 // TODO: Add support for reversed masked interleaved access.
5877- assert (!Legal->isMaskRequired (I) &&
5871+ assert (!Legal->isMaskRequired (I, foldTailByMasking () ) &&
58785872 " Reverse masked interleaved access not supported." );
58795873 Cost += Group->getNumMembers () *
58805874 TTI.getShuffleCost (TargetTransformInfo::SK_Reverse, VectorTy, {},
@@ -6367,7 +6361,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
63676361 continue ;
63686362 }
63696363
6370- bool MaskRequired = Legal->isMaskRequired (CI);
6364+ bool MaskRequired = Legal->isMasked (CI);
63716365 // Compute corresponding vector type for return value and arguments.
63726366 Type *RetTy = toVectorizedTy (ScalarRetTy, VF);
63736367 for (Type *ScalarTy : ScalarTys)
@@ -6487,7 +6481,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
64876481 // instruction in the loop. In that case, it is not trivially hoistable.
64886482 auto *OpI = dyn_cast<Instruction>(Op);
64896483 return !OpI || !TheLoop->contains (OpI) ||
6490- (!isPredicatedInst (OpI) &&
6484+ (!Legal-> isMaskRequired (OpI, foldTailByMasking () ) &&
64916485 (!isa<PHINode>(OpI) || OpI->getParent () != TheLoop->getHeader ()) &&
64926486 all_of (OpI->operands (),
64936487 [this ](Value *Op) { return shouldConsiderInvariant (Op); }));
@@ -6675,7 +6669,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
66756669 case Instruction::SDiv:
66766670 case Instruction::URem:
66776671 case Instruction::SRem:
6678- if (VF.isVector () && isPredicatedInst (I )) {
6672+ if (VF.isVector () && Legal-> isMaskRequired (I, foldTailByMasking () )) {
66796673 const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost (I, VF);
66806674 return isDivRemScalarWithPredication (ScalarCost, SafeDivisorCost) ?
66816675 ScalarCost : SafeDivisorCost;
@@ -6859,8 +6853,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
68596853 return TTI::CastContextHint::Interleave;
68606854 case LoopVectorizationCostModel::CM_Scalarize:
68616855 case LoopVectorizationCostModel::CM_Widen:
6862- return Legal->isMaskRequired (I) ? TTI::CastContextHint::Masked
6863- : TTI::CastContextHint::Normal;
6856+ return Legal->isMaskRequired (I, foldTailByMasking ())
6857+ ? TTI::CastContextHint::Masked
6858+ : TTI::CastContextHint::Normal;
68646859 case LoopVectorizationCostModel::CM_Widen_Reverse:
68656860 return TTI::CastContextHint::Reversed;
68666861 case LoopVectorizationCostModel::CM_Unknown:
@@ -8417,7 +8412,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
84178412 return nullptr ;
84188413
84198414 VPValue *Mask = nullptr ;
8420- if (Legal->isMaskRequired (I))
8415+ if (Legal->isMaskRequired (I, CM. foldTailByMasking () ))
84218416 Mask = getBlockInMask (Builder.getInsertBlock ());
84228417
84238418 // Determine if the pointer operand of the access is either consecutive or
@@ -8644,7 +8639,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
86448639 // vector variant at this VF requires a mask, so we synthesize an
86458640 // all-true mask.
86468641 VPValue *Mask = nullptr ;
8647- if (Legal->isMaskRequired (CI))
8642+ if (Legal->isMaskRequired (CI, CM. foldTailByMasking () ))
86488643 Mask = getBlockInMask (Builder.getInsertBlock ());
86498644 else
86508645 Mask = Plan.getOrAddLiveIn (
@@ -8685,7 +8680,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
86858680 case Instruction::URem: {
86868681 // If not provably safe, use a select to form a safe divisor before widening the
86878682 // div/rem operation itself. Otherwise fall through to general handling below.
8688- if (CM.isPredicatedInst (I )) {
8683+ if (Legal-> isMaskRequired (I, CM.foldTailByMasking () )) {
86898684 SmallVector<VPValue *> Ops (Operands);
86908685 VPValue *Mask = getBlockInMask (Builder.getInsertBlock ());
86918686 VPValue *One =
@@ -8768,7 +8763,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
87688763
87698764 // In case of predicated execution (due to tail-folding, or conditional
87708765 // execution, or both), pass the relevant mask.
8771- if (Legal->isMaskRequired (HI->Store ))
8766+ if (Legal->isMaskRequired (HI->Store , CM. foldTailByMasking () ))
87728767 HGramOps.push_back (getBlockInMask (Builder.getInsertBlock ()));
87738768
87748769 return new VPHistogramRecipe (Opcode, HGramOps, HI->Store ->getDebugLoc ());
@@ -8781,7 +8776,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
87818776 [&](ElementCount VF) { return CM.isUniformAfterVectorization (I, VF); },
87828777 Range);
87838778
8784- bool IsPredicated = CM.isPredicatedInst (I );
8779+ bool IsPredicated = Legal-> isMaskRequired (I, CM.foldTailByMasking () );
87858780
87868781 // Even if the instruction is not marked as uniform, there are certain
87878782 // intrinsic calls that can be effectively treated as such, so we check for
0 commit comments