@@ -402,7 +402,7 @@ static cl::opt<bool> EnableEarlyExitVectorization(
402402 " Enable vectorization of early exit loops with uncountable exits." ));
403403
// Hidden command-line option `max-num-faulting-pointers`: the largest number
// of potentially faulting pointers tolerated when vectorizing a loop with
// uncountable exits. This hunk changes the initial value from 1 to 0.
// processLoop compares LVL.getNumPotentiallyFaultingPointers() against this
// value with `>`, so with the new initial value any loop that has at least
// one such pointer is reported as a vectorization failure unless the option
// is raised explicitly.
404404static cl::opt<unsigned > MaxNumPotentiallyFaultingPointers (
405- " max-num-faulting-pointers" , cl::init(1 ), cl::Hidden,
405+ " max-num-faulting-pointers" , cl::init(0 ), cl::Hidden,
406406 cl::desc(
407407 " The maximum number of potentially faulting pointers we permit when "
408408 " vectorizing loops with uncountable exits." ));
@@ -1621,22 +1621,6 @@ class LoopVectorizationCostModel {
16211621 ElementCount MaxSafeVF,
16221622 bool FoldTailByMasking);
16231623
// Removed by this change. This cost-model wrapper returned true only when
// Legal->isSafeForAnyVectorWidth() held and the loop either had no
// uncountable early exit or had zero potentially faulting loads. The call
// sites visible in this diff now query Legal->isSafeForAnyVectorWidth()
// directly.
1624- bool isSafeForAnyVectorWidth () const {
1625- return Legal->isSafeForAnyVectorWidth () &&
1626- (!Legal->hasUncountableEarlyExit () ||
1627- !Legal->getNumPotentiallyFaultingLoads ());
1628- }
1629-
// Removed by this change. This cost-model wrapper started from
// Legal->getMaxSafeVectorWidthInBits() and, for loops with an uncountable
// early exit and at least one potentially faulting load, clamped the result
// to the minimum page size expressed in bits (*TTI.getMinPageSize() * 8).
// The optional returned by getMinPageSize() is dereferenced without a check;
// the comment inside the body states the precondition that makes this safe.
// computeFeasibleMaxVF now calls Legal->getMaxSafeVectorWidthInBits()
// directly.
1630- uint64_t getMaxSafeVectorWidthInBits () const {
1631- uint64_t MaxSafeVectorWidth = Legal->getMaxSafeVectorWidthInBits ();
1632- // The legalizer bails out if getMinPageSize does not return a value.
1633- if (Legal->hasUncountableEarlyExit () &&
1634- Legal->getNumPotentiallyFaultingLoads ())
1635- MaxSafeVectorWidth =
1636- std::min (MaxSafeVectorWidth, uint64_t (*TTI.getMinPageSize ()) * 8 );
1637- return MaxSafeVectorWidth;
1638- }
1639-
16401624 // / Checks if scalable vectorization is supported and enabled. Caches the
16411625 // / result to avoid repeated debug dumps for repeated queries.
16421626 bool isScalableVectorizationAllowed ();
@@ -2185,38 +2169,24 @@ class GeneratedRTChecks {
21852169};
21862170} // namespace
21872171
// Removed from this position by this change; a definition with the same
// text is added later in the file, after emitTransformedIndex.
//
// Returns the maximum vscale to assume for function F:
//   1. the value from TTI.getMaxVScale() when the target provides one;
//   2. otherwise the maximum of F's VScaleRange attribute, when F has one;
//   3. otherwise std::nullopt.
2188- std::optional<unsigned > getMaxVScale (const Function &F,
2189- const TargetTransformInfo &TTI) {
2190- if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2191- return MaxVScale;
2192-
2193- if (F.hasFnAttribute (Attribute::VScaleRange))
2194- return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2195-
2196- return std::nullopt ;
2197- }
2198-
// Adds one SCEV equality predicate to PSE per tracked pointer, requiring
//   ptrtoint(pointer) urem (element-count * element-store-size) == 0.
//
// Differences between the removed (-) and added (+) lines of this hunk:
//   * Input list: was pairs of (LoadInst *, pointer SCEV) named Loads; is now
//     pairs of (pointer SCEV, Type *) named Ptrs.
//   * Element count: was SE->getElementCount(PtrIntType, VF), computed once
//     before the loop; is now SE->getElementCount(PtrIntType, VF * IC),
//     computed per pointer, where IC is the new trailing parameter.
//   * Integer pointer type: was derived once from the LLVM context; is now
//     derived per pointer from the type of the pointer SCEV.
//   * Element size: was the store size of the load's type, held in an APInt
//     as wide as the index type of the load's pointer operand; is now the
//     store size of the paired Type, held in an APInt as wide as PtrIntType.
//
// The product of element count and element size is built with both the NSW
// and NUW flags set.
// NOTE(review): F and TTI are not referenced in the lines visible here.
21992172static void addPointerAlignmentChecks (
2200- const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *Loads ,
2201- Function *F, PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI,
2202- ElementCount VF ) {
2173+ const SmallVectorImpl<std::pair<const SCEV *, Type *>> *Ptrs, Function *F ,
2174+ PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF ,
2175+ unsigned IC ) {
22032176 ScalarEvolution *SE = PSE.getSE ();
22042177 const DataLayout &DL = SE->getDataLayout ();
2205- Type *PtrIntType = DL.getIntPtrType (SE->getContext ());
22062178
2207- const SCEV *Zero = SE->getZero (PtrIntType);
2208- const SCEV *ScevEC = SE->getElementCount (PtrIntType, VF);
2209-
2210- for (auto Load : *Loads) {
2211- APInt EltSize (
2212- DL.getIndexTypeSizeInBits (Load.first ->getPointerOperandType ()),
2213- DL.getTypeStoreSize (Load.first ->getType ()).getFixedValue ());
2214- const SCEV *Start = SE->getPtrToIntExpr (Load.second , PtrIntType);
2179+ for (auto Ptr : *Ptrs) {
2180+ Type *PtrIntType = DL.getIntPtrType (Ptr.first ->getType ());
2181+ APInt EltSize (PtrIntType->getScalarSizeInBits (),
2182+ DL.getTypeStoreSize (Ptr.second ).getFixedValue ());
2183+ const SCEV *Start = SE->getPtrToIntExpr (Ptr.first , PtrIntType);
2184+ const SCEV *ScevEC = SE->getElementCount (PtrIntType, VF * IC);
22152185 const SCEV *Align =
22162186 SE->getMulExpr (ScevEC, SE->getConstant (EltSize),
22172187 (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
22182188 const SCEV *Rem = SE->getURemExpr (Start, Align);
2219- PSE.addPredicate (*(SE->getEqualPredicate (Rem, Zero )));
2189+ PSE.addPredicate (*(SE->getEqualPredicate (Rem, SE-> getZero (PtrIntType) )));
22202190 }
22212191}
22222192
@@ -2389,6 +2359,17 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
23892359 llvm_unreachable (" invalid enum" );
23902360}
23912361
// Added at this position by this change; a definition with the same text is
// removed from its earlier location ahead of addPointerAlignmentChecks.
//
// Returns the maximum vscale to assume for function F:
//   1. the value from TTI.getMaxVScale() when the target provides one;
//   2. otherwise the maximum of F's VScaleRange attribute, when F has one;
//   3. otherwise std::nullopt.
2362+ std::optional<unsigned > getMaxVScale (const Function &F,
2363+ const TargetTransformInfo &TTI) {
2364+ if (std::optional<unsigned > MaxVScale = TTI.getMaxVScale ())
2365+ return MaxVScale;
2366+
2367+ if (F.hasFnAttribute (Attribute::VScaleRange))
2368+ return F.getFnAttribute (Attribute::VScaleRange).getVScaleRangeMax ();
2369+
2370+ return std::nullopt ;
2371+ }
2372+
23922373// / For the given VF and UF and maximum trip count computed for the loop, return
23932374// / whether the induction variable might overflow in the vectorized loop. If not,
23942375// / then we know a runtime overflow check always evaluates to false and can be
@@ -3881,15 +3862,15 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
38813862 return false ;
38823863 }
38833864
3884- if (!isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
3865+ if (!Legal-> isSafeForAnyVectorWidth () && !getMaxVScale (*TheFunction, TTI)) {
38853866 reportVectorizationInfo (" The target does not provide maximum vscale value "
38863867 " for safe distance analysis." ,
38873868 " ScalableVFUnfeasible" , ORE, TheLoop);
38883869 return false ;
38893870 }
38903871
38913872 if (Legal->hasUncountableEarlyExit () &&
3892- Legal->getNumPotentiallyFaultingLoads () &&
3873+ Legal->getNumPotentiallyFaultingPointers () &&
38933874 !TTI.isVScaleKnownToBeAPowerOfTwo ()) {
38943875 reportVectorizationInfo (" Cannot vectorize potentially faulting early exit "
38953876 " loop with scalable vectors." ,
@@ -3908,7 +3889,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
39083889
39093890 auto MaxScalableVF = ElementCount::getScalable (
39103891 std::numeric_limits<ElementCount::ScalarTy>::max ());
3911- if (isSafeForAnyVectorWidth ())
3892+ if (Legal-> isSafeForAnyVectorWidth ())
39123893 return MaxScalableVF;
39133894
39143895 std::optional<unsigned > MaxVScale = getMaxVScale (*TheFunction, TTI);
@@ -3935,11 +3916,11 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
39353916 // the memory accesses that is most restrictive (involved in the smallest
39363917 // dependence distance).
39373918 unsigned MaxSafeElements =
3938- llvm::bit_floor (getMaxSafeVectorWidthInBits () / WidestType);
3919+ llvm::bit_floor (Legal-> getMaxSafeVectorWidthInBits () / WidestType);
39393920
39403921 auto MaxSafeFixedVF = ElementCount::getFixed (MaxSafeElements);
39413922 auto MaxSafeScalableVF = getMaxLegalScalableVF (MaxSafeElements);
3942- if (!isSafeForAnyVectorWidth ())
3923+ if (!Legal-> isSafeForAnyVectorWidth ())
39433924 this ->MaxSafeElements = MaxSafeElements;
39443925
39453926 LLVM_DEBUG (dbgs () << " LV: The max safe fixed VF is: " << MaxSafeFixedVF
@@ -10492,7 +10473,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1049210473 }
1049310474
1049410475 unsigned NumPotentiallyFaultingPointers =
10495- LVL.getNumPotentiallyFaultingLoads ();
10476+ LVL.getNumPotentiallyFaultingPointers ();
1049610477 if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
1049710478 reportVectorizationFailure (" Not worth vectorizing loop with uncountable "
1049810479 " early exit, due to number of potentially "
@@ -10660,15 +10641,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1066010641 // Optimistically generate runtime checks if they are needed. Drop them if
1066110642 // they turn out to not be profitable.
1066210643 if (VF.Width .isVector () || SelectedIC > 1 ) {
10663- if (LVL.getNumPotentiallyFaultingLoads ()) {
10664- assert (SelectedIC == 1 &&
10665- " Interleaving not supported for early exit loops and "
10666- " potentially faulting loads" );
10644+ if (LVL.getNumPotentiallyFaultingPointers ()) {
1066710645 assert (!CM.foldTailWithEVL () &&
1066810646 " Explicit vector length unsupported for early exit loops and "
1066910647 " potentially faulting loads" );
10670- addPointerAlignmentChecks (LVL.getPotentiallyFaultingLoads (), F, PSE,
10671- TTI, VF.Width );
10648+ addPointerAlignmentChecks (LVL.getPotentiallyFaultingPointers (), F, PSE,
10649+ TTI, VF.Width , SelectedIC );
1067210650 }
1067310651 Checks.create (L, *LVL.getLAI (), PSE.getPredicate (), VF.Width , SelectedIC);
1067410652 }
0 commit comments