diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 84564563de8e3..8d44f20b7a7d3 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -83,13 +83,15 @@ LLVM_ABI bool isSafeToLoadUnconditionally( LLVM_ABI bool isDereferenceableAndAlignedInLoop( LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC = nullptr, - SmallVectorImpl *Predicates = nullptr); + SmallVectorImpl *Predicates = nullptr, + bool ShouldCheckWrapping = true); /// Return true if the loop \p L cannot fault on any iteration and only /// contains read-only memory accesses. LLVM_ABI bool isDereferenceableReadOnlyLoop( Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, - SmallVectorImpl *Predicates = nullptr); + SmallVectorImpl *Predicates = nullptr, + bool ShouldCheckWrapping = true); /// Return true if we know that executing a load from this value cannot trap. /// diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 73bfe1aabb4e0..05280eee3881c 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -922,7 +922,7 @@ LLVM_ABI std::pair getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap, - std::pair> *PointerBounds); + std::pair> *PointerBounds, bool ShouldCheckWrapping = true); class LoopAccessInfoManager { /// The cache. diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index cba37363d0474..1e63859356e09 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -542,7 +542,7 @@ class LoopVectorizationLegality { /// The list above is not based on theoretical limitations of vectorization, /// but simply a statement that more work is needed to support these /// additional cases safely. - bool isVectorizableEarlyExitLoop(); + bool isVectorizableEarlyExitLoop(const bool NeedRuntimeChecks); /// Return true if all of the instructions in the block can be speculatively /// executed, and record the loads/stores that require masking. diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 393f2648de3c9..f7f512bf712b2 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -288,7 +288,8 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { bool llvm::isDereferenceableAndAlignedInLoop( LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, - AssumptionCache *AC, SmallVectorImpl *Predicates) { + AssumptionCache *AC, SmallVectorImpl *Predicates, + bool ShouldCheckWrapping) { const Align Alignment = LI->getAlign(); auto &DL = LI->getDataLayout(); Value *Ptr = LI->getPointerOperand(); @@ -341,8 +342,9 @@ bool llvm::isDereferenceableAndAlignedInLoop( ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates) : SE.getConstantMaxBackedgeTakenCount(L); } - const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess( - L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr); + const auto &[AccessStart, AccessEnd] = + getStartAndEndForAccess(L, PtrScev, LI->getType(), BECount, MaxBECount, + &SE, nullptr, ShouldCheckWrapping); if (isa(AccessStart) || isa(AccessEnd)) return false; @@ -850,11 +852,13 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To, bool llvm::isDereferenceableReadOnlyLoop( Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, - SmallVectorImpl *Predicates) { + SmallVectorImpl *Predicates, + bool ShouldCheckWrapping) { for (BasicBlock *BB : L->blocks()) { for (Instruction &I : *BB) { if (auto *LI = dyn_cast(&I)) { - if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates)) + if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates, + ShouldCheckWrapping)) return false; } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow()) return false; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index f3a32d3055edb..32b580668e68c 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -273,7 +273,8 @@ std::pair llvm::getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap, - std::pair> *PointerBounds) { + std::pair> *PointerBounds, + bool ShouldCheckWrapping) { std::pair *PtrBoundsPair; if (PointerBounds) { auto [Iter, Ins] = PointerBounds->insert( @@ -308,8 +309,8 @@ std::pair llvm::getStartAndEndForAccess( // sets ScEnd to the maximum unsigned value for the type. Note that LAA // separately checks that accesses cannot not wrap, so unsigned max // represents an upper bound. - if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, - DL)) { + if (!ShouldCheckWrapping || evaluatePtrAddRecAtMaxBTCWillNotWrap( + AR, MaxBTC, EltSizeSCEV, *SE, DL)) { ScEnd = AR->evaluateAtIteration(MaxBTC, *SE); } else { ScEnd = SE->getAddExpr( diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 969d225c6ef2e..8225177d05786 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1643,7 +1643,8 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG( return Result; } -bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { +bool LoopVectorizationLegality::isVectorizableEarlyExitLoop( + const bool NeedRuntimeChecks) { BasicBlock *LatchBB = TheLoop->getLoopLatch(); if (!LatchBB) { reportVectorizationFailure("Loop does not have a latch", @@ -1851,6 +1852,16 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return false; } + // Go over each instruction and look at memory deps. + if (!canVectorizeMemory()) { + LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + auto NeedRuntimeChecks = LAI->getRuntimePointerChecking()->Need; if (isa(PSE.getBackedgeTakenCount())) { if (TheLoop->getExitingBlock()) { reportVectorizationFailure("Cannot vectorize uncountable loop", @@ -1860,7 +1871,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { else return false; } else { - if (!isVectorizableEarlyExitLoop()) { + if (!isVectorizableEarlyExitLoop(NeedRuntimeChecks)) { UncountableEdge = std::nullopt; if (DoExtraAnalysis) Result = false; @@ -1870,14 +1881,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { } } - // Go over each instruction and look at memory deps. - if (!canVectorizeMemory()) { - LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } if (Result) { LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"