@@ -2032,15 +2032,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
20322032 LLVM_DEBUG (dbgs () << " LAA: Distance for " << *AInst << " to " << *BInst
20332033 << " : " << *Dist << " \n " );
20342034
2035- // At the moment this is limited to cases where either source or
2036- // sink are loop invariant to avoid compile-time increases. This is not
2037- // required for correctness.
2038- if (SE.isLoopInvariant (Src, InnermostLoop) ||
2039- SE.isLoopInvariant (Sink, InnermostLoop)) {
2040- if (areAccessesCompletelyBeforeOrAfter (Src, ATy, Sink, BTy))
2041- return Dependence::NoDep;
2042- }
2043-
20442035 // Need accesses with constant strides and the same direction for further
20452036 // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
20462037 // similar code or pointer arithmetic that could wrap in the address space.
@@ -2103,18 +2094,37 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21032094 const MemAccessInfo &B, unsigned BIdx) {
21042095 assert (AIdx < BIdx && " Must pass arguments in program order" );
21052096
2097+ // Lazily checks whether areAccessesCompletelyBeforeOrAfter can prove that Sink
2098+ // only accesses memory after Src's end or vice versa, using the pointer SCEVs
2099+ // and load/store types of A and B. Only called on exit paths where it can help.
2100+ auto CheckCompletelyBeforeOrAfter = [&]() {
2101+ auto *APtr = A.getPointer ();
2102+ auto *BPtr = B.getPointer ();
2103+ Type *ATy = getLoadStoreType (InstMap[AIdx]);
2104+ Type *BTy = getLoadStoreType (InstMap[BIdx]);
2105+ const SCEV *Src = PSE.getSCEV (APtr);
2106+ const SCEV *Sink = PSE.getSCEV (BPtr);
2107+ return areAccessesCompletelyBeforeOrAfter (Src, ATy, Sink, BTy);
2108+ };
2109+
21062110 // Get the dependence distance, stride, type size and what access writes for
21072111 // the dependence between A and B.
21082112 auto Res =
21092113 getDependenceDistanceStrideAndSize (A, InstMap[AIdx], B, InstMap[BIdx]);
2110- if (std::holds_alternative<Dependence::DepType>(Res))
2114+ if (std::holds_alternative<Dependence::DepType>(Res)) {
2115+ if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2116+ CheckCompletelyBeforeOrAfter ())
2117+ return Dependence::NoDep;
21112118 return std::get<Dependence::DepType>(Res);
2119+ }
21122120
21132121 auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
21142122 std::get<DepDistanceStrideAndSizeInfo>(Res);
21152123 bool HasSameSize = TypeByteSize > 0 ;
21162124
21172125 if (isa<SCEVCouldNotCompute>(Dist)) {
2126+ if (CheckCompletelyBeforeOrAfter ())
2127+ return Dependence::NoDep;
21182128 LLVM_DEBUG (dbgs () << " LAA: Dependence because of uncomputable distance.\n " );
21192129 return Dependence::Unknown;
21202130 }
@@ -2176,8 +2186,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21762186 // forward dependency will allow vectorization using any width.
21772187
21782188 if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2179- if (!ConstDist)
2180- return Dependence::Unknown;
2189+ if (!ConstDist) {
2190+ return CheckCompletelyBeforeOrAfter () ? Dependence::NoDep
2191+ : Dependence::Unknown;
2192+ }
21812193 if (!HasSameSize ||
21822194 couldPreventStoreLoadForward (ConstDist, TypeByteSize)) {
21832195 LLVM_DEBUG (
@@ -2192,10 +2204,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21922204
21932205 int64_t MinDistance = SE.getSignedRangeMin (Dist).getSExtValue ();
21942206 // Below we only handle strictly positive distances.
2195- if (MinDistance <= 0 )
2196- return Dependence::Unknown;
2207+ if (MinDistance <= 0 ) {
2208+ return CheckCompletelyBeforeOrAfter () ? Dependence::NoDep
2209+ : Dependence::Unknown;
2210+ }
21972211
21982212 if (!HasSameSize) {
2213+ if (CheckCompletelyBeforeOrAfter ())
2214+ return Dependence::NoDep;
21992215 LLVM_DEBUG (dbgs () << " LAA: ReadWrite-Write positive dependency with "
22002216 " different type sizes\n " );
22012217 return Dependence::Unknown;
@@ -2247,8 +2263,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22472263 // For non-constant distances, we checked the lower bound of the
22482264 // dependence distance and the distance may be larger at runtime (and safe
22492265 // for vectorization). Classify it as Unknown, so we re-try with runtime
2250- // checks.
2251- return Dependence::Unknown;
2266+ // checks, unless we can prove both accesses cannot overlap.
2267+ return CheckCompletelyBeforeOrAfter () ? Dependence::NoDep
2268+ : Dependence::Unknown;
22522269 }
22532270 LLVM_DEBUG (dbgs () << " LAA: Failure because of positive minimum distance "
22542271 << MinDistance << ' \n ' );
@@ -2279,10 +2296,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22792296 if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
22802297 // For non-constant distances, we checked the lower bound of the dependence
22812298 // distance and the distance may be larger at runtime (and safe for
2282- // vectorization). Classify it as Unknown, so we re-try with runtime checks.
2283- return Dependence::Unknown;
2299+ // vectorization). Classify it as Unknown, so we re-try with runtime checks,
2300+ // unless we can prove both accesses cannot overlap.
2301+ return CheckCompletelyBeforeOrAfter () ? Dependence::NoDep
2302+ : Dependence::Unknown;
22842303 }
22852304
2305+ if (CheckCompletelyBeforeOrAfter ())
2306+ return Dependence::NoDep;
2307+
22862308 MaxSafeVectorWidthInBits = std::min (MaxSafeVectorWidthInBits, MaxVFInBits);
22872309 return Dependence::BackwardVectorizable;
22882310}
0 commit comments