@@ -1937,27 +1937,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19371937 LLVM_DEBUG (dbgs () << " LAA: Distance for " << *AInst << " to " << *BInst
19381938 << " : " << *Dist << " \n " );
19391939
1940- // Check if we can prove that Sink only accesses memory after Src's end or
1941- // vice versa. At the moment this is limited to cases where either source or
1942- // sink are loop invariant to avoid compile-time increases. This is not
1943- // required for correctness.
1944- if (SE.isLoopInvariant (Src, InnermostLoop) ||
1945- SE.isLoopInvariant (Sink, InnermostLoop)) {
1946- const auto &[SrcStart, SrcEnd] =
1947- getStartAndEndForAccess (InnermostLoop, Src, ATy, PSE, PointerBounds);
1948- const auto &[SinkStart, SinkEnd] =
1949- getStartAndEndForAccess (InnermostLoop, Sink, BTy, PSE, PointerBounds);
1950- if (!isa<SCEVCouldNotCompute>(SrcStart) &&
1951- !isa<SCEVCouldNotCompute>(SrcEnd) &&
1952- !isa<SCEVCouldNotCompute>(SinkStart) &&
1953- !isa<SCEVCouldNotCompute>(SinkEnd)) {
1954- if (SE.isKnownPredicate (CmpInst::ICMP_ULE, SrcEnd, SinkStart))
1955- return MemoryDepChecker::Dependence::NoDep;
1956- if (SE.isKnownPredicate (CmpInst::ICMP_ULE, SinkEnd, SrcStart))
1957- return MemoryDepChecker::Dependence::NoDep;
1958- }
1959- }
1960-
19611940 // Need accesses with constant strides and the same direction for further
19621941 // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
19631942 // similar code or pointer arithmetic that could wrap in the address space.
@@ -2003,12 +1982,45 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20031982 const MemAccessInfo &B, unsigned BIdx) {
20041983 assert (AIdx < BIdx && " Must pass arguments in program order" );
20051984
1985+ // Returns true if Sink provably only accesses memory after Src's end or vice
1986+ // versa; returns false if that cannot be shown or bounds are not computable.
1987+ // Used only on the exit paths where it helps to improve the analysis result.
1988+ auto CheckCompletelyBeforeOrAfter = [&]() {
1989+ auto *APtr = A.getPointer ();
1990+ auto *BPtr = B.getPointer ();
1991+
1992+ Type *ATy = getLoadStoreType (InstMap[AIdx]);
1993+ Type *BTy = getLoadStoreType (InstMap[BIdx]);
1994+
1995+ const SCEV *Src = PSE.getSCEV (APtr);
1996+ const SCEV *Sink = PSE.getSCEV (BPtr);
1997+
1998+ const auto &[SrcStart, SrcEnd] =
1999+ getStartAndEndForAccess (InnermostLoop, Src, ATy, PSE, PointerBounds);
2000+ if (isa<SCEVCouldNotCompute>(SrcStart) || isa<SCEVCouldNotCompute>(SrcEnd))
2001+ return false ; // Src bounds unknown: nothing can be proven.
2002+
2003+ const auto &[SinkStart, SinkEnd] =
2004+ getStartAndEndForAccess (InnermostLoop, Sink, BTy, PSE, PointerBounds);
2005+ if (isa<SCEVCouldNotCompute>(SinkStart) ||
2006+ isa<SCEVCouldNotCompute>(SinkEnd))
2007+ return false ; // Sink bounds unknown: nothing can be proven.
2008+
2009+ auto &SE = *PSE.getSE ();
2010+ return SE.isKnownPredicate (CmpInst::ICMP_ULE, SrcEnd, SinkStart) || // SrcEnd <= SinkStart (unsigned), or
2011+ SE.isKnownPredicate (CmpInst::ICMP_ULE, SinkEnd, SrcStart); // SinkEnd <= SrcStart (unsigned).
2012+ };
2013+
20062014 // Get the dependence distance, stride, type size and what access writes for
20072015 // the dependence between A and B.
20082016 auto Res =
20092017 getDependenceDistanceStrideAndSize (A, InstMap[AIdx], B, InstMap[BIdx]);
2010- if (std::holds_alternative<Dependence::DepType>(Res))
2018+ if (std::holds_alternative<Dependence::DepType>(Res)) {
2019+ if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2020+ CheckCompletelyBeforeOrAfter ())
2021+ return Dependence::NoDep;
20112022 return std::get<Dependence::DepType>(Res);
2023+ }
20122024
20132025 auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
20142026 std::get<DepDistanceStrideAndSizeInfo>(Res);
@@ -2017,6 +2029,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20172029 std::optional<uint64_t > CommonStride =
20182030 StrideA == StrideB ? std::make_optional (StrideA) : std::nullopt ;
20192031 if (isa<SCEVCouldNotCompute>(Dist)) {
2032+ if (CheckCompletelyBeforeOrAfter ())
2033+ return Dependence::NoDep;
2034+
20202035 // TODO: Relax requirement that there is a common stride to retry with
20212036 // non-constant distance dependencies.
20222037 FoundNonConstantDistanceDependence |= CommonStride.has_value ();
@@ -2068,6 +2083,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20682083 // Write to the same location with the same size.
20692084 return Dependence::Forward;
20702085 }
2086+ assert (!CheckCompletelyBeforeOrAfter () &&
2087+ " unexpectedly proved no dependence" );
20712088 LLVM_DEBUG (dbgs () << " LAA: possibly zero dependence difference but "
20722089 " different type sizes\n " );
20732090 return Dependence::Unknown;
@@ -2089,6 +2106,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20892106 // did not set it when strides were different but there is no inherent
20902107 // reason to.
20912108 FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2109+ if (CheckCompletelyBeforeOrAfter ())
2110+ return Dependence::NoDep;
20922111 return Dependence::Unknown;
20932112 }
20942113 if (!HasSameSize ||
@@ -2108,6 +2127,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21082127 // Below we only handle strictly positive distances.
21092128 if (MinDistance <= 0 ) {
21102129 FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2130+ if (CheckCompletelyBeforeOrAfter ())
2131+ return Dependence::NoDep;
2132+
21112133 return Dependence::Unknown;
21122134 }
21132135
@@ -2124,13 +2146,18 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21242146 }
21252147
21262148 if (!HasSameSize) {
2149+ if (CheckCompletelyBeforeOrAfter ())
2150+ return Dependence::NoDep;
21272151 LLVM_DEBUG (dbgs () << " LAA: ReadWrite-Write positive dependency with "
21282152 " different type sizes\n " );
21292153 return Dependence::Unknown;
21302154 }
21312155
2132- if (!CommonStride)
2156+ if (!CommonStride) {
2157+ if (CheckCompletelyBeforeOrAfter ())
2158+ return Dependence::NoDep;
21332159 return Dependence::Unknown;
2160+ }
21342161
21352162 // Bail out early if passed-in parameters make vectorization not feasible.
21362163 unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
@@ -2178,6 +2205,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21782205 // dependence distance and the distance may be larger at runtime (and safe
21792206 // for vectorization). Classify it as Unknown, so we re-try with runtime
21802207 // checks.
2208+ //
2209+ if (CheckCompletelyBeforeOrAfter ())
2210+ return Dependence::NoDep;
2211+
21812212 return Dependence::Unknown;
21822213 }
21832214 LLVM_DEBUG (dbgs () << " LAA: Failure because of positive minimum distance "
@@ -2190,6 +2221,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21902221 if (MinDistanceNeeded > MinDepDistBytes) {
21912222 LLVM_DEBUG (dbgs () << " LAA: Failure because it needs at least "
21922223 << MinDistanceNeeded << " size in bytes\n " );
2224+ assert (!CheckCompletelyBeforeOrAfter () &&
2225+ " unexpectedly proved no dependence" );
21932226 return Dependence::Backward;
21942227 }
21952228
@@ -2237,6 +2270,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22372270 // For non-constant distances, we checked the lower bound of the dependence
22382271 // distance and the distance may be larger at runtime (and safe for
22392272 // vectorization). Classify it as Unknown, so we re-try with runtime checks.
2273+ assert (!CheckCompletelyBeforeOrAfter () &&
2274+ " unexpectedly proved no dependence" );
22402275 return Dependence::Unknown;
22412276 }
22422277
0 commit comments