@@ -1786,22 +1786,21 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
17861786 Status = S;
17871787}
17881788
1789- // / Given a dependence-distance \p Dist between two
1790- // / memory accesses, that have strides in the same direction whose absolute
1791- // / value of the maximum stride is given in \p MaxStride, and that have the same
1792- // / type size \p TypeByteSize, in a loop whose maximum backedge taken count is
1793- // / \p MaxBTC, check if it is possible to prove statically that the dependence
1789+ // / Given a dependence-distance \p Dist between two memory accesses, that have
1790+ // / strides in the same direction whose absolute value of the maximum stride is
1791+ // / given in \p MaxStride, in a loop whose maximum backedge taken count is \p
1792+ // / MaxBTC, check if it is possible to prove statically that the dependence
17941793// / distance is larger than the range that the accesses will travel through the
17951794// / execution of the loop. If so, return true; false otherwise. This is useful
17961795// / for example in loops such as the following (PR31098):
1796+ // /
17971797// / for (i = 0; i < D; ++i) {
17981798// / = out[i];
17991799// / out[i+D] =
18001800// / }
18011801static bool isSafeDependenceDistance (const DataLayout &DL, ScalarEvolution &SE,
18021802 const SCEV &MaxBTC, const SCEV &Dist,
1803- uint64_t MaxStride,
1804- uint64_t TypeByteSize) {
1803+ uint64_t MaxStride) {
18051804
18061805 // If we can prove that
18071806 // (**) |Dist| > MaxBTC * Step
@@ -1820,8 +1819,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
18201819 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
18211820 // also guarantees that distance >= VF.
18221821 //
1823- const uint64_t ByteStride = MaxStride * TypeByteSize;
1824- const SCEV *Step = SE.getConstant (MaxBTC.getType (), ByteStride);
1822+ const SCEV *Step = SE.getConstant (MaxBTC.getType (), MaxStride);
18251823 const SCEV *Product = SE.getMulExpr (&MaxBTC, Step);
18261824
18271825 const SCEV *CastedDist = &Dist;
@@ -1851,8 +1849,8 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
18511849}
18521850
18531851// / Check the dependence for two accesses with the same stride \p Stride.
1854- // / \p Distance is the positive distance and \p TypeByteSize is type size in
1855- // / bytes.
1852+ // / \p Distance is the positive distance in bytes, and \p TypeByteSize is type
1853+ // / size in bytes.
18561854// /
18571855// / \returns true if they are independent.
18581856static bool areStridedAccessesIndependent (uint64_t Distance, uint64_t Stride,
@@ -1865,25 +1863,23 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
18651863 if (Distance % TypeByteSize)
18661864 return false ;
18671865
1868- uint64_t ScaledDist = Distance / TypeByteSize;
1869-
1870- // No dependence if the scaled distance is not multiple of the stride.
1866+ // No dependence if the distance is not multiple of the stride.
18711867 // E.g.
18721868 // for (i = 0; i < 1024 ; i += 4)
18731869 // A[i+2] = A[i] + 1;
18741870 //
1875- // Two accesses in memory (scaled distance is 2, stride is 4):
1871+ // Two accesses in memory (distance is 2, stride is 4):
18761872 // | A[0] | | | | A[4] | | | |
18771873 // | | | A[2] | | | | A[6] | |
18781874 //
18791875 // E.g.
18801876 // for (i = 0; i < 1024 ; i += 3)
18811877 // A[i+4] = A[i] + 1;
18821878 //
1883- // Two accesses in memory (scaled distance is 4, stride is 3):
1879+ // Two accesses in memory (distance is 4, stride is 3):
18841880 // | A[0] | | | A[3] | | | A[6] | | |
18851881 // | | | | | A[4] | | | A[7] | |
1886- return ScaledDist % Stride;
1882+ return Distance % Stride;
18871883}
18881884
18891885std::variant<MemoryDepChecker::Dependence::DepType,
@@ -1992,25 +1988,28 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19921988 return MemoryDepChecker::Dependence::Unknown;
19931989 }
19941990
1995- uint64_t TypeByteSize = DL.getTypeAllocSize (ATy);
1996- bool HasSameSize =
1997- DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
1998- if (!HasSameSize)
1999- TypeByteSize = 0 ;
1991+ TypeSize AStoreSz = DL.getTypeStoreSize (ATy);
1992+ TypeSize BStoreSz = DL.getTypeStoreSize (BTy);
1993+
1994+ // If store sizes are not the same, set TypeByteSize to zero, so we can check
1995+ // it in the caller isDependent.
1996+ uint64_t ASz = DL.getTypeAllocSize (ATy);
1997+ uint64_t BSz = DL.getTypeAllocSize (BTy);
1998+ uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0 ;
20001999
2001- StrideAPtrInt = std::abs (StrideAPtrInt);
2002- StrideBPtrInt = std::abs (StrideBPtrInt);
2000+ uint64_t StrideAScaled = std::abs (StrideAPtrInt) * ASz ;
2001+ uint64_t StrideBScaled = std::abs (StrideBPtrInt) * BSz ;
20032002
2004- uint64_t MaxStride = std::max (StrideAPtrInt, StrideBPtrInt );
2003+ uint64_t MaxStride = std::max (StrideAScaled, StrideBScaled );
20052004
20062005 std::optional<uint64_t > CommonStride;
2007- if (StrideAPtrInt == StrideBPtrInt )
2008- CommonStride = StrideAPtrInt ;
2006+ if (StrideAScaled == StrideBScaled )
2007+ CommonStride = StrideAScaled ;
20092008
20102009 // TODO: Historically, we don't retry with runtime checks unless the
20112010 // (unscaled) strides are the same. Fix this once the condition for runtime
20122011 // checks in isDependent is fixed.
2013- bool ShouldRetryWithRuntimeCheck = CommonStride. has_value () ;
2012+ bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt ;
20142013
20152014 return DepDistanceStrideAndSizeInfo (Dist, MaxStride, CommonStride,
20162015 ShouldRetryWithRuntimeCheck, TypeByteSize,
@@ -2050,9 +2049,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20502049 // upper bound of the number of iterations), the accesses are independent, i.e.
20512050 // they are far enough apart that accesses won't access the same location
20522051 // across all loop iterations.
2053- if (HasSameSize && isSafeDependenceDistance (
2054- DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()),
2055- *Dist, MaxStride, TypeByteSize ))
2052+ if (HasSameSize &&
2053+ isSafeDependenceDistance (
2054+ DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()), *Dist, MaxStride))
20562055 return Dependence::NoDep;
20572056
20582057 const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
@@ -2156,8 +2155,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21562155
21572156 // It's not vectorizable if the distance is smaller than the minimum distance
21572156 // needed for a vectorized/unrolled version. Vectorizing one iteration in
2159- // front needs TypeByteSize * Stride . Vectorizing the last iteration needs
2160- // TypeByteSize (No need to plus the last gap distance).
2158+ // front needs CommonStride . Vectorizing the last iteration needs TypeByteSize
2159+ // (No need to plus the last gap distance).
21612160 //
21622161 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
21632162 // foo(int *A) {
@@ -2166,7 +2165,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21662165 // B[i] = A[i] + 1;
21672166 // }
21682167 //
2169- // Two accesses in memory (stride is 2):
2168+ // Two accesses in memory (stride is 4 * 2):
21702169 // | A[0] | | A[2] | | A[4] | | A[6] | |
21712170 // | B[0] | | B[2] | | B[4] |
21722171 //
@@ -2184,8 +2183,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21842183 // We know that Dist is positive, but it may not be constant. Use the signed
21852184 // minimum for computations below, as this ensures we compute the closest
21862185 // possible dependence distance.
2187- uint64_t MinDistanceNeeded =
2188- TypeByteSize * *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2186+ uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
21892187 if (MinDistanceNeeded > static_cast <uint64_t >(MinDistance)) {
21902188 if (!ConstDist) {
21912189 // For non-constant distances, we checked the lower bound of the
@@ -2241,7 +2239,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22412239
22422240 // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
22432241 // since there is a backwards dependency.
2244- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * * CommonStride) ;
2242+ uint64_t MaxVF = MinDepDistBytes / * CommonStride;
22452243 LLVM_DEBUG (dbgs () << " LAA: Positive min distance " << MinDistance
22462244 << " with max VF = " << MaxVF << ' \n ' );
22472245
0 commit comments