@@ -1789,8 +1789,7 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
17891789// / }
17901790static bool isSafeDependenceDistance (const DataLayout &DL, ScalarEvolution &SE,
17911791 const SCEV &MaxBTC, const SCEV &Dist,
1792- uint64_t MaxStride,
1793- uint64_t TypeByteSize) {
1792+ uint64_t MaxStride) {
17941793
17951794 // If we can prove that
17961795 // (**) |Dist| > MaxBTC * Step
@@ -1809,8 +1808,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
18091808 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
18101809 // also guarantees that distance >= VF.
18111810 //
1812- const uint64_t ByteStride = MaxStride * TypeByteSize;
1813- const SCEV *Step = SE.getConstant (MaxBTC.getType (), ByteStride);
1811+ const SCEV *Step = SE.getConstant (MaxBTC.getType (), MaxStride);
18141812 const SCEV *Product = SE.getMulExpr (&MaxBTC, Step);
18151813
18161814 const SCEV *CastedDist = &Dist;
@@ -1854,25 +1852,23 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
18541852 if (Distance % TypeByteSize)
18551853 return false ;
18561854
1857- uint64_t ScaledDist = Distance / TypeByteSize;
1858-
1859- // No dependence if the scaled distance is not multiple of the stride.
1855+ // No dependence if the distance is not a multiple of the stride.
18601856 // E.g.
18611857 // for (i = 0; i < 1024 ; i += 4)
18621858 // A[i+2] = A[i] + 1;
18631859 //
1864- // Two accesses in memory (scaled distance is 2, stride is 4):
1860+ // Two accesses in memory (distance is 2, stride is 4):
18651861 // | A[0] | | | | A[4] | | | |
18661862 // | | | A[2] | | | | A[6] | |
18671863 //
18681864 // E.g.
18691865 // for (i = 0; i < 1024 ; i += 3)
18701866 // A[i+4] = A[i] + 1;
18711867 //
1872- // Two accesses in memory (scaled distance is 4, stride is 3):
1868+ // Two accesses in memory (distance is 4, stride is 3):
18731869 // | A[0] | | | A[3] | | | A[6] | | |
18741870 // | | | | | A[4] | | | A[7] | |
1875- return ScaledDist % Stride;
1871+ return Distance % Stride;
18761872}
18771873
18781874std::variant<MemoryDepChecker::Dependence::DepType,
@@ -1981,25 +1977,32 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19811977 return MemoryDepChecker::Dependence::Unknown;
19821978 }
19831979
1984- uint64_t TypeByteSize = DL.getTypeAllocSize (ATy);
1985- bool HasSameSize =
1986- DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
1987- if (!HasSameSize)
1988- TypeByteSize = 0 ;
1980+ TypeSize AStoreSz = DL.getTypeStoreSize (ATy),
1981+ BStoreSz = DL.getTypeStoreSize (BTy);
1982+
1983+ // Fail early if either store size is scalable.
1984+ if (AStoreSz.isScalable () || BStoreSz.isScalable ())
1985+ return MemoryDepChecker::Dependence::Unknown;
1986+
1987+ // If store sizes are not the same, set TypeByteSize to zero, so we can check
1988+ // it in the caller.
1989+ uint64_t ASz = alignTo (AStoreSz, DL.getABITypeAlign (ATy)),
1990+ BSz = alignTo (BStoreSz, DL.getABITypeAlign (BTy)),
1991+ TypeByteSize = AStoreSz == BStoreSz ? BSz : 0 ;
19891992
1990- StrideAPtrInt = std::abs (StrideAPtrInt);
1991- StrideBPtrInt = std::abs (StrideBPtrInt);
1993+ uint64_t StrideAScaled = std::abs (StrideAPtrInt) * ASz ;
1994+ uint64_t StrideBScaled = std::abs (StrideBPtrInt) * BSz ;
19921995
1993- uint64_t MaxStride = std::max (StrideAPtrInt, StrideBPtrInt );
1996+ uint64_t MaxStride = std::max (StrideAScaled, StrideBScaled );
19941997
19951998 std::optional<uint64_t > CommonStride;
1996- if (StrideAPtrInt == StrideBPtrInt )
1997- CommonStride = StrideAPtrInt ;
1999+ if (StrideAScaled == StrideBScaled )
2000+ CommonStride = StrideAScaled ;
19982001
19992002 // TODO: Historically, we don't retry with runtime checks unless the
20002003 // (unscaled) strides are the same. Fix this once the condition for runtime
20012004 // checks in isDependent is fixed.
2002- bool ShouldRetryWithRuntimeCheck = CommonStride. has_value () ;
2005+ bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt ;
20032006
20042007 return DepDistanceStrideAndSizeInfo (Dist, MaxStride, CommonStride,
20052008 ShouldRetryWithRuntimeCheck, TypeByteSize,
@@ -2039,9 +2042,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20392042 // upper bound of the number of iterations), the accesses are independent, i.e.
20402043 // they are far enough apart that accesses won't access the same location
20412044 // across all loop iterations.
2042- if (HasSameSize && isSafeDependenceDistance (
2043- DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()),
2044- *Dist, MaxStride, TypeByteSize ))
2045+ if (HasSameSize &&
2046+ isSafeDependenceDistance (
2047+ DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()), *Dist, MaxStride))
20452048 return Dependence::NoDep;
20462049
20472050 const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
@@ -2145,8 +2148,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21452148
21462149 // It's not vectorizable if the distance is smaller than the minimum distance
21472150 // needed for a vectorized/unrolled version. Vectorizing one iteration in
2148- // front needs TypeByteSize * Stride . Vectorizing the last iteration needs
2149- // TypeByteSize (No need to plus the last gap distance).
2151+ // front needs CommonStride . Vectorizing the last iteration needs TypeByteSize
2152+ // (no need to add the last gap distance).
21502153 //
21512154 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
21522155 // foo(int *A) {
@@ -2173,8 +2176,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21732176 // We know that Dist is positive, but it may not be constant. Use the signed
21742177 // minimum for computations below, as this ensures we compute the closest
21752178 // possible dependence distance.
2176- uint64_t MinDistanceNeeded =
2177- TypeByteSize * *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2179+ uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
21782180 if (MinDistanceNeeded > static_cast <uint64_t >(MinDistance)) {
21792181 if (!ConstDist) {
21802182 // For non-constant distances, we checked the lower bound of the
@@ -2230,7 +2232,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22302232
22312233 // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
22322234 // since there is a backwards dependency.
2233- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * * CommonStride) ;
2235+ uint64_t MaxVF = MinDepDistBytes / * CommonStride;
22342236 LLVM_DEBUG (dbgs () << " LAA: Positive min distance " << MinDistance
22352237 << " with max VF = " << MaxVF << ' \n ' );
22362238
0 commit comments