@@ -1786,22 +1786,21 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
17861786    Status = S;
17871787}
17881788
1789- // / Given a dependence-distance \p Dist between two
1790- // / memory accesses, that have strides in the same direction whose absolute
1791- // / value of the maximum stride is given in \p MaxStride, and that have the same
1792- // / type size \p TypeByteSize, in a loop whose maximum backedge taken count is
1793- // / \p MaxBTC, check if it is possible to prove statically that the dependence
1789+ // / Given a dependence-distance \p Dist between two memory accesses, that have
1790+ // / strides in the same direction whose absolute value of the maximum stride is
1791+ // / given in \p MaxStride, in a loop whose maximum backedge taken count is \p
1792+ // / MaxBTC, check if it is possible to prove statically that the dependence
17941793// / distance is larger than the range that the accesses will travel through the
17951794// / execution of the loop. If so, return true; false otherwise. This is useful
17961795// / for example in loops such as the following (PR31098):
1796+ // /
17971797// /     for (i = 0; i < D; ++i) {
17981798// /                = out[i];
17991799// /       out[i+D] =
18001800// /     }
18011801static  bool  isSafeDependenceDistance (const  DataLayout &DL, ScalarEvolution &SE,
18021802                                     const  SCEV &MaxBTC, const  SCEV &Dist,
1803-                                      uint64_t  MaxStride,
1804-                                      uint64_t  TypeByteSize) {
1803+                                      uint64_t  MaxStride) {
18051804
18061805  //  If we can prove that
18071806  //       (**) |Dist| > MaxBTC * Step
@@ -1820,8 +1819,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
18201819  //  will be executed only if LoopCount >= VF, proving distance >= LoopCount
18211820  //  also guarantees that distance >= VF.
18221821  // 
1823-   const  uint64_t  ByteStride = MaxStride * TypeByteSize;
1824-   const  SCEV *Step = SE.getConstant (MaxBTC.getType (), ByteStride);
1822+   const  SCEV *Step = SE.getConstant (MaxBTC.getType (), MaxStride);
18251823  const  SCEV *Product = SE.getMulExpr (&MaxBTC, Step);
18261824
18271825  const  SCEV *CastedDist = &Dist;
@@ -1851,8 +1849,8 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
18511849}
18521850
18531851// / Check the dependence for two accesses with the same stride \p Stride.
1854- // / \p Distance is the positive distance and \p TypeByteSize is type size in 
1855- // / bytes.
1852+ // / \p Distance is the positive distance in bytes,  and \p TypeByteSize is type
1853+ // / size in  bytes.
18561854// /
18571855// / \returns true if they are independent.
18581856static  bool  areStridedAccessesIndependent (uint64_t  Distance, uint64_t  Stride,
@@ -1865,25 +1863,23 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
18651863  if  (Distance % TypeByteSize)
18661864    return  false ;
18671865
1868-   uint64_t  ScaledDist = Distance / TypeByteSize;
1869- 
1870-   //  No dependence if the scaled distance is not multiple of the stride.
1866+   //  No dependence if the distance is not multiple of the stride.
18711867  //  E.g.
18721868  //       for (i = 0; i < 1024 ; i += 4)
18731869  //         A[i+2] = A[i] + 1;
18741870  // 
1875-   //  Two accesses in memory (scaled  distance is 2, stride is 4):
1871+   //  Two accesses in memory (distance is 2, stride is 4):
18761872  //      | A[0] |      |      |      | A[4] |      |      |      |
18771873  //      |      |      | A[2] |      |      |      | A[6] |      |
18781874  // 
18791875  //  E.g.
18801876  //       for (i = 0; i < 1024 ; i += 3)
18811877  //         A[i+4] = A[i] + 1;
18821878  // 
1883-   //  Two accesses in memory (scaled  distance is 4, stride is 3):
1879+   //  Two accesses in memory (distance is 4, stride is 3):
18841880  //      | A[0] |      |      | A[3] |      |      | A[6] |      |      |
18851881  //      |      |      |      |      | A[4] |      |      | A[7] |      |
1886-   return  ScaledDist  % Stride;
1882+   return  Distance  % Stride;
18871883}
18881884
18891885std::variant<MemoryDepChecker::Dependence::DepType,
@@ -1992,25 +1988,28 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19921988    return  MemoryDepChecker::Dependence::Unknown;
19931989  }
19941990
1995-   uint64_t  TypeByteSize = DL.getTypeAllocSize (ATy);
1996-   bool  HasSameSize =
1997-       DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
1998-   if  (!HasSameSize)
1999-     TypeByteSize = 0 ;
1991+   TypeSize AStoreSz = DL.getTypeStoreSize (ATy);
1992+   TypeSize BStoreSz = DL.getTypeStoreSize (BTy);
1993+ 
1994+   //  If store sizes are not the same, set TypeByteSize to zero, so we can check
1995+   //  it in the caller isDependent.
1996+   uint64_t  ASz = DL.getTypeAllocSize (ATy);
1997+   uint64_t  BSz = DL.getTypeAllocSize (BTy);
1998+   uint64_t  TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0 ;
20001999
2001-   StrideAPtrInt  = std::abs (StrideAPtrInt);
2002-   StrideBPtrInt  = std::abs (StrideBPtrInt);
2000+   uint64_t  StrideAScaled  = std::abs (StrideAPtrInt) * ASz ;
2001+   uint64_t  StrideBScaled  = std::abs (StrideBPtrInt) * BSz ;
20032002
2004-   uint64_t  MaxStride = std::max (StrideAPtrInt, StrideBPtrInt );
2003+   uint64_t  MaxStride = std::max (StrideAScaled, StrideBScaled );
20052004
20062005  std::optional<uint64_t > CommonStride;
2007-   if  (StrideAPtrInt  == StrideBPtrInt )
2008-     CommonStride = StrideAPtrInt ;
2006+   if  (StrideAScaled  == StrideBScaled )
2007+     CommonStride = StrideAScaled ;
20092008
20102009  //  TODO: Historically, we don't retry with runtime checks unless the
20112010  //  (unscaled) strides are the same. Fix this once the condition for runtime
20122011  //  checks in isDependent is fixed.
2013-   bool  ShouldRetryWithRuntimeCheck = CommonStride. has_value () ;
2012+   bool  ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt ;
20142013
20152014  return  DepDistanceStrideAndSizeInfo (Dist, MaxStride, CommonStride,
20162015                                      ShouldRetryWithRuntimeCheck, TypeByteSize,
@@ -2050,9 +2049,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20502049  //  upper bound of the number of iterations), the accesses are independent, i.e.
20512050  //  they are far enough apart that accesses won't access the same location
20522051  //  across all loop iterations.
2053-   if  (HasSameSize &&  isSafeDependenceDistance ( 
2054-                          DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()), 
2055-                           *Dist, MaxStride, TypeByteSize ))
2052+   if  (HasSameSize &&
2053+       isSafeDependenceDistance ( 
2054+           DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()),  *Dist, MaxStride))
20562055    return  Dependence::NoDep;
20572056
20582057  const  SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
@@ -2156,8 +2155,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21562155
21572156  //  It's not vectorizable if the distance is smaller than the minimum distance
21572156  //  needed for a vectorized/unrolled version. Vectorizing one iteration in
2159-   //  front needs TypeByteSize * Stride . Vectorizing the last iteration needs
2160-   //  TypeByteSize  (No need to plus the last gap distance).
2158+   //  front needs CommonStride . Vectorizing the last iteration needs TypeByteSize 
2159+   //  (No need to plus the last gap distance).
21612160  // 
21622161  //  E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
21632162  //       foo(int *A) {
@@ -2166,7 +2165,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21662165  //           B[i] = A[i] + 1;
21672166  //       }
21682167  // 
2169-   //  Two accesses in memory (stride is 2):
2168+   //  Two accesses in memory (stride is 4 *  2):
21702169  //      | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
21712170  //                               | B[0] |      | B[2] |      | B[4] |
21722171  // 
@@ -2184,8 +2183,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21842183  //  We know that Dist is positive, but it may not be constant. Use the signed
21852184  //  minimum for computations below, as this ensures we compute the closest
21862185  //  possible dependence distance.
2187-   uint64_t  MinDistanceNeeded =
2188-       TypeByteSize * *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2186+   uint64_t  MinDistanceNeeded = *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
21892187  if  (MinDistanceNeeded > static_cast <uint64_t >(MinDistance)) {
21902188    if  (!ConstDist) {
21912189      //  For non-constant distances, we checked the lower bound of the
@@ -2241,7 +2239,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22412239
22422240  //  An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
22432241  //  since there is a backwards dependency.
2244-   uint64_t  MaxVF = MinDepDistBytes / (TypeByteSize * * CommonStride) ;
2242+   uint64_t  MaxVF = MinDepDistBytes / * CommonStride;
22452243  LLVM_DEBUG (dbgs () << " LAA: Positive min distance "   << MinDistance
22462244                    << "  with max VF = "   << MaxVF << ' \n '  );
22472245
0 commit comments