From 3bade7eb8f2f34f7fa75c06188275ee05506d9c7 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 8 Jan 2025 12:44:28 +0000 Subject: [PATCH 1/3] LAA: rearrange in preparation to scale strides (NFC) Rearrange the DepDistanceAndSizeInfo struct in preparation to scale strides. getDependenceDistanceStrideAndSize now returns the data of CommonStride, MaxStride, and clarifies when to rety with runtime checks, in place of (unscaled) strides. --- .../llvm/Analysis/LoopAccessAnalysis.h | 16 +++++--- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 37 +++++++++++++------ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index a35bc7402d1a8..7bc8c4deae1a3 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -366,16 +366,20 @@ class MemoryDepChecker { struct DepDistanceStrideAndSizeInfo { const SCEV *Dist; - uint64_t StrideA; - uint64_t StrideB; + uint64_t MaxStride; + std::optional CommonStride; + bool ShouldRetryWithRuntimeCheck; uint64_t TypeByteSize; bool AIsWrite; bool BIsWrite; - DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA, - uint64_t StrideB, uint64_t TypeByteSize, - bool AIsWrite, bool BIsWrite) - : Dist(Dist), StrideA(StrideA), StrideB(StrideB), + DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride, + std::optional CommonStride, + bool ShouldRetryWithRuntimeCheck, + uint64_t TypeByteSize, bool AIsWrite, + bool BIsWrite) + : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride), + ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck), TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 2c75d5625cb66..38e9145826c08 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1994,8 +1994,23 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); if (!HasSameSize) TypeByteSize = 0; - return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt), - std::abs(StrideBPtrInt), TypeByteSize, + + StrideAPtrInt = std::abs(StrideAPtrInt); + StrideBPtrInt = std::abs(StrideBPtrInt); + + uint64_t MaxStride = std::max(StrideAPtrInt, StrideBPtrInt); + + std::optional CommonStride; + if (StrideAPtrInt == StrideBPtrInt) + CommonStride = StrideAPtrInt; + + // TODO: Historically, we don't retry with runtime checks unless the + // (unscaled) strides are the same. Fix this once the condition for runtime + // checks in isDependent is fixed. + bool ShouldRetryWithRuntimeCheck = CommonStride.has_value(); + + return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, + ShouldRetryWithRuntimeCheck, TypeByteSize, AIsWrite, BIsWrite); } @@ -2011,23 +2026,21 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (std::holds_alternative(Res)) return std::get(Res); - auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] = + auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck, + TypeByteSize, AIsWrite, BIsWrite] = std::get(Res); bool HasSameSize = TypeByteSize > 0; - std::optional CommonStride = - StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt; if (isa(Dist)) { - // TODO: Relax requirement that there is a common stride to retry with - // non-constant distance dependencies. - FoundNonConstantDistanceDependence |= CommonStride.has_value(); + // TODO: Relax requirement that there is a common unscaled stride to retry + // with non-constant distance dependencies. + FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n"); return Dependence::Unknown; } ScalarEvolution &SE = *PSE.getSE(); auto &DL = InnermostLoop->getHeader()->getDataLayout(); - uint64_t MaxStride = std::max(StrideA, StrideB); // If the distance between the acecsses is larger than their maximum absolute // stride multiplied by the symbolic maximum backedge taken count (which is an @@ -2086,7 +2099,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // condition to consider retrying with runtime checks. Historically, we // did not set it when strides were different but there is no inherent // reason to. - FoundNonConstantDistanceDependence |= CommonStride.has_value(); + FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; return Dependence::Unknown; } if (!HasSameSize || @@ -2105,7 +2118,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue(); // Below we only handle strictly positive distances. if (MinDistance <= 0) { - FoundNonConstantDistanceDependence |= CommonStride.has_value(); + FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; return Dependence::Unknown; } @@ -2118,7 +2131,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // condition to consider retrying with runtime checks. Historically, we // did not set it when strides were different but there is no inherent // reason to. - FoundNonConstantDistanceDependence |= CommonStride.has_value(); + FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; } if (!HasSameSize) { From cfb317b8ff45132844fc884d08f4490abe5bd726 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 9 Jan 2025 11:11:02 +0000 Subject: [PATCH 2/3] LAA: add comment on stride units; address review --- llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 7bc8c4deae1a3..e055a89ce71ba 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -365,6 +365,10 @@ class MemoryDepChecker { void mergeInStatus(VectorizationSafetyStatus S); struct DepDistanceStrideAndSizeInfo { + // Strides could either be scaled (in bytes, taking the size of the + // underlying type into account), or unscaled (in indexing units; unscaled + // stride = scaled stride / size of underlying type). Here, strides are + // unscaled. const SCEV *Dist; uint64_t MaxStride; std::optional CommonStride; From cd1d507e1cf42164ca12976fda19b2b591a21aed Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 9 Jan 2025 14:00:11 +0000 Subject: [PATCH 3/3] LAA: fix nit --- llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index e055a89ce71ba..31374a128856c 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -365,13 +365,15 @@ class MemoryDepChecker { void mergeInStatus(VectorizationSafetyStatus S); struct DepDistanceStrideAndSizeInfo { - // Strides could either be scaled (in bytes, taking the size of the - // underlying type into account), or unscaled (in indexing units; unscaled - // stride = scaled stride / size of underlying type). Here, strides are - // unscaled. const SCEV *Dist; + + /// Strides could either be scaled (in bytes, taking the size of the + /// underlying type into account), or unscaled (in indexing units; unscaled + /// stride = scaled stride / size of underlying type). Here, strides are + /// unscaled. uint64_t MaxStride; std::optional CommonStride; + bool ShouldRetryWithRuntimeCheck; uint64_t TypeByteSize; bool AIsWrite;