diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 52ab38583d5de..49a795b5fd6a7 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -413,29 +413,30 @@ class MemoryDepChecker { uint64_t MaxStride; std::optional CommonStride; - /// TypeByteSize is either the common store size of both accesses, or 0 when - /// store sizes mismatch. - uint64_t TypeByteSize; + /// TypeByteSize is a pair of alloc sizes of the source and sink. + std::pair TypeByteSize; + + // HasSameSize is a boolean indicating whether the store sizes of the source + // and sink are equal. + // TODO: Remove this. + bool HasSameSize; bool AIsWrite; bool BIsWrite; DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride, std::optional CommonStride, - uint64_t TypeByteSize, bool AIsWrite, - bool BIsWrite) + std::pair TypeByteSize, + bool HasSameSize, bool AIsWrite, bool BIsWrite) : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride), - TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} + TypeByteSize(TypeByteSize), HasSameSize(HasSameSize), + AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; /// Get the dependence distance, strides, type size and whether it is a write - /// for the dependence between A and B. Returns a DepType, if we can prove - /// there's no dependence or the analysis fails. Outlined to lambda to limit - /// he scope of various temporary variables, like A/BPtr, StrideA/BPtr and - /// others. Returns either the dependence result, if it could already be - /// determined, or a DepDistanceStrideAndSizeInfo struct, noting that - /// TypeByteSize could be 0 when store sizes mismatch, and this should be - /// checked in the caller. + /// for the dependence between A and B. Returns either a DepType, the + /// dependence result, if it could already be determined, or a + /// DepDistanceStrideAndSizeInfo struct. 
 std::variant getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B, diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 87fae92977cd2..d6ad855cad9a7 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2090,14 +2090,12 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( return MemoryDepChecker::Dependence::Unknown; } - TypeSize AStoreSz = DL.getTypeStoreSize(ATy); - TypeSize BStoreSz = DL.getTypeStoreSize(BTy); - - // If store sizes are not the same, set TypeByteSize to zero, so we can check - // it in the caller isDependent. uint64_t ASz = DL.getTypeAllocSize(ATy); uint64_t BSz = DL.getTypeAllocSize(BTy); - uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0; + + // Both the source and sink sizes are needed in dependence checks, depending + // on the use. + std::pair TypeByteSize(ASz, BSz); uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz; uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz; @@ -2119,8 +2117,23 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( return Dependence::Unknown; } + // When the distance is possibly zero, we're reading/writing the same memory + // location: if the store sizes are not equal, fail with an unknown + // dependence. + TypeSize AStoreSz = DL.getTypeStoreSize(ATy); + TypeSize BStoreSz = DL.getTypeStoreSize(BTy); + if (AStoreSz != BStoreSz && !SE.isKnownNonZero(Dist)) { + LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence distance with " + "different type sizes\n"); + return Dependence::Unknown; + } + + // TODO: Remove this. 
+ bool HasSameSize = AStoreSz == BStoreSz; + return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, - TypeByteSize, AIsWrite, BIsWrite); + TypeByteSize, HasSameSize, AIsWrite, + BIsWrite); } MemoryDepChecker::Dependence::DepType @@ -2152,9 +2165,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, return std::get(Res); } - auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] = - std::get(Res); - bool HasSameSize = TypeByteSize > 0; + auto &[Dist, MaxStride, CommonStride, TypeByteSize, HasSameSize, AIsWrite, + BIsWrite] = std::get(Res); ScalarEvolution &SE = *PSE.getSE(); auto &DL = InnermostLoop->getHeader()->getDataLayout(); @@ -2180,7 +2192,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // If the distance between accesses and their strides are known constants, // check whether the accesses interlace each other. if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize && - areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) { + areStridedAccessesIndependent(ConstDist, *CommonStride, + TypeByteSize.first)) { LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); return Dependence::NoDep; } @@ -2194,13 +2207,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Negative distances are not plausible dependencies. if (SE.isKnownNonPositive(Dist)) { if (SE.isKnownNonNegative(Dist)) { - if (HasSameSize) { - // Write to the same location with the same size. - return Dependence::Forward; - } - LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but " - "different type sizes\n"); - return Dependence::Unknown; + // Write to the same location with the same size. 
+ assert(HasSameSize && "Accesses must have the same size"); + return Dependence::Forward; } bool IsTrueDataDependence = (AIsWrite && !BIsWrite); @@ -2218,7 +2227,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, : Dependence::Unknown; } if (!HasSameSize || - couldPreventStoreLoadForward(ConstDist, TypeByteSize)) { + couldPreventStoreLoadForward(ConstDist, TypeByteSize.first)) { LLVM_DEBUG( dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); return Dependence::ForwardButPreventsForwarding; @@ -2284,7 +2293,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // We know that Dist is positive, but it may not be constant. Use the signed // minimum for computations below, as this ensures we compute the closest // possible dependence distance. - uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize; + uint64_t MinDistanceNeeded = + MaxStride * (MinNumIter - 1) + TypeByteSize.first; if (MinDistanceNeeded > static_cast(MinDistance)) { if (!ConstDist) { // For non-constant distances, we checked the lower bound of the @@ -2312,14 +2322,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist && - couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride)) + couldPreventStoreLoadForward(MinDistance, TypeByteSize.first, + *CommonStride)) return Dependence::BackwardVectorizableButPreventsForwarding; uint64_t MaxVF = MinDepDistBytes / MaxStride; LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance << " with max VF = " << MaxVF << '\n'); - uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; + uint64_t MaxVFInBits = MaxVF * TypeByteSize.first * 8; if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) { // For non-constant distances, we checked the lower bound of the dependence // distance and the distance may be larger at runtime (and 
safe for