diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 49a795b5fd6a7..52ab38583d5de 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -413,30 +413,29 @@ class MemoryDepChecker { uint64_t MaxStride; std::optional CommonStride; - /// TypeByteSize is a pair of alloc sizes of the source and sink. - std::pair TypeByteSize; - - // HasSameSize is a boolean indicating whether the store sizes of the source - // and sink are equal. - // TODO: Remove this. - bool HasSameSize; + /// TypeByteSize is either the common store size of both accesses, or 0 when + /// store sizes mismatch. + uint64_t TypeByteSize; bool AIsWrite; bool BIsWrite; DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride, std::optional CommonStride, - std::pair TypeByteSize, - bool HasSameSize, bool AIsWrite, bool BIsWrite) + uint64_t TypeByteSize, bool AIsWrite, + bool BIsWrite) : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride), - TypeByteSize(TypeByteSize), HasSameSize(HasSameSize), - AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} + TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; /// Get the dependence distance, strides, type size and whether it is a write - /// for the dependence between A and B. Returns either a DepType, the - /// dependence result, if it could already be determined, or a - /// DepDistanceStrideAndSizeInfo struct. + /// for the dependence between A and B. Returns a DepType, if we can prove + /// there's no dependence or the analysis fails. Outlined to lambda to limit + /// he scope of various temporary variables, like A/BPtr, StrideA/BPtr and + /// others. Returns either the dependence result, if it could already be + /// determined, or a DepDistanceStrideAndSizeInfo struct, noting that + /// TypeByteSize could be 0 when store sizes mismatch, and this should be + /// checked in the caller. std::variant getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B, diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 512ae415d1c3b..87fae92977cd2 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2090,12 +2090,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( return MemoryDepChecker::Dependence::Unknown; } + TypeSize AStoreSz = DL.getTypeStoreSize(ATy); + TypeSize BStoreSz = DL.getTypeStoreSize(BTy); + + // If store sizes are not the same, set TypeByteSize to zero, so we can check + // it in the caller isDependent. uint64_t ASz = DL.getTypeAllocSize(ATy); uint64_t BSz = DL.getTypeAllocSize(BTy); - - // Both the source and sink sizes are neeeded in dependence checks, depending - // on the use. - std::pair TypeByteSize(ASz, BSz); + uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0; uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz; uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz; @@ -2117,24 +2119,8 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( return Dependence::Unknown; } - // When the distance is possibly zero, we're reading/writing the same memory - // location: if the store sizes are not equal, fail with an unknown - // dependence. - TypeSize AStoreSz = DL.getTypeStoreSize(ATy); - TypeSize BStoreSz = DL.getTypeStoreSize(BTy); - if (AStoreSz != BStoreSz && SE.isKnownNonPositive(Dist) && - SE.isKnownNonNegative(Dist)) { - LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence distance with " - "different type sizes\n"); - return Dependence::Unknown; - } - - // TODO: Remove this. - bool HasSameSize = AStoreSz == BStoreSz; - return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, - TypeByteSize, HasSameSize, AIsWrite, - BIsWrite); + TypeByteSize, AIsWrite, BIsWrite); } MemoryDepChecker::Dependence::DepType @@ -2166,8 +2152,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, return std::get(Res); } - auto &[Dist, MaxStride, CommonStride, TypeByteSize, HasSameSize, AIsWrite, - BIsWrite] = std::get(Res); + auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] = + std::get(Res); + bool HasSameSize = TypeByteSize > 0; ScalarEvolution &SE = *PSE.getSE(); auto &DL = InnermostLoop->getHeader()->getDataLayout(); @@ -2193,8 +2180,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // If the distance between accesses and their strides are known constants, // check whether the accesses interlace each other. if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize && - areStridedAccessesIndependent(ConstDist, *CommonStride, - TypeByteSize.first)) { + areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) { LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); return Dependence::NoDep; } @@ -2208,9 +2194,13 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Negative distances are not plausible dependencies. if (SE.isKnownNonPositive(Dist)) { if (SE.isKnownNonNegative(Dist)) { - // Write to the same location with the same size. - assert(HasSameSize && "Accesses must have the same size"); - return Dependence::Forward; + if (HasSameSize) { + // Write to the same location with the same size. + return Dependence::Forward; + } + LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but " + "different type sizes\n"); + return Dependence::Unknown; } bool IsTrueDataDependence = (AIsWrite && !BIsWrite); @@ -2228,7 +2218,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, : Dependence::Unknown; } if (!HasSameSize || - couldPreventStoreLoadForward(ConstDist, TypeByteSize.first)) { + couldPreventStoreLoadForward(ConstDist, TypeByteSize)) { LLVM_DEBUG( dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); return Dependence::ForwardButPreventsForwarding; @@ -2294,8 +2284,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // We know that Dist is positive, but it may not be constant. Use the signed // minimum for computations below, as this ensures we compute the closest // possible dependence distance. - uint64_t MinDistanceNeeded = - MaxStride * (MinNumIter - 1) + TypeByteSize.first; + uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize; if (MinDistanceNeeded > static_cast(MinDistance)) { if (!ConstDist) { // For non-constant distances, we checked the lower bound of the @@ -2323,15 +2312,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist && - couldPreventStoreLoadForward(MinDistance, TypeByteSize.first, - *CommonStride)) + couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride)) return Dependence::BackwardVectorizableButPreventsForwarding; uint64_t MaxVF = MinDepDistBytes / MaxStride; LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance << " with max VF = " << MaxVF << '\n'); - uint64_t MaxVFInBits = MaxVF * TypeByteSize.first * 8; + uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) { // For non-constant distances, we checked the lower bound of the dependence // distance and the distance may be larger at runtime (and safe for diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll index c367b31f6d445..023a8c056968f 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -187,45 +187,6 @@ exit: ret void } -; In the following test, dependence distance is possibly zero, -; but this is not equivalent to the condition known-non-positive -; and known-non-negative. - -define void @possibly_zero_dist_diff_typesz(ptr %p) { -; CHECK-LABEL: 'possibly_zero_dist_diff_typesz' -; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe -; CHECK-NEXT: Dependences: -; CHECK-NEXT: Forward: -; CHECK-NEXT: %ld.p = load i32, ptr %gep.p.iv.i32, align 1 -> -; CHECK-NEXT: store i16 %trunc, ptr %gep.p.iv.i16, align 1 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: -; -entry: - br label %loop - -loop: - %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] - %gep.p.iv.i32 = getelementptr inbounds nuw i32, ptr %p, i16 %iv - %ld.p = load i32, ptr %gep.p.iv.i32, align 1 - %trunc = trunc i32 %ld.p to i16 - %gep.p.iv.i16 = getelementptr inbounds nuw i16, ptr %p, i16 %iv - store i16 %trunc, ptr %gep.p.iv.i16, align 1 - %iv.next = add nuw nsw i16 %iv, 1 - %exit.cond = icmp eq i16 %iv.next, 32 - br i1 %exit.cond, label %exit, label %loop - -exit: - ret void -} - ; In the following test, the sink is loop-invariant. define void @type_size_equivalence_sink_loopinv(ptr nocapture %vec, i64 %n) {