From 6142fdf0261e79bcd3e93b3ff63ecc5eafa9c593 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 29 Apr 2025 20:44:31 +0000 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .../llvm/Analysis/LoopAccessAnalysis.h | 43 +++- .../Vectorize/LoopVectorizationLegality.h | 2 +- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 57 ++++- .../safe-with-dep-distance-non-power-of-2.ll | 204 ++++++++++++------ 4 files changed, 218 insertions(+), 88 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index f715e0ec8dbb4..02647adea95a8 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -180,9 +180,10 @@ class MemoryDepChecker { MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, const DenseMap<Value *, const SCEV *> &SymbolicStrides, - unsigned MaxTargetVectorWidthInBits) + unsigned MaxTargetVectorWidthInBits, bool AllowNonPow2Deps) : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides), - MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} + MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits), + AllowNonPow2Deps(AllowNonPow2Deps) {} /// Register the location (instructions are given increasing numbers) /// of a write access. @@ -218,17 +219,29 @@ class MemoryDepChecker { /// Return true if there are no store-load forwarding dependencies. 
bool isSafeForAnyStoreLoadForwardDistances() const { - return MaxStoreLoadForwardSafeDistanceInBits == - std::numeric_limits<uint64_t>::max(); + return MaxPowerOf2StoreLoadForwardSafeDistanceInBits == + std::numeric_limits<uint64_t>::max() && + MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits == + std::numeric_limits<uint64_t>::max(); } - /// Return safe power-of-2 number of elements, which do not prevent store-load - /// forwarding, multiplied by the size of the elements in bits. - uint64_t getStoreLoadForwardSafeDistanceInBits() const { + /// Return safe number of elements, which do not prevent store-load + /// forwarding, multiplied by the size of the elements in bits (power-of-2). + uint64_t getPowerOf2StoreLoadForwardSafeDistanceInBits() const { assert(!isSafeForAnyStoreLoadForwardDistances() && "Expected the distance, that prevent store-load forwarding, to be " "set."); - return MaxStoreLoadForwardSafeDistanceInBits; + return MaxPowerOf2StoreLoadForwardSafeDistanceInBits; + } + + /// Return safe number of elements, which do not prevent store-load + /// forwarding, multiplied by the size of the elements in bits + /// (non-power-of-2). + uint64_t getNonPowerOf2StoreLoadForwardSafeDistanceInBits() const { + assert(!isSafeForAnyStoreLoadForwardDistances() && + "Expected the distance, that prevent store-load forwarding, to be " + "set."); + return MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits; } /// In same cases when the dependency check fails we can still @@ -319,9 +332,14 @@ class MemoryDepChecker { /// restrictive. uint64_t MaxSafeVectorWidthInBits = -1U; - /// Maximum power-of-2 number of elements, which do not prevent store-load - /// forwarding, multiplied by the size of the elements in bits. - uint64_t MaxStoreLoadForwardSafeDistanceInBits = + /// Maximum number of elements, which do not prevent store-load forwarding, + /// multiplied by the size of the elements in bits (power-of-2). 
+ uint64_t MaxPowerOf2StoreLoadForwardSafeDistanceInBits = + std::numeric_limits<uint64_t>::max(); + + /// Maximum number of elements, which do not prevent store-load forwarding, + /// multiplied by the size of the elements in bits (non-power-of-2). + uint64_t MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits = std::numeric_limits<uint64_t>::max(); /// If we see a non-constant dependence distance we can still try to @@ -348,6 +366,9 @@ class MemoryDepChecker { /// backwards-vectorizable or unknown (triggering a runtime check). unsigned MaxTargetVectorWidthInBits = 0; + /// True if current target supports non-power-of-2 dependence distances. + bool AllowNonPow2Deps = false; + /// Mapping of SCEV expressions to their expanded pointer bounds (pair of /// start and end pointer expressions). DenseMap<std::pair<const SCEV *, Type *>, diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index d654ac3ec9273..65d9938c8a0cd 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -415,7 +415,7 @@ class LoopVectorizationLegality { /// Return safe power-of-2 number of elements, which do not prevent store-load /// forwarding and safe to operate simultaneously. uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const { - return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits(); + return LAI->getDepChecker().getPowerOf2StoreLoadForwardSafeDistanceInBits(); } /// Returns true if vector representation of the instruction \p I diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index c65bb8be8b996..30fd50bd15303 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1757,7 +1757,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, // Maximum vector factor. 
uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 = std::min(VectorizerParams::MaxVectorWidth * TypeByteSize, - MaxStoreLoadForwardSafeDistanceInBits); + MaxPowerOf2StoreLoadForwardSafeDistanceInBits); + uint64_t MaxVFWithoutSLForwardIssuesNonPowerOf2 = 0; // Compute the smallest VF at which the store and load would be misaligned. for (uint64_t VF = 2 * TypeByteSize; @@ -1769,24 +1770,61 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, break; } } + // RISCV VLA supports non-power-2 vector factor. So, we iterate in a + // backward order to find largest VF, which allows aligned stores-loads or + // the number of iterations between conflicting memory addresses is not less + // than 8 (NumItersForStoreLoadThroughMemory). + if (AllowNonPow2Deps) { + MaxVFWithoutSLForwardIssuesNonPowerOf2 = + std::min(8 * VectorizerParams::MaxVectorWidth / TypeByteSize, + MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits); + + for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2; + VF > MaxVFWithoutSLForwardIssuesPowerOf2; VF -= TypeByteSize) { + if (Distance % VF == 0 || + Distance / VF >= NumItersForStoreLoadThroughMemory) { + uint64_t GCD = + isSafeForAnyStoreLoadForwardDistances() + ? VF + : std::gcd(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, + VF); + MaxVFWithoutSLForwardIssuesNonPowerOf2 = GCD; + break; + } + } + } - if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) { + if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize && + MaxVFWithoutSLForwardIssuesNonPowerOf2 < 2 * TypeByteSize) { LLVM_DEBUG( dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n"); return true; } + // Handle non-power-2 store-load forwarding distance, power-of-2 distance can + // be calculated. 
+ if (AllowNonPow2Deps && CommonStride && + MaxVFWithoutSLForwardIssuesNonPowerOf2 < + MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits && + MaxVFWithoutSLForwardIssuesNonPowerOf2 != + 8 * VectorizerParams::MaxVectorWidth / TypeByteSize) { + uint64_t MaxVF = MaxVFWithoutSLForwardIssuesNonPowerOf2 / CommonStride; + uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; + MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits = + std::min(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits); + } + if (CommonStride && MaxVFWithoutSLForwardIssuesPowerOf2 < - MaxStoreLoadForwardSafeDistanceInBits && + MaxPowerOf2StoreLoadForwardSafeDistanceInBits && MaxVFWithoutSLForwardIssuesPowerOf2 != VectorizerParams::MaxVectorWidth * TypeByteSize) { uint64_t MaxVF = bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; - MaxStoreLoadForwardSafeDistanceInBits = - std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits); + MaxPowerOf2StoreLoadForwardSafeDistanceInBits = + std::min(MaxPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits); } return false; } @@ -2985,8 +3023,9 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, MaxTargetVectorWidthInBits = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2; - DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides, - MaxTargetVectorWidthInBits); + DepChecker = std::make_unique<MemoryDepChecker>( + *PSE, L, SymbolicStrides, MaxTargetVectorWidthInBits, + TTI && TTI->hasActiveVectorLength(0, nullptr, Align())); PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE); if (canAnalyzeLoop()) CanVecMem = analyzeLoop(AA, LI, TLI, DT); @@ -3000,7 +3039,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { OS << " with a maximum safe vector width of " << DC.getMaxSafeVectorWidthInBits() << " bits"; if (!DC.isSafeForAnyStoreLoadForwardDistances()) { - uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits(); + uint64_t SLDist = 
+ DC.getNonPowerOf2StoreLoadForwardSafeDistanceInBits(); + if (SLDist == std::numeric_limits<uint64_t>::max()) + SLDist = DC.getPowerOf2StoreLoadForwardSafeDistanceInBits(); OS << ", with a maximum safe store-load forward width of " << SLDist << " bits"; } diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll index 79dcfd2c4c08d..15fb79807b965 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='print<access-info>' -disable-output -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s -; RUN: opt -passes='print<access-info>' -disable-output -mtriple=x86_64 < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print<access-info>' -disable-output -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,RISCV64 +; RUN: opt -passes='print<access-info>' -disable-output -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,X86_64 ; REQUIRES: riscv-registered-target, x86-registered-target @@ -41,21 +41,37 @@ exit: ; Dependence distance is less than trip count, thus we must prove that ; chosen VF guaranteed to be less than dependence distance. define void @test_may_clobber1(ptr %p) { -; CHECK-LABEL: 'test_may_clobber1' -; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 256 bits -; CHECK-NEXT: Dependences: -; CHECK-NEXT: BackwardVectorizable: -; CHECK-NEXT: %v = load i64, ptr %a1, align 32 -> -; CHECK-NEXT: store i64 %v, ptr %a2, align 32 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
-; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; RISCV64-LABEL: 'test_may_clobber1' +; RISCV64-NEXT: loop: +; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 320 bits +; RISCV64-NEXT: Dependences: +; RISCV64-NEXT: BackwardVectorizable: +; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 -> +; RISCV64-NEXT: store i64 %v, ptr %a2, align 32 +; RISCV64-EMPTY: +; RISCV64-NEXT: Run-time memory checks: +; RISCV64-NEXT: Grouped accesses: +; RISCV64-EMPTY: +; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; RISCV64-NEXT: SCEV assumptions: +; RISCV64-EMPTY: +; RISCV64-NEXT: Expressions re-written: +; +; X86_64-LABEL: 'test_may_clobber1' +; X86_64-NEXT: loop: +; X86_64-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 256 bits +; X86_64-NEXT: Dependences: +; X86_64-NEXT: BackwardVectorizable: +; X86_64-NEXT: %v = load i64, ptr %a1, align 32 -> +; X86_64-NEXT: store i64 %v, ptr %a2, align 32 +; X86_64-EMPTY: +; X86_64-NEXT: Run-time memory checks: +; X86_64-NEXT: Grouped accesses: +; X86_64-EMPTY: +; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; X86_64-NEXT: SCEV assumptions: +; X86_64-EMPTY: +; X86_64-NEXT: Expressions re-written: ; entry: br label %loop @@ -76,22 +92,38 @@ exit: } define void @test_may_clobber2(ptr %p) { -; CHECK-LABEL: 'test_may_clobber2' -; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. 
-; CHECK-NEXT: Dependences: -; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: -; CHECK-NEXT: %v = load i64, ptr %a1, align 32 -> -; CHECK-NEXT: store i64 %v, ptr %a2, align 32 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; RISCV64-LABEL: 'test_may_clobber2' +; RISCV64-NEXT: loop: +; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 576 bits, with a maximum safe store-load forward width of 192 bits +; RISCV64-NEXT: Dependences: +; RISCV64-NEXT: BackwardVectorizable: +; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 -> +; RISCV64-NEXT: store i64 %v, ptr %a2, align 32 +; RISCV64-EMPTY: +; RISCV64-NEXT: Run-time memory checks: +; RISCV64-NEXT: Grouped accesses: +; RISCV64-EMPTY: +; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; RISCV64-NEXT: SCEV assumptions: +; RISCV64-EMPTY: +; RISCV64-NEXT: Expressions re-written: +; +; X86_64-LABEL: 'test_may_clobber2' +; X86_64-NEXT: loop: +; X86_64-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; X86_64-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. +; X86_64-NEXT: Dependences: +; X86_64-NEXT: BackwardVectorizableButPreventsForwarding: +; X86_64-NEXT: %v = load i64, ptr %a1, align 32 -> +; X86_64-NEXT: store i64 %v, ptr %a2, align 32 +; X86_64-EMPTY: +; X86_64-NEXT: Run-time memory checks: +; X86_64-NEXT: Grouped accesses: +; X86_64-EMPTY: +; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; X86_64-NEXT: SCEV assumptions: +; X86_64-EMPTY: +; X86_64-NEXT: Expressions re-written: ; entry: br label %loop @@ -112,21 +144,37 @@ exit: } define void @test_may_clobber3(ptr %p) { -; CHECK-LABEL: 'test_may_clobber3' -; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 128 bits -; CHECK-NEXT: Dependences: -; CHECK-NEXT: BackwardVectorizable: -; CHECK-NEXT: %v = load i64, ptr %a1, align 32 -> -; CHECK-NEXT: store i64 %v, ptr %a2, align 32 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; RISCV64-LABEL: 'test_may_clobber3' +; RISCV64-NEXT: loop: +; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 320 bits +; RISCV64-NEXT: Dependences: +; RISCV64-NEXT: BackwardVectorizable: +; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 -> +; RISCV64-NEXT: store i64 %v, ptr %a2, align 32 +; RISCV64-EMPTY: +; RISCV64-NEXT: Run-time memory checks: +; RISCV64-NEXT: Grouped accesses: +; RISCV64-EMPTY: +; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; RISCV64-NEXT: SCEV assumptions: +; RISCV64-EMPTY: +; RISCV64-NEXT: Expressions re-written: +; +; X86_64-LABEL: 'test_may_clobber3' +; X86_64-NEXT: loop: +; X86_64-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 128 bits +; X86_64-NEXT: Dependences: +; X86_64-NEXT: BackwardVectorizable: +; X86_64-NEXT: %v = load i64, ptr %a1, align 32 -> +; X86_64-NEXT: store i64 %v, ptr %a2, align 32 +; X86_64-EMPTY: +; X86_64-NEXT: Run-time memory checks: +; X86_64-NEXT: Grouped accesses: +; X86_64-EMPTY: +; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; X86_64-NEXT: SCEV assumptions: +; X86_64-EMPTY: +; X86_64-NEXT: Expressions re-written: ; entry: br label %loop @@ -215,26 +263,46 @@ exit: } define void @non_power_2_storeloadforward(ptr %A) { -; CHECK-LABEL: 'non_power_2_storeloadforward' -; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. -; CHECK-NEXT: Dependences: -; CHECK-NEXT: Forward: -; CHECK-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 -> -; CHECK-NEXT: store i32 %add3, ptr %gep.iv, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: -; CHECK-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 -> -; CHECK-NEXT: store i32 %add3, ptr %gep.iv, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
-; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; RISCV64-LABEL: 'non_power_2_storeloadforward' +; RISCV64-NEXT: loop: +; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 96 bits, with a maximum safe store-load forward width of 96 bits +; RISCV64-NEXT: Dependences: +; RISCV64-NEXT: Forward: +; RISCV64-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 -> +; RISCV64-NEXT: store i32 %add3, ptr %gep.iv, align 4 +; RISCV64-EMPTY: +; RISCV64-NEXT: BackwardVectorizable: +; RISCV64-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 -> +; RISCV64-NEXT: store i32 %add3, ptr %gep.iv, align 4 +; RISCV64-EMPTY: +; RISCV64-NEXT: Run-time memory checks: +; RISCV64-NEXT: Grouped accesses: +; RISCV64-EMPTY: +; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; RISCV64-NEXT: SCEV assumptions: +; RISCV64-EMPTY: +; RISCV64-NEXT: Expressions re-written: +; +; X86_64-LABEL: 'non_power_2_storeloadforward' +; X86_64-NEXT: loop: +; X86_64-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; X86_64-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. +; X86_64-NEXT: Dependences: +; X86_64-NEXT: Forward: +; X86_64-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 -> +; X86_64-NEXT: store i32 %add3, ptr %gep.iv, align 4 +; X86_64-EMPTY: +; X86_64-NEXT: BackwardVectorizableButPreventsForwarding: +; X86_64-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 -> +; X86_64-NEXT: store i32 %add3, ptr %gep.iv, align 4 +; X86_64-EMPTY: +; X86_64-NEXT: Run-time memory checks: +; X86_64-NEXT: Grouped accesses: +; X86_64-EMPTY: +; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; X86_64-NEXT: SCEV assumptions: +; X86_64-EMPTY: +; X86_64-NEXT: Expressions re-written: ; entry: br label %loop