diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 92304edd67a44..af4e54e1bec9f 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -183,10 +183,13 @@ class MemoryDepChecker { MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC, DominatorTree *DT, const Loop *L, const DenseMap &SymbolicStrides, - unsigned MaxTargetVectorWidthInBits) + unsigned MaxTargetVectorWidthInBits, + bool AllowNonPow2StoreLoadForwardDistance) : PSE(PSE), AC(AC), DT(DT), InnermostLoop(L), SymbolicStrides(SymbolicStrides), - MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} + MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits), + AllowNonPow2StoreLoadForwardDistance( + AllowNonPow2StoreLoadForwardDistance) {} /// Register the location (instructions are given increasing numbers) /// of a write access. @@ -223,17 +226,29 @@ class MemoryDepChecker { /// Return true if there are no store-load forwarding dependencies. bool isSafeForAnyStoreLoadForwardDistances() const { - return MaxStoreLoadForwardSafeDistanceInBits == - std::numeric_limits::max(); + return MaxPowerOf2StoreLoadForwardSafeDistanceInBits == + std::numeric_limits::max() && + MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits == + std::numeric_limits::max(); } - /// Return safe power-of-2 number of elements, which do not prevent store-load - /// forwarding, multiplied by the size of the elements in bits. - uint64_t getStoreLoadForwardSafeDistanceInBits() const { + /// Return safe number of elements, which do not prevent store-load + /// forwarding, multiplied by the size of the elements in bits (power-of-2). + uint64_t getPowerOf2StoreLoadForwardSafeDistanceInBits() const { assert(!isSafeForAnyStoreLoadForwardDistances() && "Expected the distance, that prevent store-load forwarding, to be " "set."); - return MaxStoreLoadForwardSafeDistanceInBits; + return MaxPowerOf2StoreLoadForwardSafeDistanceInBits; + } + + /// Return safe number of elements, which do not prevent store-load + /// forwarding, multiplied by the size of the elements in bits + /// (non-power-of-2). + uint64_t getNonPowerOf2StoreLoadForwardSafeDistanceInBits() const { + assert(!isSafeForAnyStoreLoadForwardDistances() && + "Expected the distance, that prevent store-load forwarding, to be " + "set."); + return MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits; } /// In same cases when the dependency check fails we can still @@ -337,9 +352,14 @@ class MemoryDepChecker { /// restrictive. uint64_t MaxSafeVectorWidthInBits = -1U; - /// Maximum power-of-2 number of elements, which do not prevent store-load - /// forwarding, multiplied by the size of the elements in bits. - uint64_t MaxStoreLoadForwardSafeDistanceInBits = + /// Maximum number of elements, which do not prevent store-load forwarding, + /// multiplied by the size of the elements in bits (power-of-2). + uint64_t MaxPowerOf2StoreLoadForwardSafeDistanceInBits = + std::numeric_limits::max(); + + /// Maximum number of elements, which do not prevent store-load forwarding, + /// multiplied by the size of the elements in bits (non-power-of-2). + uint64_t MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits = std::numeric_limits::max(); /// Whether we should try to vectorize the loop with runtime checks, if the @@ -366,6 +386,10 @@ class MemoryDepChecker { /// backwards-vectorizable or unknown (triggering a runtime check). unsigned MaxTargetVectorWidthInBits = 0; + /// True if current target supports non-power-of-2 dependence distances, + /// allows to support non-power-of-2 store-load forwarding distance analysis. + bool AllowNonPow2StoreLoadForwardDistance = false; + /// Mapping of SCEV expressions to their expanded pointer bounds (pair of /// start and end pointer expressions). DenseMap, diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 43ff084816d18..65630174d0b0a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -415,7 +415,7 @@ class LoopVectorizationLegality { /// Return safe power-of-2 number of elements, which do not prevent store-load /// forwarding and safe to operate simultaneously. uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const { - return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits(); + return LAI->getDepChecker().getPowerOf2StoreLoadForwardSafeDistanceInBits(); } /// Returns true if vector representation of the instruction \p I diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index a5535339a714f..759d8ed2300ef 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1838,7 +1838,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, // Maximum vector factor. uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 = std::min(VectorizerParams::MaxVectorWidth * TypeByteSize, - MaxStoreLoadForwardSafeDistanceInBits); + MaxPowerOf2StoreLoadForwardSafeDistanceInBits); + uint64_t MaxVFWithoutSLForwardIssuesNonPowerOf2 = 0; // Compute the smallest VF at which the store and load would be misaligned. for (uint64_t VF = 2 * TypeByteSize; @@ -1850,24 +1851,68 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, break; } } + // If target supports non-power-of-2 store-load forwarding distances, then it + // supports non-power-of-2 vector factor. So, we iterate in a backward order + // to find largest VF, which allows aligned stores-loads or the number of + // iterations between conflicting memory addresses is not less than 8 + // (NumItersForStoreLoadThroughMemory). If the HW supports non-power-of-2 + // store-load forwarding distance, we can choose any vector factor, which is + // the whole divider of the MaxVFWithoutSLForwardIssuesNonPowerOf2. Say, if + // MaxVFWithoutSLForwardIssuesNonPowerOf2 is 9, then we can use vector factors + // 3 and 9. If it is 6, we can use vector factors 2, 3, 6. All these are safe. + if (AllowNonPow2StoreLoadForwardDistance) { + MaxVFWithoutSLForwardIssuesNonPowerOf2 = + std::min(8 * VectorizerParams::MaxVectorWidth / TypeByteSize, + MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits); + + const bool IsSafeForAnyStoreLoadForwardDistances = + isSafeForAnyStoreLoadForwardDistances(); + for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2; + VF > MaxVFWithoutSLForwardIssuesPowerOf2; VF -= TypeByteSize) { + if (Distance % VF == 0 || + Distance / VF >= NumItersForStoreLoadThroughMemory) { + uint64_t GCD = + IsSafeForAnyStoreLoadForwardDistances + ? VF + : std::gcd(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, + VF); + MaxVFWithoutSLForwardIssuesNonPowerOf2 = GCD; + break; + } + } + } - if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) { + if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize && + MaxVFWithoutSLForwardIssuesNonPowerOf2 < 2 * TypeByteSize) { LLVM_DEBUG( dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n"); return true; } + // Handle non-power-2 store-load forwarding distance, power-of-2 distance can + // be calculated. + if (AllowNonPow2StoreLoadForwardDistance && CommonStride && + MaxVFWithoutSLForwardIssuesNonPowerOf2 < + MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits && + MaxVFWithoutSLForwardIssuesNonPowerOf2 != + 8 * VectorizerParams::MaxVectorWidth / TypeByteSize) { + uint64_t MaxVF = MaxVFWithoutSLForwardIssuesNonPowerOf2 / CommonStride; + uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; + MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits = + std::min(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits); + } + if (CommonStride && MaxVFWithoutSLForwardIssuesPowerOf2 < - MaxStoreLoadForwardSafeDistanceInBits && + MaxPowerOf2StoreLoadForwardSafeDistanceInBits && MaxVFWithoutSLForwardIssuesPowerOf2 != VectorizerParams::MaxVectorWidth * TypeByteSize) { uint64_t MaxVF = bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; - MaxStoreLoadForwardSafeDistanceInBits = - std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits); + MaxPowerOf2StoreLoadForwardSafeDistanceInBits = + std::min(MaxPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits); } return false; } @@ -3034,7 +3079,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2; DepChecker = std::make_unique( - *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits); + *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, + TTI && TTI->hasActiveVectorLength()); PtrRtChecking = std::make_unique(*DepChecker, SE); if (canAnalyzeLoop()) CanVecMem = analyzeLoop(AA, LI, TLI, DT); @@ -3048,7 +3094,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { OS << " with a maximum safe vector width of " << DC.getMaxSafeVectorWidthInBits() << " bits"; if (!DC.isSafeForAnyStoreLoadForwardDistances()) { - uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits(); + uint64_t SLDist = DC.getNonPowerOf2StoreLoadForwardSafeDistanceInBits(); + if (SLDist == std::numeric_limits::max()) + SLDist = DC.getPowerOf2StoreLoadForwardSafeDistanceInBits(); OS << ", with a maximum safe store-load forward width of " << SLDist << " bits"; } diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll index 79dcfd2c4c08d..15fb79807b965 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='print' -disable-output -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s -; RUN: opt -passes='print' -disable-output -mtriple=x86_64 < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,RISCV64 +; RUN: opt -passes='print' -disable-output -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,X86_64 ; REQUIRES: riscv-registered-target, x86-registered-target @@ -41,21 +41,37 @@ exit: ; Dependence distance is less than trip count, thus we must prove that ; chosen VF guaranteed to be less than dependence distance. define void @test_may_clobber1(ptr %p) { -; CHECK-LABEL: 'test_may_clobber1' -; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 256 bits -; CHECK-NEXT: Dependences: -; CHECK-NEXT: BackwardVectorizable: -; CHECK-NEXT: %v = load i64, ptr %a1, align 32 -> -; CHECK-NEXT: store i64 %v, ptr %a2, align 32 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; RISCV64-LABEL: 'test_may_clobber1' +; RISCV64-NEXT: loop: +; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 320 bits +; RISCV64-NEXT: Dependences: +; RISCV64-NEXT: BackwardVectorizable: +; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 -> +; RISCV64-NEXT: store i64 %v, ptr %a2, align 32 +; RISCV64-EMPTY: +; RISCV64-NEXT: Run-time memory checks: +; RISCV64-NEXT: Grouped accesses: +; RISCV64-EMPTY: +; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; RISCV64-NEXT: SCEV assumptions: +; RISCV64-EMPTY: +; RISCV64-NEXT: Expressions re-written: +; +; X86_64-LABEL: 'test_may_clobber1' +; X86_64-NEXT: loop: +; X86_64-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 256 bits +; X86_64-NEXT: Dependences: +; X86_64-NEXT: BackwardVectorizable: +; X86_64-NEXT: %v = load i64, ptr %a1, align 32 -> +; X86_64-NEXT: store i64 %v, ptr %a2, align 32 +; X86_64-EMPTY: +; X86_64-NEXT: Run-time memory checks: +; X86_64-NEXT: Grouped accesses: +; X86_64-EMPTY: +; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; X86_64-NEXT: SCEV assumptions: +; X86_64-EMPTY: +; X86_64-NEXT: Expressions re-written: ; entry: br label %loop @@ -76,22 +92,38 @@ exit: } define void @test_may_clobber2(ptr %p) { -; CHECK-LABEL: 'test_may_clobber2' -; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. -; CHECK-NEXT: Dependences: -; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: -; CHECK-NEXT: %v = load i64, ptr %a1, align 32 -> -; CHECK-NEXT: store i64 %v, ptr %a2, align 32 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; RISCV64-LABEL: 'test_may_clobber2' +; RISCV64-NEXT: loop: +; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 576 bits, with a maximum safe store-load forward width of 192 bits +; RISCV64-NEXT: Dependences: +; RISCV64-NEXT: BackwardVectorizable: +; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 -> +; RISCV64-NEXT: store i64 %v, ptr %a2, align 32 +; RISCV64-EMPTY: +; RISCV64-NEXT: Run-time memory checks: +; RISCV64-NEXT: Grouped accesses: +; RISCV64-EMPTY: +; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; RISCV64-NEXT: SCEV assumptions: +; RISCV64-EMPTY: +; RISCV64-NEXT: Expressions re-written: +; +; X86_64-LABEL: 'test_may_clobber2' +; X86_64-NEXT: loop: +; X86_64-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; X86_64-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. +; X86_64-NEXT: Dependences: +; X86_64-NEXT: BackwardVectorizableButPreventsForwarding: +; X86_64-NEXT: %v = load i64, ptr %a1, align 32 -> +; X86_64-NEXT: store i64 %v, ptr %a2, align 32 +; X86_64-EMPTY: +; X86_64-NEXT: Run-time memory checks: +; X86_64-NEXT: Grouped accesses: +; X86_64-EMPTY: +; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; X86_64-NEXT: SCEV assumptions: +; X86_64-EMPTY: +; X86_64-NEXT: Expressions re-written: ; entry: br label %loop @@ -112,21 +144,37 @@ exit: } define void @test_may_clobber3(ptr %p) { -; CHECK-LABEL: 'test_may_clobber3' -; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 128 bits -; CHECK-NEXT: Dependences: -; CHECK-NEXT: BackwardVectorizable: -; CHECK-NEXT: %v = load i64, ptr %a1, align 32 -> -; CHECK-NEXT: store i64 %v, ptr %a2, align 32 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; RISCV64-LABEL: 'test_may_clobber3' +; RISCV64-NEXT: loop: +; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 320 bits +; RISCV64-NEXT: Dependences: +; RISCV64-NEXT: BackwardVectorizable: +; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 -> +; RISCV64-NEXT: store i64 %v, ptr %a2, align 32 +; RISCV64-EMPTY: +; RISCV64-NEXT: Run-time memory checks: +; RISCV64-NEXT: Grouped accesses: +; RISCV64-EMPTY: +; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; RISCV64-NEXT: SCEV assumptions: +; RISCV64-EMPTY: +; RISCV64-NEXT: Expressions re-written: +; +; X86_64-LABEL: 'test_may_clobber3' +; X86_64-NEXT: loop: +; X86_64-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 128 bits +; X86_64-NEXT: Dependences: +; X86_64-NEXT: BackwardVectorizable: +; X86_64-NEXT: %v = load i64, ptr %a1, align 32 -> +; X86_64-NEXT: store i64 %v, ptr %a2, align 32 +; X86_64-EMPTY: +; X86_64-NEXT: Run-time memory checks: +; X86_64-NEXT: Grouped accesses: +; X86_64-EMPTY: +; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; X86_64-NEXT: SCEV assumptions: +; X86_64-EMPTY: +; X86_64-NEXT: Expressions re-written: ; entry: br label %loop @@ -215,26 +263,46 @@ exit: } define void @non_power_2_storeloadforward(ptr %A) { -; CHECK-LABEL: 'non_power_2_storeloadforward' -; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. -; CHECK-NEXT: Dependences: -; CHECK-NEXT: Forward: -; CHECK-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 -> -; CHECK-NEXT: store i32 %add3, ptr %gep.iv, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: -; CHECK-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 -> -; CHECK-NEXT: store i32 %add3, ptr %gep.iv, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: -; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: -; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; RISCV64-LABEL: 'non_power_2_storeloadforward' +; RISCV64-NEXT: loop: +; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 96 bits, with a maximum safe store-load forward width of 96 bits +; RISCV64-NEXT: Dependences: +; RISCV64-NEXT: Forward: +; RISCV64-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 -> +; RISCV64-NEXT: store i32 %add3, ptr %gep.iv, align 4 +; RISCV64-EMPTY: +; RISCV64-NEXT: BackwardVectorizable: +; RISCV64-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 -> +; RISCV64-NEXT: store i32 %add3, ptr %gep.iv, align 4 +; RISCV64-EMPTY: +; RISCV64-NEXT: Run-time memory checks: +; RISCV64-NEXT: Grouped accesses: +; RISCV64-EMPTY: +; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; RISCV64-NEXT: SCEV assumptions: +; RISCV64-EMPTY: +; RISCV64-NEXT: Expressions re-written: +; +; X86_64-LABEL: 'non_power_2_storeloadforward' +; X86_64-NEXT: loop: +; X86_64-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; X86_64-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. +; X86_64-NEXT: Dependences: +; X86_64-NEXT: Forward: +; X86_64-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 -> +; X86_64-NEXT: store i32 %add3, ptr %gep.iv, align 4 +; X86_64-EMPTY: +; X86_64-NEXT: BackwardVectorizableButPreventsForwarding: +; X86_64-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 -> +; X86_64-NEXT: store i32 %add3, ptr %gep.iv, align 4 +; X86_64-EMPTY: +; X86_64-NEXT: Run-time memory checks: +; X86_64-NEXT: Grouped accesses: +; X86_64-EMPTY: +; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop. +; X86_64-NEXT: SCEV assumptions: +; X86_64-EMPTY: +; X86_64-NEXT: Expressions re-written: ; entry: br label %loop