diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 60f92735055bc..405d4a742f37b 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -251,15 +251,18 @@ struct HistogramInfo { /// induction variable and the different reduction variables. class LoopVectorizationLegality { public: - LoopVectorizationLegality( - Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, - TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F, - LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE, - LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, - AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI) + LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, + DominatorTree *DT, TargetTransformInfo *TTI, + TargetLibraryInfo *TLI, Function *F, + LoopAccessInfoManager &LAIs, LoopInfo *LI, + OptimizationRemarkEmitter *ORE, + LoopVectorizationRequirements *R, + LoopVectorizeHints *H, DemandedBits *DB, + AssumptionCache *AC, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, AAResults *AA) : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs), - ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), - PSI(PSI) {} + ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), PSI(PSI), + AA(AA) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. @@ -407,6 +410,14 @@ class LoopVectorizationLegality { return UncountableExitingBB; } + /// Returns true if this is an early exit loop with state-changing or + /// potentially-faulting operations and the condition for the uncountable + /// exit must be determined before any of the state changes or potentially + /// faulting operations take place. 
+ bool hasUncountableExitWithSideEffects() const { + return UncountableExitWithSideEffects; + } + /// Return true if there is store-load forwarding dependencies. bool isSafeForAnyStoreLoadForwardDistances() const { return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances(); @@ -524,20 +535,87 @@ class LoopVectorizationLegality { /// Returns true if this is an early exit loop that can be vectorized. /// Currently, a loop with an uncountable early exit is considered /// vectorizable if: - /// 1. There are no writes to memory in the loop. + /// 1. Writes to memory will access different underlying objects than + /// any load used as part of the uncountable exit condition. /// 2. The loop has only one early uncountable exit /// 3. The early exit block dominates the latch block. /// 4. The latch block has an exact exit count. /// 5. The loop does not contain reductions or recurrences. /// 6. We can prove at compile-time that loops will not contain faulting - /// loads. + /// loads, or that any faulting loads would also occur in a purely + /// scalar loop. /// 7. It is safe to speculatively execute instructions such as divide or - /// call instructions. + /// call instructions. /// The list above is not based on theoretical limitations of vectorization, /// but simply a statement that more work is needed to support these /// additional cases safely. bool isVectorizableEarlyExitLoop(); + /// When vectorizing an early exit loop containing side effects, we need to + /// determine whether an uncounted exit will be taken before any operation + /// that has side effects. + /// + /// Consider a loop like the following: + /// for (int i = 0; i < N; ++i) { + /// a[i] = b[i]; + /// if (c[i] == 0) + /// break; + /// } + /// + /// We have both a load and a store operation occurring before the condition + /// is checked for early termination. 
We could potentially restrict + /// vectorization to cases where we know all addresses are guaranteed to be + /// dereferenceable, which would allow the load before the condition check to + /// be vectorized. + /// + /// The store, however, should not execute across all lanes if early + /// termination occurs before the end of the vector. We must only store to the + /// locations that would have been stored to by a scalar loop. So we need to + /// know what the result of 'c[i] == 0' is before performing the vector store, + /// with or without masking. + /// + /// We can either do this by moving the condition load to the top of the + /// vector body and using the comparison to create masks for other operations + /// in the loop, or by looking ahead one vector iteration and bailing out to + /// the scalar loop if an exit would occur. + /// + /// Using the latter approach (applicable to more targets), we need to hoist + /// the first load (of c[0]) out of the loop then rotate the load within the + /// loop to the next iteration, remembering to adjust the vector trip count. + /// Something like the following: + /// + /// vec.ph: + /// %ci.0 = load <4 x i32>, ptr %c + /// %cmp.0 = icmp eq <4 x i32> %ci.0, zeroinitializer + /// %any.of.0 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.0) + /// br i1 %any.of.0, label %scalar.ph, label %vec.body + /// vec.body: + /// %iv = phi... + /// phi for c[i] if used elsewhere in the loop... + /// other operations in the loop... + /// %iv.next = add i64 %iv, 4 + /// %addr.next = getelementptr i32, ptr %c, i64 %iv.next + /// %ci.next = load <4 x i32>, ptr %addr.next + /// %cmp.next = icmp eq <4 x i32> %ci.next, zeroinitializer + /// %any.of.next = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.next) + /// iv.next compared with shortened vector tripcount... + /// uncountable condition combined with counted condition... + /// br... 
+ /// + /// Doing this means the last few iterations will always be performed by a + /// scalar loop regardless of which exit is taken, and so vector iterations + /// will never execute a memory operation to a location that the scalar loop + /// would not have. + /// + /// This means we must ensure that it is safe to move the load for 'c[i]' + /// before other memory operations (or any other observable side effects) in + /// the loop. + /// + /// Currently, c[i] must have only one user (the comparison used for the + /// uncountable exit) since we would otherwise need to introduce a PHI node + /// for it. + bool canUncountableExitConditionLoadBeMoved(BasicBlock *ExitingBlock); + /// Return true if all of the instructions in the block can be speculatively /// executed, and record the loads/stores that require masking. /// \p SafePtrs is a list of addresses that are known to be legal and we know @@ -646,6 +724,10 @@ class LoopVectorizationLegality { BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI; + // Alias Analysis results used to check for possible aliasing with loads + // used in uncountable exit conditions. + AAResults *AA; + /// If we discover function calls within the loop which have a valid /// vectorized variant, record that fact so that LoopVectorize can /// (potentially) make a better decision on the maximum VF and enable @@ -659,6 +741,10 @@ class LoopVectorizationLegality { /// Keep track of an uncountable exiting block, if there is exactly one early /// exit. BasicBlock *UncountableExitingBB = nullptr; + + /// If true, the loop has at least one uncountable exit and operations within + /// the loop may have observable side effects. 
+  bool UncountableExitWithSideEffects = false;
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index db1971aca4bff..bdc2a0dad8622 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -152,6 +152,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
   LoopAccessInfoManager *LAIs;
   OptimizationRemarkEmitter *ORE;
   ProfileSummaryInfo *PSI;
+  AAResults *AA;
 
   LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
   LLVM_ABI void
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 528ae3822d078..2704e66f3a703 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -15,8 +15,10 @@
 //
 
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1223,8 +1225,18 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     });
   }
 
-  if (!LAI->canVectorizeMemory())
+  if (!LAI->canVectorizeMemory()) {
+    if (hasUncountableExitWithSideEffects()) {
+      reportVectorizationFailure(
+          "Cannot vectorize unsafe dependencies in uncountable exit loop with "
+          "side effects",
+          "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
+          TheLoop);
+      return false;
+    }
+
     return canVectorizeIndirectUnsafeDependences();
+  }
 
   if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
     reportVectorizationFailure("We don't allow storing to uniform addresses",
@@ -1755,16 +1767,24 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
   };
 
+  bool HasSideEffects = false;
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
       if (I.mayWriteToMemory()) {
-        // We don't support writes to memory.
+        if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
+          HasSideEffects = true;
+          continue;
+        }
+
+        // We don't support complex writes to memory.
         reportVectorizationFailure(
-            "Writes to memory unsupported in early exit loops",
-            "Cannot vectorize early exit loop with writes to memory",
+            "Complex writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with complex writes to memory",
             "WritesInEarlyExitLoop", ORE, TheLoop);
         return false;
-      } else if (!IsSafeOperation(&I)) {
+      }
+
+      if (!IsSafeOperation(&I)) {
         reportVectorizationFailure("Early exit loop contains operations that "
                                    "cannot be speculatively executed",
                                    "UnsafeOperationsEarlyExitLoop", ORE,
@@ -1777,15 +1797,22 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
          "Expected latch predecessor to be the early exiting block");
 
-  Predicates.clear();
   SmallVector<LoadInst *, 4> NonDerefLoads;
-  if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
-                      &Predicates)) {
-    reportVectorizationFailure("Loop may fault",
-                               "Cannot vectorize non-read-only early exit loop",
-                               "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
+  // TODO: Handle loops that may fault.
+  if (!HasSideEffects) {
+    // Read-only loop.
+    Predicates.clear();
+    if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
+                        &Predicates)) {
+      reportVectorizationFailure(
+          "Loop may fault", "Cannot vectorize non-read-only early exit loop",
+          "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
+  } else if (!canUncountableExitConditionLoadBeMoved(
+                 SingleUncountableExitingBlock))
     return false;
-  }
+
   // Check non-dereferenceable loads if any.
   for (LoadInst *LI : NonDerefLoads) {
     // Only support unit-stride access for now.
@@ -1813,6 +1840,105 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
                        "backedge taken count: "
                     << *SymbolicMaxBTC << '\n');
   UncountableExitingBB = SingleUncountableExitingBlock;
+  UncountableExitWithSideEffects = HasSideEffects;
+  return true;
+}
+
+bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
+    BasicBlock *ExitingBlock) {
+  // Try to find a load in the critical path for the uncountable exit condition.
+  // This is currently matching about the simplest form we can, expecting
+  // only one in-loop load, the result of which is directly compared against
+  // a loop-invariant value.
+  // FIXME: We're insisting on a single use for now, because otherwise we will
+  //        need to make PHI nodes for other users. That can be done once the
+  //        initial transform code lands.
+  auto *Br = cast<BranchInst>(ExitingBlock->getTerminator());
+
+  using namespace llvm::PatternMatch;
+  Instruction *L = nullptr;
+  Value *Ptr = nullptr;
+  Value *R = nullptr;
+  if (!match(Br->getCondition(),
+             m_OneUse(m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))),
+                             m_Value(R))))) {
+    reportVectorizationFailure(
+        "Early exit loop with store but no supported condition load",
+        "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  // FIXME: Don't rely on operand ordering for the comparison.
+  if (!TheLoop->isLoopInvariant(R)) {
+    reportVectorizationFailure(
+        "Early exit loop with store but no supported condition load",
+        "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  // Make sure that the load address is not loop invariant; we want an
+  // address calculation that we can rotate to the next vector iteration.
+  const SCEV *PtrScev = PSE.getSE()->getSCEV(Ptr);
+  if (!isa<SCEVAddRecExpr>(PtrScev)) {
+    reportVectorizationFailure(
+        "Uncountable exit condition depends on load with an address that is "
+        "not an add recurrence",
+        "EarlyExitLoadInvariantAddress", ORE, TheLoop);
+    return false;
+  }
+
+  // FIXME: Support gathers after first-faulting load support lands.
+  // NOTE(review): any SCEV predicates collected below stay in this local
+  // vector and are never added to PSE; confirm the dereferenceability proof
+  // does not depend on them being checked at runtime.
+  SmallVector<const SCEVPredicate *, 4> Predicates;
+  LoadInst *Load = cast<LoadInst>(L);
+  if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
+                                         &Predicates)) {
+    reportVectorizationFailure(
+        "Loop may fault",
+        "Cannot vectorize potentially faulting early exit loop",
+        "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  // We need to know that the load will be executed before we can hoist a
+  // copy out to run just before the first iteration. Other restrictions are
+  // expected to reject conditional loads before this point, but check
+  // explicitly so that builds without assertions still fail closed.
+  ICFLoopSafetyInfo SafetyInfo;
+  SafetyInfo.computeLoopSafetyInfo(TheLoop);
+  if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+    reportVectorizationFailure(
+        "Load for uncountable exit not guaranteed to execute",
+        "ConditionalUncountableExitLoad", ORE, TheLoop);
+    return false;
+  }
+
+  // Prohibit any potential aliasing with any instruction in the loop which
+  // might store to memory.
+  // FIXME: Relax this constraint where possible.
+  for (auto *BB : TheLoop->blocks()) {
+    for (auto &I : *BB) {
+      if (&I == Load)
+        continue;
+
+      if (I.mayWriteToMemory()) {
+        if (auto *SI = dyn_cast<StoreInst>(&I)) {
+          AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
+          if (AR == AliasResult::NoAlias)
+            continue;
+        }
+
+        reportVectorizationFailure(
+            "Cannot determine whether critical uncountable exit load address "
+            "does not alias with a memory write",
+            "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
+        return false;
+      }
+    }
+  }
+
   return true;
 }
 
@@ -1885,6 +2011,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
     } else {
       if (!isVectorizableEarlyExitLoop()) {
         assert(!hasUncountableEarlyExit() &&
+               !hasUncountableExitWithSideEffects() &&
                "Must be false without vectorizable early-exit loop");
         if (DoExtraAnalysis)
           Result = false;
@@ -1903,6 +2030,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
+  // Bail out for state-changing loops with uncountable exits for now.
+  if (UncountableExitWithSideEffects) {
+    reportVectorizationFailure(
+        "Writes to memory unsupported in early exit loops",
+        "Cannot vectorize early exit loop with writes to memory",
+        "WritesInEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
   if (Result) {
     LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                       << (LAI->getRuntimePointerChecking()->Need
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dd4b3f8e3077b..10e056b14baac 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9811,7 +9811,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   // Check if it is legal to vectorize the loop.
   LoopVectorizationRequirements Requirements;
   LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, F, *LAIs, LI, ORE,
-                                &Requirements, &Hints, DB, AC, BFI, PSI);
+                                &Requirements, &Hints, DB, AC, BFI, PSI, AA);
   if (!LVL.canVectorize(EnableVPlanNativePath)) {
     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
     Hints.emitRemarkWithHints();
@@ -10248,6 +10248,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
   DB = &AM.getResult<DemandedBitsAnalysis>(F);
   ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   LAIs = &AM.getResult<LoopAccessAnalysis>(F);
+  AA = &AM.getResult<AAManager>(F);
 
   auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
   PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 3a8aec34dfe43..61836e4a29d58 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
 ;   return 0;
 ; }
 
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize potentially faulting early exit loop
 ; CHECK: remark: source.cpp:5:9: loop not vectorized
 
 ; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 84d5ceeb601b6..82b44adc6df77 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -3,7 +3,7 @@
 
 define i64 @loop_contains_store(ptr %dest) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops
+; CHECK: LV: Not vectorizing: Early exit loop with store but no supported condition load.
entry: %p1 = alloca [1024 x i8] call void @init_mem(ptr %p1, i64 1024) @@ -56,7 +56,7 @@ exit: define void @loop_contains_store_ee_condition_is_invariant(ptr dereferenceable(40) noalias %array, i16 %ee.val) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_ee_condition_is_invariant' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Early exit loop with store but no supported condition load. entry: br label %for.body @@ -80,7 +80,7 @@ exit: define void @loop_contains_store_fcmp_condition(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp_condition' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Early exit loop with store but no supported condition load. entry: br label %for.body @@ -106,7 +106,7 @@ exit: define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write. entry: %pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8 br label %for.body @@ -135,7 +135,7 @@ exit: define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Loop may fault. 
entry: %unknown.offset = call i64 @get_an_unknown_offset() %unknown.cmp = icmp ult i64 %unknown.offset, 20 @@ -149,10 +149,10 @@ for.body: %data = load i16, ptr %st.addr, align 2 %inc = add nsw i16 %data, 1 store i16 %inc, ptr %st.addr, align 2 - %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv + %ee.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv %ee.val = load i16, ptr %ee.addr, align 2 %ee.cond = icmp sgt i16 %ee.val, 500 - %some.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv + %some.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv store i16 42, ptr %some.addr, align 2 br i1 %ee.cond, label %exit, label %for.inc @@ -167,7 +167,7 @@ exit: define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Loop may fault. entry: %n_bytes = mul nuw nsw i32 %n, 2 call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ] @@ -223,7 +223,7 @@ exit: define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Loop may fault. entry: br label %for.body @@ -249,7 +249,7 @@ exit: define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Complex writes to memory unsupported in early exit loops. 
entry: br label %for.body @@ -327,7 +327,7 @@ exit: define void @loop_contains_store_requiring_alias_check(ptr dereferenceable(40) %array, ptr align 2 dereferenceable(40) %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_requiring_alias_check' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write. entry: br label %for.body @@ -353,7 +353,7 @@ exit: define void @loop_contains_store_condition_load_is_chained(ptr dereferenceable(40) noalias %array, ptr align 8 dereferenceable(160) readonly %offsets, ptr align 2 dereferenceable(40) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_is_chained' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Uncountable exit condition depends on load with an address that is not an add recurrence. entry: br label %for.body @@ -405,5 +405,167 @@ exit: ret void } +define void @loop_contains_store_condition_load_requires_gather(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(512) readonly %pred, ptr align 1 dereferenceable(20) readonly %offsets) { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_requires_gather' +; CHECK: LV: Not vectorizing: Uncountable exit condition depends on load with an address that is not an add recurrence. 
+entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] + %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv + %data = load i16, ptr %st.addr, align 2 + %inc = add nsw i16 %data, 1 + store i16 %inc, ptr %st.addr, align 2 + %offset.addr = getelementptr inbounds nuw i8, ptr %offsets, i64 %iv + %offset = load i8, ptr %offset.addr, align 1 + %offset.zext = zext i8 %offset to i64 + %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %offset.zext + %ee.val = load i16, ptr %ee.addr, align 2 + %ee.cond = icmp sgt i16 %ee.val, 500 + br i1 %ee.cond, label %exit, label %for.inc + +for.inc: + %iv.next = add nuw nsw i64 %iv, 1 + %counted.cond = icmp eq i64 %iv.next, 20 + br i1 %counted.cond, label %exit, label %for.body + +exit: + ret void +} + +define void @loop_contains_store_uncounted_exit_is_a_switch(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_uncounted_exit_is_a_switch' +; CHECK: LV: Not vectorizing: Loop contains an unsupported switch +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] + %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv + %data = load i16, ptr %st.addr, align 2 + %inc = add nsw i16 %data, 1 + store i16 %inc, ptr %st.addr, align 2 + %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv + %ee.val = load i16, ptr %ee.addr, align 2 + switch i16 %ee.val, label %for.inc [ i16 500, label %exit ] + +for.inc: + %iv.next = add nuw nsw i64 %iv, 1 + %counted.cond = icmp eq i64 %iv.next, 20 + br i1 %counted.cond, label %exit, label %for.body + +exit: + ret void +} + +define void @loop_contains_store_uncounted_exit_is_not_guaranteed_to_execute(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_uncounted_exit_is_not_guaranteed_to_execute' +; CHECK: 
LV: Not vectorizing: Early exit is not the latch predecessor. +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] + %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv + %data = load i16, ptr %st.addr, align 2 + %inc = add nsw i16 %data, 1 + store i16 %inc, ptr %st.addr, align 2 + %rem = urem i64 %iv, 5 + %skip.ee.cmp = icmp eq i64 %rem, 0 + br i1 %skip.ee.cmp, label %for.inc, label %ee.block + +ee.block: + %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv + %ee.val = load i16, ptr %ee.addr, align 2 + %ee.cond = icmp sgt i16 %ee.val, 500 + br i1 %ee.cond, label %exit, label %for.inc + +for.inc: + %iv.next = add nuw nsw i64 %iv, 1 + %counted.cond = icmp eq i64 %iv.next, 20 + br i1 %counted.cond, label %exit, label %for.body + +exit: + ret void +} + +define void @test_nodep(ptr align 2 dereferenceable(40) readonly %pred) { +; CHECK-LABEL: LV: Checking a loop in 'test_nodep' +; CHECK: LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write. +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] + %st.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv + store i16 0, ptr %st.addr, align 2 + %ee.val = load i16, ptr %st.addr, align 2 + %ee.cond = icmp sgt i16 %ee.val, 500 + br i1 %ee.cond, label %exit, label %for.inc + +for.inc: + %iv.next = add nuw nsw i64 %iv, 1 + %counted.cond = icmp eq i64 %iv.next, 20 + br i1 %counted.cond, label %exit, label %for.body + +exit: + ret void +} + +define void @histogram_with_uncountable_exit(ptr noalias %buckets, ptr readonly %indices, ptr align 2 dereferenceable(40) readonly %pred) { +; CHECK-LABEL: LV: Checking a loop in 'histogram_with_uncountable_exit' +; CHECK: LV: Not vectorizing: Cannot vectorize unsafe dependencies in uncountable exit loop with side effects. 
+entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] + %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv + %l.idx = load i32, ptr %gep.indices, align 4 + %idxprom1 = zext i32 %l.idx to i64 + %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1 + %l.bucket = load i32, ptr %gep.bucket, align 4 + %inc = add nsw i32 %l.bucket, 1 + store i32 %inc, ptr %gep.bucket, align 4 + %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv + %ee.val = load i16, ptr %ee.addr, align 2 + %ee.cond = icmp sgt i16 %ee.val, 500 + br i1 %ee.cond, label %exit, label %for.inc + +for.inc: + %iv.next = add nuw nsw i64 %iv, 1 + %counted.cond = icmp eq i64 %iv.next, 20 + br i1 %counted.cond, label %exit, label %for.body + +exit: + ret void +} + +define void @uncountable_exit_condition_address_is_invariant(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(2) readonly %pred) { +; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_condition_address_is_invariant' +; CHECK: LV: Not vectorizing: Uncountable exit condition depends on load with an address that is not an add recurrence. +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] + %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv + %data = load i16, ptr %st.addr, align 2 + %inc = add nsw i16 %data, 1 + store i16 %inc, ptr %st.addr, align 2 + %ee.val = load i16, ptr %pred, align 2 + %ee.cond = icmp sgt i16 %ee.val, 500 + br i1 %ee.cond, label %exit, label %for.inc + +for.inc: + %iv.next = add nuw nsw i64 %iv, 1 + %counted.cond = icmp eq i64 %iv.next, 20 + br i1 %counted.cond, label %exit, label %for.body + +exit: + ret void +} + declare void @init_mem(ptr, i64); declare i64 @get_an_unknown_offset();