diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 7cb96422589fe..2a44ec8032236 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -194,7 +194,8 @@ class MemoryDepChecker { /// of a write access. LLVM_ABI void addAccess(LoadInst *LI); - /// Check whether the dependencies between the accesses are safe. + /// Check whether the dependencies between the accesses are safe, and record + /// the dependence information in Dependences if so. /// /// Only checks sets with elements in \p CheckDeps. LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets, @@ -654,7 +655,8 @@ class RuntimePointerChecking { /// For memory dependences that cannot be determined at compile time, it /// generates run-time checks to prove independence. This is done by /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the -/// RuntimePointerCheck class. +/// RuntimePointerCheck class. \p AllowPartial determines whether partial checks +/// are generated when not all pointers could be analyzed. /// /// If pointers can wrap or can't be expressed as affine AddRec expressions by /// ScalarEvolution, we will generate run-time checks by emitting a @@ -667,7 +669,8 @@ class LoopAccessInfo { LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI); + DominatorTree *DT, LoopInfo *LI, + bool AllowPartial = false); /// Return true we can analyze the memory accesses in the loop and there are /// no memory dependence cycles. Note that for dependences between loads & @@ -682,6 +685,11 @@ class LoopAccessInfo { /// not legal to insert them.
bool hasConvergentOp() const { return HasConvergentOp; } + /// Return true if, when runtime pointer checking does not have complete + /// results, it instead has partial results for those memory accesses that + /// could be analyzed. + bool hasAllowPartial() const { return AllowPartial; } + const RuntimePointerChecking *getRuntimePointerChecking() const { return PtrRtChecking.get(); } @@ -784,20 +792,30 @@ class LoopAccessInfo { /// We need to check that all of the pointers in this list are disjoint /// at runtime. Using std::unique_ptr to make using move ctor simpler. + /// If AllowPartial is true then this list may contain only partial + /// information when we've failed to analyze all the memory accesses in the + /// loop, in which case HasCompletePtrRtChecking will be false. std::unique_ptr PtrRtChecking; - /// the Memory Dependence Checker which can determine the + /// The Memory Dependence Checker which can determine the /// loop-independent and loop-carried dependences between memory accesses. + /// This will be empty if we've failed to analyze all the memory accesses in + /// the loop (i.e. CanVecMem is false). std::unique_ptr DepChecker; Loop *TheLoop; + /// Determines whether we should generate partial runtime checks when not all + /// memory accesses could be analyzed. + bool AllowPartial; + unsigned NumLoads = 0; unsigned NumStores = 0; /// Cache the result of analyzeLoop. bool CanVecMem = false; bool HasConvergentOp = false; + bool HasCompletePtrRtChecking = false; /// Indicator that there are two non vectorizable stores to the same uniform /// address.
@@ -920,7 +938,7 @@ class LoopAccessInfoManager { const TargetLibraryInfo *TLI) : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {} - LLVM_ABI const LoopAccessInfo &getInfo(Loop &L); + LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false); LLVM_ABI void clear(); diff --git a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h index f445e0696b5f8..44ba3c24f6afb 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h @@ -20,9 +20,11 @@ class raw_ostream; class LoopAccessInfoPrinterPass : public PassInfoMixin { raw_ostream &OS; + bool AllowPartial; public: - explicit LoopAccessInfoPrinterPass(raw_ostream &OS) : OS(OS) {} + explicit LoopAccessInfoPrinterPass(raw_ostream &OS, bool AllowPartial) + : OS(OS), AllowPartial(AllowPartial) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); static bool isRequired() { return true; } }; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index f377da3926b26..24661d72149d2 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -530,8 +530,10 @@ void RuntimePointerChecking::groupChecks( // equivalence class, the iteration order is deterministic. for (auto M : DepCands.members(Access)) { auto PointerI = PositionMap.find(M.getPointer()); - assert(PointerI != PositionMap.end() && - "pointer in equivalence class not found in PositionMap"); + // If we can't find the pointer in PositionMap that means we can't + // generate a memcheck for it. + if (PointerI == PositionMap.end()) + continue; for (unsigned Pointer : PointerI->second) { bool Merged = false; // Mark this pointer as seen. @@ -693,10 +695,13 @@ class AccessAnalysis { /// non-intersection. /// /// Returns true if we need no check or if we do and we can generate them - /// (i.e. 
the pointers have computable bounds). + /// (i.e. the pointers have computable bounds). A return value of false means + /// we couldn't analyze and generate runtime checks for all pointers in the + /// loop, but if \p AllowPartial is set then we will have checks for those + /// pointers we could analyze. bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop, const DenseMap &Strides, - Value *&UncomputablePtr); + Value *&UncomputablePtr, bool AllowPartial); /// Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. @@ -1181,8 +1186,8 @@ bool AccessAnalysis::createCheckForAccess( bool AccessAnalysis::canCheckPtrAtRT( RuntimePointerChecking &RtCheck, Loop *TheLoop, - const DenseMap &StridesMap, - Value *&UncomputablePtr) { + const DenseMap &StridesMap, Value *&UncomputablePtr, + bool AllowPartial) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; @@ -1275,7 +1280,8 @@ bool AccessAnalysis::canCheckPtrAtRT( /*Assume=*/true)) { CanDoAliasSetRT = false; UncomputablePtr = Access.getPointer(); - break; + if (!AllowPartial) + break; } } } @@ -1315,7 +1321,7 @@ bool AccessAnalysis::canCheckPtrAtRT( } } - if (MayNeedRTCheck && CanDoRT) + if (MayNeedRTCheck && (CanDoRT || AllowPartial)) RtCheck.generateChecks(DepCands, IsDepCheckNeeded); LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() @@ -1329,7 +1335,7 @@ bool AccessAnalysis::canCheckPtrAtRT( bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT; assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) && "CanDoRTIfNeeded depends on RtCheck.Need"); - if (!CanDoRTIfNeeded) + if (!CanDoRTIfNeeded && !AllowPartial) RtCheck.reset(); return CanDoRTIfNeeded; } @@ -2599,9 +2605,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
Value *UncomputablePtr = nullptr; - bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT( - *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr); - if (!CanDoRTIfNeeded) { + HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT( + *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial); + if (!HasCompletePtrRtChecking) { const auto *I = dyn_cast_or_null(UncomputablePtr); recordAnalysis("CantIdentifyArrayBounds", I) << "cannot identify array bounds"; @@ -2629,11 +2635,12 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, PtrRtChecking->Need = true; UncomputablePtr = nullptr; - CanDoRTIfNeeded = Accesses.canCheckPtrAtRT( - *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr); + HasCompletePtrRtChecking = + Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides, + UncomputablePtr, AllowPartial); // Check that we found the bounds for the pointer. - if (!CanDoRTIfNeeded) { + if (!HasCompletePtrRtChecking) { auto *I = dyn_cast_or_null(UncomputablePtr); recordAnalysis("CantCheckMemDepsAtRunTime", I) << "cannot check memory dependencies at runtime"; @@ -2908,9 +2915,10 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI) + DominatorTree *DT, LoopInfo *LI, + bool AllowPartial) : PSE(std::make_unique(*SE, *L)), - PtrRtChecking(nullptr), TheLoop(L) { + PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) { unsigned MaxTargetVectorWidthInBits = std::numeric_limits::max(); if (TTI && !TTI->enableScalableVectorization()) // Scale the vector width by 2 as rough estimate to also consider @@ -2959,6 +2967,8 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { // List the pair of accesses need run-time checks to prove independence. 
PtrRtChecking->print(OS, Depth); + if (PtrRtChecking->Need && !HasCompletePtrRtChecking) + OS.indent(Depth) << "Generated run-time checks are incomplete\n"; OS << "\n"; OS.indent(Depth) @@ -2978,12 +2988,15 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { PSE->print(OS, Depth); } -const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) { +const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L, + bool AllowPartial) { const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L); - if (Inserted) - It->second = - std::make_unique(&L, &SE, TTI, TLI, &AA, &DT, &LI); + // We need to create the LoopAccessInfo if either we don't already have one, + // or if it was created with a different value of AllowPartial. + if (Inserted || It->second->hasAllowPartial() != AllowPartial) + It->second = std::make_unique(&L, &SE, TTI, TLI, &AA, &DT, + &LI, AllowPartial); return *It->second; } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index be93a7b1f5ba6..736b0f9c50bca 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -441,7 +441,6 @@ FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(errs())) FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(errs())) FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(errs())) FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(errs())) -FUNCTION_PASS("print", LoopAccessInfoPrinterPass(errs())) FUNCTION_PASS("print", AssumptionPrinterPass(errs())) FUNCTION_PASS("print", BlockFrequencyPrinterPass(errs())) FUNCTION_PASS("print", BranchProbabilityPrinterPass(errs())) @@ -583,6 +582,16 @@ FUNCTION_PASS_WITH_PARAMS( return MergedLoadStoreMotionPass(Opts); }, parseMergedLoadStoreMotionOptions, "no-split-footer-bb;split-footer-bb") +FUNCTION_PASS_WITH_PARAMS( + "print", "LoopAccessInfoPrinterPass", + [](bool AllowPartial) { + return LoopAccessInfoPrinterPass(errs(), AllowPartial); + }, + [](StringRef Params) { + return 
PassBuilder::parseSinglePassOption(Params, "allow-partial", + "LoopAccessInfoPrinterPass"); + }, + "allow-partial") FUNCTION_PASS_WITH_PARAMS( "print", "DependenceAnalysisPrinterPass", [](bool NormalizeResults) { diff --git a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp index 3d3f22d686e32..7253c7f222509 100644 --- a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp +++ b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp @@ -28,7 +28,7 @@ PreservedAnalyses LoopAccessInfoPrinterPass::run(Function &F, while (!Worklist.empty()) { Loop *L = Worklist.pop_back_val(); OS.indent(2) << L->getHeader()->getName() << ":\n"; - LAIs.getInfo(*L).print(OS, 4); + LAIs.getInfo(*L, AllowPartial).print(OS, 4); } return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 6b4fc88cd3a72..4f2bfb073bafa 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -368,7 +368,7 @@ bool LoopVersioningLICM::legalLoopInstructions() { IsReadOnlyLoop = true; using namespace ore; // Get LoopAccessInfo from current loop via the proxy. - LAI = &LAIs.getInfo(*CurLoop); + LAI = &LAIs.getInfo(*CurLoop, /*AllowPartial=*/true); // Check LoopAccessInfo for need of runtime check. 
if (LAI->getRuntimePointerChecking()->getChecks().empty()) { LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); diff --git a/llvm/test/Analysis/LoopAccessAnalysis/allow-partial.ll b/llvm/test/Analysis/LoopAccessAnalysis/allow-partial.ll new file mode 100644 index 0000000000000..3096d545eb6d5 --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/allow-partial.ll @@ -0,0 +1,99 @@ +; RUN: opt -disable-output -passes='print<access-info><allow-partial>,print<access-info>' %s 2>&1 | FileCheck %s --check-prefixes=ALLOW-BEFORE +; RUN: opt -disable-output -passes='print<access-info>,print<access-info><allow-partial>' %s 2>&1 | FileCheck %s --check-prefixes=ALLOW-AFTER + +; Check that we get the right results when loop access analysis is run twice, +; once without partial results and once with. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" + +define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) { +; ALLOW-BEFORE-LABEL: 'gep_loaded_offset' +; ALLOW-BEFORE-NEXT: while.body: +; ALLOW-BEFORE-NEXT: Report: cannot identify array bounds +; ALLOW-BEFORE-NEXT: Dependences: +; ALLOW-BEFORE-NEXT: Run-time memory checks: +; ALLOW-BEFORE-NEXT: Check 0: +; ALLOW-BEFORE-NEXT: Comparing group GRP0: +; ALLOW-BEFORE-NEXT: %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] +; ALLOW-BEFORE-NEXT: Against group GRP1: +; ALLOW-BEFORE-NEXT: ptr %r +; ALLOW-BEFORE-NEXT: Grouped accesses: +; ALLOW-BEFORE-NEXT: Group GRP0: +; ALLOW-BEFORE-NEXT: (Low: %p High: (4 + (4 * (zext i32 (-1 + %n) to i64)) + %p)) +; ALLOW-BEFORE-NEXT: Member: {%p,+,4}<%while.body> +; ALLOW-BEFORE-NEXT: Group GRP1: +; ALLOW-BEFORE-NEXT: (Low: %r High: (8 + %r)) +; ALLOW-BEFORE-NEXT: Member: %r +; ALLOW-BEFORE-NEXT: Generated run-time checks are incomplete +; ALLOW-BEFORE-EMPTY: +; ALLOW-BEFORE-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; ALLOW-BEFORE-NEXT: SCEV assumptions: +; ALLOW-BEFORE-EMPTY: +; ALLOW-BEFORE-NEXT: Expressions re-written: +; +; ALLOW-BEFORE-LABEL: 'gep_loaded_offset' +; ALLOW-BEFORE-NEXT: while.body: +; ALLOW-BEFORE-NEXT: Report: cannot identify array bounds +; ALLOW-BEFORE-NEXT: Dependences: +; ALLOW-BEFORE-NEXT: Run-time memory checks: +; ALLOW-BEFORE-NEXT: Grouped accesses: +; ALLOW-BEFORE-EMPTY: +; ALLOW-BEFORE-NEXT: Non vectorizable stores to invariant address were not found in loop. +; ALLOW-BEFORE-NEXT: SCEV assumptions: +; ALLOW-BEFORE-EMPTY: +; ALLOW-BEFORE-NEXT: Expressions re-written: +; +; ALLOW-AFTER-LABEL: 'gep_loaded_offset' +; ALLOW-AFTER-NEXT: while.body: +; ALLOW-AFTER-NEXT: Report: cannot identify array bounds +; ALLOW-AFTER-NEXT: Dependences: +; ALLOW-AFTER-NEXT: Run-time memory checks: +; ALLOW-AFTER-NEXT: Grouped accesses: +; ALLOW-AFTER-EMPTY: +; ALLOW-AFTER-NEXT: Non vectorizable stores to invariant address were not found in loop. +; ALLOW-AFTER-NEXT: SCEV assumptions: +; ALLOW-AFTER-EMPTY: +; ALLOW-AFTER-NEXT: Expressions re-written: +; +; ALLOW-AFTER-LABEL: 'gep_loaded_offset' +; ALLOW-AFTER-NEXT: while.body: +; ALLOW-AFTER-NEXT: Report: cannot identify array bounds +; ALLOW-AFTER-NEXT: Dependences: +; ALLOW-AFTER-NEXT: Run-time memory checks: +; ALLOW-AFTER-NEXT: Check 0: +; ALLOW-AFTER-NEXT: Comparing group GRP0: +; ALLOW-AFTER-NEXT: %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] +; ALLOW-AFTER-NEXT: Against group GRP1: +; ALLOW-AFTER-NEXT: ptr %r +; ALLOW-AFTER-NEXT: Grouped accesses: +; ALLOW-AFTER-NEXT: Group GRP0: +; ALLOW-AFTER-NEXT: (Low: %p High: (4 + (4 * (zext i32 (-1 + %n) to i64)) + %p)) +; ALLOW-AFTER-NEXT: Member: {%p,+,4}<%while.body> +; ALLOW-AFTER-NEXT: Group GRP1: +; ALLOW-AFTER-NEXT: (Low: %r High: (8 + %r)) +; ALLOW-AFTER-NEXT: Member: %r +; ALLOW-AFTER-NEXT: Generated run-time checks are incomplete +; ALLOW-AFTER-EMPTY: +; ALLOW-AFTER-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; ALLOW-AFTER-NEXT: SCEV assumptions: +; ALLOW-AFTER-EMPTY: +; ALLOW-AFTER-NEXT: Expressions re-written: +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load i64, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval + %val = load i32, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll index c28dd58fcb204..fa72ae910cbbb 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -disable-output -passes='print<access-info>' %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FULLDEPTH -; RUN: opt -disable-output -passes='print<access-info>' -max-forked-scev-depth=2 %s 2>&1 | FileCheck %s --check-prefixes=CHECK,DEPTH2 +; RUN: opt -disable-output -passes='print<access-info><allow-partial>' %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FULLDEPTH +; RUN: opt -disable-output -passes='print<access-info><allow-partial>' -max-forked-scev-depth=2 %s 2>&1 | FileCheck %s --check-prefixes=CHECK,DEPTH2 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -404,7 +404,19 @@ define dso_local void @forked_ptrs_same_base_different_offset(ptr nocapture read ; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; CHECK-NEXT: Against
group GRP1: +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -475,7 +487,19 @@ define dso_local void @forked_ptrs_add_to_offset(ptr nocapture readonly %Base, p ; DEPTH2-NEXT: Report: cannot identify array bounds ; DEPTH2-NEXT: Dependences: ; DEPTH2-NEXT: Run-time memory checks: +; DEPTH2-NEXT: Check 0: +; DEPTH2-NEXT: Comparing group GRP0: +; DEPTH2-NEXT: %arrayidx5 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; DEPTH2-NEXT: Against group GRP1: +; DEPTH2-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; DEPTH2-NEXT: Grouped accesses: +; DEPTH2-NEXT: Group GRP0: +; DEPTH2-NEXT: (Low: %Dest High: (400 + %Dest)) +; DEPTH2-NEXT: Member: {%Dest,+,4}<%for.body> +; DEPTH2-NEXT: Group GRP1: +; DEPTH2-NEXT: (Low: %Preds High: (400 + %Preds)) +; DEPTH2-NEXT: Member: {%Preds,+,4}<%for.body> +; DEPTH2-NEXT: Generated run-time checks are incomplete ; DEPTH2-EMPTY: ; DEPTH2-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; DEPTH2-NEXT: SCEV assumptions: @@ -543,7 +567,19 @@ define dso_local void @forked_ptrs_sub_from_offset(ptr nocapture readonly %Base, ; DEPTH2-NEXT: Report: cannot identify array bounds ; DEPTH2-NEXT: Dependences: ; DEPTH2-NEXT: Run-time memory checks: +; DEPTH2-NEXT: Check 0: +; DEPTH2-NEXT: Comparing group GRP0: +; DEPTH2-NEXT: %arrayidx5 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; DEPTH2-NEXT: Against group GRP1: +; DEPTH2-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; DEPTH2-NEXT: Grouped accesses: +; DEPTH2-NEXT: Group GRP0: +; DEPTH2-NEXT: (Low: %Dest High: (400 + %Dest)) +; DEPTH2-NEXT: Member: {%Dest,+,4}<%for.body> +; DEPTH2-NEXT: Group GRP1: +; DEPTH2-NEXT: (Low: %Preds High: (400 + %Preds)) +; DEPTH2-NEXT: Member: {%Preds,+,4}<%for.body> +; DEPTH2-NEXT: Generated run-time checks are incomplete ; DEPTH2-EMPTY: ; DEPTH2-NEXT: Non vectorizable stores to invariant address were not found in loop. ; DEPTH2-NEXT: SCEV assumptions: @@ -611,7 +647,19 @@ define dso_local void @forked_ptrs_add_sub_offset(ptr nocapture readonly %Base, ; DEPTH2-NEXT: Report: cannot identify array bounds ; DEPTH2-NEXT: Dependences: ; DEPTH2-NEXT: Run-time memory checks: +; DEPTH2-NEXT: Check 0: +; DEPTH2-NEXT: Comparing group GRP0: +; DEPTH2-NEXT: %arrayidx5 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; DEPTH2-NEXT: Against group GRP1: +; DEPTH2-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; DEPTH2-NEXT: Grouped accesses: +; DEPTH2-NEXT: Group GRP0: +; DEPTH2-NEXT: (Low: %Dest High: (400 + %Dest)) +; DEPTH2-NEXT: Member: {%Dest,+,4}<%for.body> +; DEPTH2-NEXT: Group GRP1: +; DEPTH2-NEXT: (Low: %Preds High: (400 + %Preds)) +; DEPTH2-NEXT: Member: {%Preds,+,4}<%for.body> +; DEPTH2-NEXT: Generated run-time checks are incomplete ; DEPTH2-EMPTY: ; DEPTH2-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; DEPTH2-NEXT: SCEV assumptions: @@ -649,7 +697,19 @@ define dso_local void @forked_ptrs_mul_by_offset(ptr nocapture readonly %Base, p ; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -687,7 +747,19 @@ define dso_local void @forked_ptrs_uniform_and_strided_forks(ptr nocapture reado ; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -730,7 +802,19 @@ define dso_local void @forked_ptrs_gather_and_contiguous_forks(ptr nocapture rea ; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -769,7 +853,19 @@ define dso_local void @forked_ptrs_two_forks_gep(ptr nocapture readonly %Base1, ; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -807,6 +903,11 @@ define void @forked_ptrs_two_select(ptr nocapture readonly %Base1, ptr nocapture ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%loop> +; CHECK-NEXT: Member: {%Dest,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -845,7 +946,19 @@ define void @forked_ptrs_too_many_gep_ops(ptr nocapture readonly %Base1, ptr noc ; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -881,7 +994,19 @@ define void @forked_ptrs_vector_gep(ptr nocapture readonly %Base1, ptr nocapture ; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %1 = getelementptr inbounds <4 x float>, ptr %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Dest High: (1552 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,64}<%for.body> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %Preds High: (388 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,16}<%for.body> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -1056,6 +1181,10 @@ define void @forked_ptrs_with_different_base3(ptr nocapture readonly %Preds, ptr ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %Preds High: (31996 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll index d263749ea1f46..3160a77e0ba5e 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -passes='print<access-info><allow-partial>' -disable-output %s 2>&1 | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -237,6 +237,10 @@ define void @different_non_constant_strides_known_backward_via_assume(ptr %A, i6 ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (1024 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -271,6 +275,10 @@ define void @different_non_constant_strides_known_backward_via_assume_distance_l ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (1024 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions: @@ -306,6 +314,10 @@ define void @different_non_constant_strides_known_backward_via_assume_min_distan ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (1024 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -341,6 +353,10 @@ define void @different_non_constant_strides_not_known_backward(ptr %A, i64 %scal ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (1024 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll index 5f4c732dc19df..9e0a1e3a03dde 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -passes='print<access-info><allow-partial>' -disable-output %s 2>&1 | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -85,6 +85,10 @@ define void @different_non_constant_strides_known_forward_via_assume(ptr %A, i64 ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (1024 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<%loop> +; CHECK-NEXT:
Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -119,6 +123,10 @@ define void @different_non_constant_strides_known_forward_via_assume_min_distanc ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (1024 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -154,6 +162,10 @@ define void @different_non_constant_strides_not_known_forward(ptr %A, i64 %scale ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (1024 + %A)) +; CHECK-NEXT: Member: {%A,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll index bd38c50cb15d5..6fbe0e45976b6 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -passes='print<access-info>' -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print<access-info><allow-partial>' -disable-output < %s 2>&1 | FileCheck %s %s1 = type { [32000 x double], [32000 x double], [32000 x double] } @@ -462,6 +462,10 @@ define i32 @store_with_pointer_phi_in_same_bb_use_other_phi(ptr %A, ptr %B, ptr ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (256000 + %A)) +; CHECK-NEXT: Member: {%A,+,8}<%loop.header> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll index 0763fd220d023..d1d1ecb2af888 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -passes='print<access-info><allow-partial>' -disable-output %s 2>&1 | FileCheck %s define void @dependency_check_and_runtime_checks_needed_select_of_invariant_ptrs(ptr %a, ptr %b, ptr %c, i64 %offset, i64 %n) { @@ -167,6 +167,14 @@ define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_m ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a)) +; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a)) +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -208,6 +216,14 @@ define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_m ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a)) +; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a)) +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Member: {%a,+,4}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll index 402081fb939f0..0708f908211ef 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -passes='print<access-info><allow-partial>' -disable-output %s 2>&1 | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -80,6 +80,10 @@ define void @indirect_ptr_recurrences_read_write_may_alias_no_tbaa(ptr %A, ptr % ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: (8 + %B) High: (40 + %B)) +; CHECK-NEXT: Member: {(8 + %B),+,8}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -111,7 +115,19 @@ define void @indirect_ptr_recurrences_read_write_may_alias_different_obj(ptr %A, ; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %gep.C = getelementptr inbounds ptr, ptr %C, i64 %iv +; CHECK-NEXT: Against group GRP1: +; CHECK-NEXT: %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: (8 + %C) High: (36 + %C)) +; CHECK-NEXT: Member: {(8 + %C),+,8}<%loop> +; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: (Low: (8 + %B) High: (40 + %B)) +; CHECK-NEXT: Member: {(8 + %B),+,8}<%loop> +; CHECK-NEXT: Generated run-time checks are incomplete ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll b/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll index e9b2954039198..89576f1684c3d 100644 --- a/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll +++ b/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll @@ -7,15 +7,25 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6 ; accesses but not others. 
; Load from a gep whose bounds can't be calculated as the offset is loaded from memory -; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) { ; CHECK-LABEL: define void @gep_loaded_offset( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[WHILE_BODY:.*]] -; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] -; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ] +; CHECK-NEXT: [[WHILE_BODY_LVER_CHECK:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[R]], i64 8 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[R]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[WHILE_BODY_PH_LVER_ORIG:.*]], label %[[WHILE_BODY_PH:.*]] +; CHECK: [[WHILE_BODY_PH_LVER_ORIG]]: +; CHECK-NEXT: br label %[[WHILE_BODY_LVER_ORIG:.*]] +; CHECK: [[WHILE_BODY_LVER_ORIG]]: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY_LVER_ORIG]] ], [ [[N]], %[[WHILE_BODY_PH_LVER_ORIG]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY_LVER_ORIG]] ], [ [[P]], %[[WHILE_BODY_PH_LVER_ORIG]] ] ; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 ; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]] @@ -23,7 +33,24 @@ define void 
@gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) { ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4 ; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY_LVER_ORIG]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[WHILE_BODY_PH]]: +; CHECK-NEXT: [[RVAL1:%.*]] = load i64, ptr [[R]], align 4, !alias.scope [[META2:![0-9]+]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL1]] +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_ADDR1:%.*]] = phi i32 [ [[DEC1:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[P_ADDR1:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[DEC1]] = add nsw i32 [[N_ADDR1]], -1 +; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds nuw i8, ptr [[P_ADDR1]], i64 4 +; CHECK-NEXT: store i32 [[VAL1]], ptr [[P_ADDR1]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META2]] +; CHECK-NEXT: [[TOBOOL_NOT1:%.*]] = icmp eq i32 [[DEC1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT1]], label %[[WHILE_END_LOOPEXIT2:.*]], label %[[WHILE_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[WHILE_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[WHILE_END:.*]] +; CHECK: [[WHILE_END_LOOPEXIT2]]: +; CHECK-NEXT: br label %[[WHILE_END]] ; CHECK: [[WHILE_END]]: ; CHECK-NEXT: ret void ; @@ -89,15 +116,25 @@ while.end: } ; Load from a gep whose bounds can't be calculated as the pointer is loaded from memory -; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval define void @gep_loaded_base(ptr %p, ptr %q, ptr %r, i32 %n) { ; CHECK-LABEL: define void @gep_loaded_base( ; 
CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[WHILE_BODY:.*]] -; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] -; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ] +; CHECK-NEXT: [[WHILE_BODY_LVER_CHECK:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[R]], i64 8 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[R]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[WHILE_BODY_PH_LVER_ORIG:.*]], label %[[WHILE_BODY_PH:.*]] +; CHECK: [[WHILE_BODY_PH_LVER_ORIG]]: +; CHECK-NEXT: br label %[[WHILE_BODY_LVER_ORIG:.*]] +; CHECK: [[WHILE_BODY_LVER_ORIG]]: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY_LVER_ORIG]] ], [ [[N]], %[[WHILE_BODY_PH_LVER_ORIG]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY_LVER_ORIG]] ], [ [[P]], %[[WHILE_BODY_PH_LVER_ORIG]] ] ; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 ; CHECK-NEXT: [[RVAL:%.*]] = load ptr, ptr [[R]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[RVAL]], i64 0 @@ -105,7 +142,24 @@ define void @gep_loaded_base(ptr %p, ptr %q, ptr %r, i32 %n) { ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4 ; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label 
%[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY_LVER_ORIG]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[WHILE_BODY_PH]]: +; CHECK-NEXT: [[RVAL1:%.*]] = load ptr, ptr [[R]], align 4, !alias.scope [[META10:![0-9]+]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[RVAL1]], i64 0 +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_ADDR1:%.*]] = phi i32 [ [[DEC1:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[P_ADDR1:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[DEC1]] = add nsw i32 [[N_ADDR1]], -1 +; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds nuw i8, ptr [[P_ADDR1]], i64 4 +; CHECK-NEXT: store i32 [[VAL1]], ptr [[P_ADDR1]], align 4, !alias.scope [[META13:![0-9]+]], !noalias [[META10]] +; CHECK-NEXT: [[TOBOOL_NOT1:%.*]] = icmp eq i32 [[DEC1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT1]], label %[[WHILE_END_LOOPEXIT2:.*]], label %[[WHILE_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: [[WHILE_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[WHILE_END:.*]] +; CHECK: [[WHILE_END_LOOPEXIT2]]: +; CHECK-NEXT: br label %[[WHILE_END]] ; CHECK: [[WHILE_END]]: ; CHECK-NEXT: ret void ; @@ -129,15 +183,25 @@ while.end: } ; Load from a gep with an offset that scalar evolution can't describe -; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of qval define void @gep_strange_offset(ptr %p, ptr %q, ptr %r, i32 %n) { ; CHECK-LABEL: define void @gep_strange_offset( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[WHILE_BODY:.*]] -; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] -; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ 
[[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ] +; CHECK-NEXT: [[WHILE_BODY_LVER_CHECK:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[WHILE_BODY_PH_LVER_ORIG:.*]], label %[[WHILE_BODY_PH:.*]] +; CHECK: [[WHILE_BODY_PH_LVER_ORIG]]: +; CHECK-NEXT: br label %[[WHILE_BODY_LVER_ORIG:.*]] +; CHECK: [[WHILE_BODY_LVER_ORIG]]: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY_LVER_ORIG]] ], [ [[N]], %[[WHILE_BODY_PH_LVER_ORIG]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY_LVER_ORIG]] ], [ [[P]], %[[WHILE_BODY_PH_LVER_ORIG]] ] ; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 ; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[Q]], align 4 ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[DEC]], 2 @@ -148,7 +212,27 @@ define void @gep_strange_offset(ptr %p, ptr %q, ptr %r, i32 %n) { ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4 ; CHECK-NEXT: store i32 [[ADD]], ptr [[P_ADDR]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY_LVER_ORIG]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK: [[WHILE_BODY_PH]]: +; CHECK-NEXT: [[QVAL1:%.*]] = load i32, ptr [[Q]], align 4, !alias.scope [[META17:![0-9]+]] +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: 
[[WHILE_BODY]]: +; CHECK-NEXT: [[N_ADDR1:%.*]] = phi i32 [ [[DEC1:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[P_ADDR1:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[DEC1]] = add nsw i32 [[N_ADDR1]], -1 +; CHECK-NEXT: [[REM1:%.*]] = srem i32 [[DEC1]], 2 +; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[REM1]] to i64 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[IDXPROM1]] +; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[VAL1]], [[QVAL1]] +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds nuw i8, ptr [[P_ADDR1]], i64 4 +; CHECK-NEXT: store i32 [[ADD1]], ptr [[P_ADDR1]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] +; CHECK-NEXT: [[TOBOOL_NOT1:%.*]] = icmp eq i32 [[DEC1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT1]], label %[[WHILE_END_LOOPEXIT2:.*]], label %[[WHILE_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: [[WHILE_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[WHILE_END:.*]] +; CHECK: [[WHILE_END_LOOPEXIT2]]: +; CHECK-NEXT: br label %[[WHILE_END]] ; CHECK: [[WHILE_END]]: ; CHECK-NEXT: ret void ; @@ -175,15 +259,24 @@ while.end: } ; A memcpy-like loop where the source address is loaded from a pointer -; FIXME: We should be able to hoist the load of the source address pointer define void @memcpy_load_src(ptr %dst, ptr %src, i32 %n) { ; CHECK-LABEL: define void @memcpy_load_src( ; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[WHILE_BODY:.*]] -; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] -; CHECK-NEXT: [[DST_VAL:%.*]] = phi ptr [ [[DST_VAL_NEXT:%.*]], %[[WHILE_BODY]] ], [ [[DST]], %[[ENTRY]] ] +; CHECK-NEXT: [[WHILE_BODY_LVER_CHECK:.*:]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 +; CHECK-NEXT: 
[[TMP0:%.*]] = add nsw i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[WHILE_BODY_PH_LVER_ORIG:.*]], label %[[WHILE_BODY_PH:.*]] +; CHECK: [[WHILE_BODY_PH_LVER_ORIG]]: +; CHECK-NEXT: br label %[[WHILE_BODY_LVER_ORIG:.*]] +; CHECK: [[WHILE_BODY_LVER_ORIG]]: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY_LVER_ORIG]] ], [ [[N]], %[[WHILE_BODY_PH_LVER_ORIG]] ] +; CHECK-NEXT: [[DST_VAL:%.*]] = phi ptr [ [[DST_VAL_NEXT:%.*]], %[[WHILE_BODY_LVER_ORIG]] ], [ [[DST]], %[[WHILE_BODY_PH_LVER_ORIG]] ] ; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 ; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8 ; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC_VAL]], i64 1 @@ -192,7 +285,26 @@ define void @memcpy_load_src(ptr %dst, ptr %src, i32 %n) { ; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 ; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY_LVER_ORIG]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK: [[WHILE_BODY_PH]]: +; CHECK-NEXT: [[SRC_PROMOTED:%.*]] = load ptr, ptr [[SRC]], align 8, !alias.scope [[META24:![0-9]+]], !noalias [[META27:![0-9]+]] +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[SRC_VAL_NEXT3:%.*]] = phi ptr [ [[SRC_VAL_NEXT1:%.*]], %[[WHILE_BODY]] ], [ [[SRC_PROMOTED]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[N_VAL1:%.*]] = 
phi i32 [ [[DEC1:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[DST_VAL1:%.*]] = phi ptr [ [[DST_VAL_NEXT1:%.*]], %[[WHILE_BODY]] ], [ [[DST]], %[[WHILE_BODY_PH]] ] +; CHECK-NEXT: [[DEC1]] = add nsw i32 [[N_VAL1]], -1 +; CHECK-NEXT: [[SRC_VAL_NEXT1]] = getelementptr inbounds nuw i8, ptr [[SRC_VAL_NEXT3]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT1]] = getelementptr inbounds nuw i8, ptr [[DST_VAL1]], i64 1 +; CHECK-NEXT: store ptr [[SRC_VAL_NEXT1]], ptr [[SRC]], align 8, !alias.scope [[META24]], !noalias [[META27]] +; CHECK-NEXT: [[VAL1:%.*]] = load i8, ptr [[SRC_VAL_NEXT3]], align 1 +; CHECK-NEXT: store i8 [[VAL1]], ptr [[DST_VAL1]], align 1, !alias.scope [[META27]] +; CHECK-NEXT: [[TOBOOL_NOT1:%.*]] = icmp eq i32 [[DEC1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT1]], label %[[WHILE_END_LOOPEXIT2:.*]], label %[[WHILE_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK: [[WHILE_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[WHILE_END:.*]] +; CHECK: [[WHILE_END_LOOPEXIT2]]: +; CHECK-NEXT: br label %[[WHILE_END]] ; CHECK: [[WHILE_END]]: ; CHECK-NEXT: ret void ;