diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index abc0bb8588fa8..ec99795d11907 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -326,6 +326,12 @@ class LLVM_ABI Loop : public LoopBase { /// Return true if the loop body is safe to clone in practice. bool isSafeToClone() const; + /// Like `isSafeToClone`, but also checks whether we may form phis for all + /// values that are live-out from the loop. This is required if either of + /// the cloned loop bodies may run conditionally. + bool isSafeToCloneConditionally(const DominatorTree &DT, + bool AllowConvergent = false) const; + /// Returns true if the loop is annotated parallel. /// /// A parallel loop can be assumed to not contain any dependencies between diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 0eaf1dca59675..279b5744f17fb 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -428,6 +428,27 @@ bool Loop::isCanonical(ScalarEvolution &SE) const { return true; } +static bool loopContainsUser(const Loop &L, const BasicBlock &BB, const Use &U, + const DominatorTree &DT) { + const Instruction *UI = cast(U.getUser()); + const BasicBlock *UserBB = UI->getParent(); + + // For practical purposes, we consider that the use in a PHI + // occurs in the respective predecessor block. For more info, + // see the `phi` doc in LangRef and the LCSSA doc. + if (const PHINode *P = dyn_cast(UI)) + UserBB = P->getIncomingBlock(U); + + // Check the current block, as a fast-path, before checking whether + // the use is anywhere in the loop. Most values are used in the same + // block they are defined in. Also, blocks not reachable from the + // entry are special; uses in them don't need to go through PHIs. + if (UserBB != &BB && !L.contains(UserBB) && DT.isReachableFromEntry(UserBB)) + return false; + + return true; +} + // Check that 'BB' doesn't have any uses outside of the 'L' static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, const DominatorTree &DT, bool IgnoreTokens) { @@ -439,21 +460,7 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, continue; for (const Use &U : I.uses()) { - const Instruction *UI = cast(U.getUser()); - const BasicBlock *UserBB = UI->getParent(); - - // For practical purposes, we consider that the use in a PHI - // occurs in the respective predecessor block. For more info, - // see the `phi` doc in LangRef and the LCSSA doc. - if (const PHINode *P = dyn_cast(UI)) - UserBB = P->getIncomingBlock(U); - - // Check the current block, as a fast-path, before checking whether - // the use is anywhere in the loop. Most values are used in the same - // block they are defined in. Also, blocks not reachable from the - // entry are special; uses in them don't need to go through PHIs. - if (UserBB != &BB && !L.contains(UserBB) && - DT.isReachableFromEntry(UserBB)) + if (!loopContainsUser(L, BB, U, DT)) return false; } } @@ -499,6 +506,29 @@ bool Loop::isSafeToClone() const { return true; } +bool Loop::isSafeToCloneConditionally(const DominatorTree &DT, + bool AllowConvergent) const { + // Return false if any loop blocks contain indirectbrs, or there are any calls + // to noduplicate functions. + for (BasicBlock *BB : this->blocks()) { + if (isa(BB->getTerminator())) + return false; + + for (Instruction &I : *BB) { + if (I.getType()->isTokenTy()) { + for (const Use &U : I.uses()) { + if (!loopContainsUser(*this, *BB, U, DT)) + return false; + } + } + if (auto *CB = dyn_cast(&I)) + if (CB->cannotDuplicate() || (AllowConvergent || CB->isConvergent())) + return false; + } + } + return true; +} + MDNode *Loop::getLoopID() const { MDNode *LoopID = nullptr; diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 4f7956514b7b5..6a48c5116acc4 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -3271,19 +3271,10 @@ static bool collectUnswitchCandidatesWithInjections( return Found; } -static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) { - if (!L.isSafeToClone()) +static bool isSafeForNoNTrivialUnswitching(const DominatorTree &DT, Loop &L, + LoopInfo &LI) { + if (!L.isSafeToCloneConditionally(DT)) return false; - for (auto *BB : L.blocks()) - for (auto &I : *BB) { - if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) - return false; - if (auto *CB = dyn_cast(&I)) { - assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone()."); - if (CB->isConvergent()) - return false; - } - } // Check if there are irreducible CFG cycles in this loop. If so, we cannot // easily unswitch non-trivial edges out of the loop. Doing so might turn the @@ -3661,7 +3652,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, } // Perform legality checks. - if (!isSafeForNoNTrivialUnswitching(L, LI)) + if (!isSafeForNoNTrivialUnswitching(DT, L, LI)) return false; // For non-trivial unswitching, because it often creates new loops, we rely on diff --git a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp index 8f103153059e8..56e806222e764 100644 --- a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp +++ b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp @@ -750,6 +750,13 @@ bool LoopConstrainer::run() { const SCEVConstant *MinusOneS = cast(SE.getConstant(IVTy, -1, true /* isSigned */)); + if ((NeedsPreLoop || NeedsPostLoop) && + !OriginalLoop.isSafeToCloneConditionally(DT)) { + LLVM_DEBUG(dbgs() << "cannot clone loop " << OriginalLoop.getName() + << " because it is not safe to clone.\n"); + return false; + } + if (NeedsPreLoop) { const SCEV *ExitPreLoopAtSCEV = nullptr; diff --git a/llvm/test/Transforms/IRCE/pre_post_loops_clone.ll b/llvm/test/Transforms/IRCE/pre_post_loops_clone.ll new file mode 100644 index 0000000000000..2ad20c15b569d --- /dev/null +++ b/llvm/test/Transforms/IRCE/pre_post_loops_clone.ll @@ -0,0 +1,61 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -S < %s 2>&1 | FileCheck %s +; This test is the same as pre_post_loops.ll, except that the loop body contains a token-generating +; call. We need to ensure that IRCE does not try to introduce a PHI or otherwise generate invalid IE. + +declare token @llvm.source_token() +declare void @llvm.sink_token(token) + +; CHECK: define void @test_01 +define void @test_01(ptr %arr, ptr %a_len_ptr) { +entry: + %len = load i32, ptr %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + %token = call token @llvm.source_token() + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, ptr %arr, i32 %idx + store i32 0, ptr %addr + %next = icmp slt i32 %idx.next, 2147483647 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + call void @llvm.sink_token(token %token) + ret void +} + +define void @test_02(ptr %arr, ptr %a_len_ptr) { +entry: + %len = load i32, ptr %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 2147483647, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp slt i32 %idx, %len + %token = call token @llvm.source_token() + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, ptr %arr, i32 %idx + store i32 0, ptr %addr + %next = icmp sgt i32 %idx.next, -1 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + call void @llvm.sink_token(token %token) + ret void +} + +!0 = !{i32 0, i32 50}