diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 2df3c9049c7d6..86b86e8196fad 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -392,7 +392,8 @@ Instruction *removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
 ///
 /// Returns true if any basic block was removed.
 bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
-                             MemorySSAUpdater *MSSAU = nullptr);
+                             MemorySSAUpdater *MSSAU = nullptr,
+                             bool KeepOneInputPHIs = false);
 
 /// Combine the metadata of two instructions so that K can replace J. Some
 /// metadata kinds can only be kept if K does not move, meaning it dominated
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7017f6adf3a2b..f40d4d863ac6d 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -262,12 +263,58 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
   return Changed;
 }
 
+/// If \p BB begins with a PHI node that has exactly one incoming value, fold
+/// away every PHI in \p BB for which hasConstantValue() yields a value:
+/// replace its uses with that value and erase it. Does nothing otherwise.
+static void decayOneInputPHIs(BasicBlock &BB) {
+  if (BB.empty() || !isa<PHINode>(BB.begin()))
+    return;
+  unsigned NumPreds = cast<PHINode>(BB.front()).getNumIncomingValues();
+  if (NumPreds != 1)
+    return;
+  for (PHINode &Phi : make_early_inc_range(BB.phis())) {
+    if (Value *PhiConst = Phi.hasConstantValue()) {
+      Phi.replaceAllUsesWith(PhiConst);
+      Phi.eraseFromParent();
+      // Additionally, constant-fold conditional branch if there is one present,
+      // and the PHI has decayed into a constant, since it may make another CFG
+      // edge dead.
+      if (!isa<ConstantInt>(PhiConst))
+        continue;
+      if (BranchInst *BI = dyn_cast<BranchInst>(BB.getTerminator()))
+        if (BI->isConditional())
+          if (auto *Cmp = dyn_cast<CmpInst>(BI->getCondition()))
+            if (auto *LHS = dyn_cast<Constant>(Cmp->getOperand(0)))
+              if (auto *RHS = dyn_cast<Constant>(Cmp->getOperand(1))) {
+                const DataLayout &DL = BB.getModule()->getDataLayout();
+                Constant *ConstCond = ConstantFoldCompareInstOperands(
+                    Cmp->getPredicate(), LHS, RHS, DL);
+                if (ConstCond)
+                  BI->setCondition(ConstCond);
+              }
+    }
+  }
+}
+
+/// Run removeUnreachableBlocks on \p F with KeepOneInputPHIs set, and, if it
+/// reported a change, call decayOneInputPHIs on every block of \p F. Returns
+/// whether removeUnreachableBlocks changed anything.
+static bool removeUnreachableBlocksAndSimplify(Function &F,
+                                               DomTreeUpdater *DTU) {
+  bool Changed = removeUnreachableBlocks(F, DTU, /*MSSAU=*/nullptr,
+                                         /*KeepOneInputPHIs=*/true);
+  if (Changed)
+    for (BasicBlock &BB : F)
+      decayOneInputPHIs(BB);
+  return Changed;
+}
+
 static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
                                     DominatorTree *DT,
                                     const SimplifyCFGOptions &Options) {
   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
 
-  bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
+  bool EverChanged = removeUnreachableBlocksAndSimplify(F, DT ? &DTU : nullptr);
   EverChanged |=
       tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr);
   EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
@@ -285,7 +332,7 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
 
   do {
     EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
-    EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
+    EverChanged |= removeUnreachableBlocksAndSimplify(F, DT ? &DTU : nullptr);
   } while (EverChanged);
 
   return true;
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 8c6c112ebacff..35b76b1511d51 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3080,7 +3080,8 @@ Instruction *llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
 /// if they are in a dead cycle.  Return true if a change was made, false
 /// otherwise.
 bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
-                                   MemorySSAUpdater *MSSAU) {
+                                   MemorySSAUpdater *MSSAU,
+                                   bool KeepOneInputPHIs) {
   SmallPtrSet<BasicBlock *, 16> Reachable;
   bool Changed = markAliveBlocks(F, Reachable, DTU);
 
@@ -3111,7 +3112,7 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
   if (MSSAU)
     MSSAU->removeBlocks(BlocksToRemove);
 
-  DeleteDeadBlocks(BlocksToRemove.takeVector(), DTU);
+  DeleteDeadBlocks(BlocksToRemove.takeVector(), DTU, KeepOneInputPHIs);
 
   return Changed;
 }
diff --git a/llvm/test/Analysis/MemorySSA/nondeterminism.ll b/llvm/test/Analysis/MemorySSA/nondeterminism.ll
index 90902e36b5d58..312cd6e4709a9 100644
--- a/llvm/test/Analysis/MemorySSA/nondeterminism.ll
+++ b/llvm/test/Analysis/MemorySSA/nondeterminism.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -passes=simplifycfg -S --preserve-ll-uselistorder %s | FileCheck %s
 ; REQUIRES: x86-registered-target
 ; CHECK-LABEL: @n
-; CHECK: uselistorder i16 0, { 3, 2, 4, 1, 5, 0, 6 }
+; CHECK: uselistorder i16 0, { 0, 1, 4, 3, 5, 2, 6 }
 ; Note: test was added in an effort to ensure determinism when updating memoryssa. See PR42574.
 ; If the uselistorder check becomes no longer relevant, the test can be disabled or removed.
 
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll
index 5208c055c4fdf..88b1883e651c5 100644
--- a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll
@@ -42,7 +42,7 @@ suspend:
 ; CHECK-LABEL: @f(
 ; CHECK: %x.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 2
 ; CHECK: %y.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 3
-; CHECK: %alias_phi = phi ptr [ %y.reload.addr, %merge.from.flag_false ], [ %x.reload.addr, %entry ]
+; CHECK: %alias_phi = select i1 %n, ptr %x.reload.addr, ptr %y.reload.addr
 ; CHECK: %alias_phi.spill.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 4
 ; CHECK: store ptr %alias_phi, ptr %alias_phi.spill.addr, align 8
 
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
index c81bf333f2059..0967303b3a489 100644
--- a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
@@ -70,11 +70,7 @@ declare void @free(ptr)
 ; CHECK-NEXT: store ptr @f.destroy, ptr [[DESTROY_ADDR]], align 8
 ; CHECK-NEXT: [[X_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[HDL]], i32 0, i32 2
 ; CHECK-NEXT: [[Y_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[HDL]], i32 0, i32 3
-; CHECK-NEXT: br i1 [[N:%.*]], label [[MERGE:%.*]], label [[MERGE_FROM_FLAG_FALSE:%.*]]
-; CHECK: merge.from.flag_false:
-; CHECK-NEXT: br label [[MERGE:%.*]]
-; CHECK: merge:
-; CHECK-NEXT: [[ALIAS_PHI:%.*]] = phi ptr [ [[Y_RELOAD_ADDR]], [[MERGE_FROM_FLAG_FALSE]] ], [ [[X_RELOAD_ADDR]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ALIAS_PHI:%.*]] = select i1 [[N:%.*]], ptr [[X_RELOAD_ADDR]], ptr [[Y_RELOAD_ADDR]]
 ; CHECK-NEXT: [[ALIAS_PHI_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[HDL]], i32 0, i32 4
 ; CHECK-NEXT: store ptr [[ALIAS_PHI]], ptr [[ALIAS_PHI_SPILL_ADDR]], align 8
 ; CHECK-NEXT: store i8 1, ptr [[ALIAS_PHI]], align 1
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-outside-frame.ll b/llvm/test/Transforms/Coroutines/coro-alloca-outside-frame.ll
index ac6a5752438ce..0318822778901 100644
--- a/llvm/test/Transforms/Coroutines/coro-alloca-outside-frame.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-outside-frame.ll
@@ -43,7 +43,7 @@ suspend:
 ; CHECK: %x = alloca i64, align 8, !coro.outside.frame !0
 ; CHECK-NOT: %x.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 2
 ; CHECK: %y.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 2
-; CHECK: %alias_phi = phi ptr [ %y.reload.addr, %merge.from.flag_false ], [ %x, %entry ]
+; CHECK: %alias_phi = select i1 %n, ptr %x, ptr %y.reload.addr
 
 declare ptr @llvm.coro.free(token, ptr)
 declare i32 @llvm.coro.size.i32()
diff --git a/llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll b/llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll
index 801c4a1776135..39eb39ccf02d6 100644
--- a/llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll
+++ b/llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll
@@ -51,7 +51,7 @@ lpad:
 ; CHECK-LABEL: @f(
 ; CHECK: %alloc = call ptr @malloc(i32 32)
 ; CHECK-NEXT: %flag = call i1 @check(ptr %alloc)
-; CHECK-NEXT: %spec.select = select i1 %flag, i32 0, i32 1
+; CHECK-NEXT: %value_phi = select i1 %flag, i32 0, i32 1
 ; CHECK-NEXT: %value_invoke = call i32 @calc()
 ; CHECK-NEXT: %hdl = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
 
@@ -59,7 +59,7 @@ lpad:
 ; CHECK-NEXT: %value_invoke.spill.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 3
 ; CHECK-NEXT: store i32 %value_invoke, ptr %value_invoke.spill.addr
 ; CHECK-NEXT: %value_phi.spill.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 2
-; CHECK-NEXT: store i32 %spec.select, ptr %value_phi.spill.addr
+; CHECK-NEXT: store i32 %value_phi, ptr %value_phi.spill.addr
 
 declare ptr @llvm.coro.free(token, ptr)
 declare i32 @llvm.coro.size.i32()
diff --git a/llvm/test/Transforms/SimplifyCFG/intersection-block-with-dead-predecessor.ll b/llvm/test/Transforms/SimplifyCFG/intersection-block-with-dead-predecessor.ll
new file mode 100644
index 0000000000000..d60a8e0dfaf47
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/intersection-block-with-dead-predecessor.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+
+; A pre-existing foldable pattern (an icmp of two constants feeding a conditional branch, with no PHI in the block) should remain intact.
+define void @const_valued_cond_br(ptr %P) {
+; CHECK-LABEL: define void @const_valued_cond_br(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 42, 42
+; CHECK-NEXT: br i1 [[COND]], label [[A:%.*]], label [[B:%.*]]
+; CHECK: a:
+; CHECK-NEXT: store i32 123, ptr [[P]], align 4
+; CHECK-NEXT: br label [[B]]
+; CHECK: b:
+; CHECK-NEXT: ret void
+;
+entry:
+  %cond = icmp eq i32 42, 42
+  br i1 %cond, label %a, label %b
+a:
+  store i32 123, ptr %P
+  br label %b
+b:
+  ret void
+}
+
+; When the phi decays to a constant, the terminator of `b` gets constant-folded,
+; enabling further simplification.
+define void @intersection_block_with_dead_predecessor(ptr %P) {
+; CHECK-LABEL: define void @intersection_block_with_dead_predecessor(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 1, 1
+; CHECK-NEXT: store i32 321, ptr [[P]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %b
+b:
+  %x = phi i32 [1, %entry], [2, %a]
+  switch i32 %x, label %c [
+    i32 1, label %d
+  ]
+c:
+  store i32 123, ptr %P
+  ret void
+d:
+  store i32 321, ptr %P
+  ret void
+a: ; unreachable: no block in this function branches to %a
+  br label %b
+}