diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 8ddfa1e4eb6f7..7bd1f18004580 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -263,8 +263,6 @@ class CallAnalyzer : public InstVisitor { // Cache the DataLayout since we use it a lot. const DataLayout &DL; - DominatorTree DT; - /// The OptimizationRemarkEmitter available for this compilation. OptimizationRemarkEmitter *ORE; @@ -1688,66 +1686,51 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) { if (!isa(Cmp.getOperand(0)) || !isa(Cmp.getOperand(1))) return false; auto *CmpOp = Cmp.getOperand(0); - Function *F = Cmp.getFunction(); - // Iterate over the users of the function to check if it's a recursive - // function: - for (auto *U : F->users()) { - CallInst *Call = dyn_cast(U); - if (!Call || Call->getFunction() != F || Call->getCalledFunction() != F) - continue; - auto *CallBB = Call->getParent(); - auto *Predecessor = CallBB->getSinglePredecessor(); - // Only handle the case when the callsite has a single predecessor: - if (!Predecessor) - continue; + // Make sure that the callsite is recursive: + if (CandidateCall.getCaller() != &F) + return false; + // Only handle the case when the callsite has a single predecessor: + auto *CallBB = CandidateCall.getParent(); + auto *Predecessor = CallBB->getSinglePredecessor(); + if (!Predecessor) + return false; + // Check if the callsite is guarded by the same Cmp instruction: + auto *Br = dyn_cast(Predecessor->getTerminator()); + if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp) + return false; - auto *Br = dyn_cast(Predecessor->getTerminator()); - if (!Br || Br->isUnconditional()) - continue; - // Check if the Br condition is the same Cmp instr we are investigating: - if (Br->getCondition() != &Cmp) - continue; - // Check if there are any arg of the recursive callsite is affecting the cmp - // instr: - bool ArgFound = false; - Value *FuncArg = nullptr, *CallArg = nullptr; 
- for (unsigned ArgNum = 0; - ArgNum < F->arg_size() && ArgNum < Call->arg_size(); ArgNum++) { - FuncArg = F->getArg(ArgNum); - CallArg = Call->getArgOperand(ArgNum); - if (FuncArg == CmpOp && CallArg != CmpOp) { - ArgFound = true; - break; - } - } - if (!ArgFound) - continue; - // Now we have a recursive call that is guarded by a cmp instruction. - // Check if this cmp can be simplified: - SimplifyQuery SQ(DL, dyn_cast(CallArg)); - DomConditionCache DC; - DC.registerBranch(Br); - SQ.DC = &DC; - if (DT.root_size() == 0) { - // Dominator tree was never constructed for any function yet. - DT.recalculate(*F); - } else if (DT.getRoot()->getParent() != F) { - // Dominator tree was constructed for a different function, recalculate - // it for the current function. - DT.recalculate(*F); + // Check whether any argument of the recursive callsite affects the cmp + // instr: + bool ArgFound = false; + Value *FuncArg = nullptr, *CallArg = nullptr; + for (unsigned ArgNum = 0; + ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) { + FuncArg = F.getArg(ArgNum); + CallArg = CandidateCall.getArgOperand(ArgNum); + if (FuncArg == CmpOp && CallArg != CmpOp) { + ArgFound = true; + break; } - SQ.DT = &DT; - Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands( - cast(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ); - if (auto *ConstVal = dyn_cast_or_null(SimplifiedInstruction)) { - bool IsTrueSuccessor = CallBB == Br->getSuccessor(0); - // Make sure that the BB of the recursive call is NOT the next successor - // of the icmp. In other words, make sure that the recursion depth is 1. - if ((ConstVal->isOne() && !IsTrueSuccessor) || - (ConstVal->isZero() && IsTrueSuccessor)) { - SimplifiedValues[&Cmp] = ConstVal; - return true; - } + } + if (!ArgFound) + return false; + + // Now we have a recursive call that is guarded by a cmp instruction. 
+ // Check if this cmp can be simplified: + SimplifyQuery SQ(DL, dyn_cast(CallArg)); + CondContext CC(&Cmp); + CC.Invert = (CallBB != Br->getSuccessor(0)); + SQ.CC = &CC; + CC.AffectedValues.insert(FuncArg); + Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands( + cast(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ); + if (auto *ConstVal = dyn_cast_or_null(SimplifiedInstruction)) { + // Make sure that the BB of the recursive call is NOT the successor taken + // for the simplified condition, i.e. that the recursion depth is 1. + if ((ConstVal->isOne() && CC.Invert) || + (ConstVal->isZero() && !CC.Invert)) { + SimplifiedValues[&Cmp] = ConstVal; + return true; } } return false; diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll new file mode 100644 index 0000000000000..52d7b21902e8e --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll @@ -0,0 +1,49 @@ +; REQUIRES: asserts +; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s + +; This test shows that the recursive function will not get simplified +; unless the caller is the function itself rather than a different caller. 
+ +; CHECK: Inlining calls in: test +; CHECK: Function size: 2 +; CHECK: NOT Inlining (cost=never): recursive, Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale) + +; CHECK: Inlining calls in: inline_rec_true_successor +; CHECK: Function size: 10 +; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %fneg, float %scale) +; CHECK: Size after inlining: 17 +; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i) +; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test.i = tail call float @test(float %x, float %call.i) +; CHECK: Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor +; CHECK: Updated inlining SCC: (test, inline_rec_true_successor) + +; CHECK: Inlining calls in: test +; CHECK: Function size: 2 +; CHECK: Inlining (cost=25, threshold=225), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale) +; CHECK: Size after inlining: 10 + +define float @test(float %x, float %scale) noinline { +entry: + %call = tail call float @inline_rec_true_successor(float %x, float %scale) + ret float %call +} + +define float @inline_rec_true_successor(float %x, float %scale) { +entry: + %cmp = fcmp olt float %x, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +common.ret18: ; preds = %if.then, %if.end + %common.ret18.op = phi float [ %call_test, %if.then ], [ %mul, %if.end ] + ret float %common.ret18.op + +if.then: ; preds = %entry + %fneg = fneg float %x + %call = tail call float @inline_rec_true_successor(float %fneg, float %scale) + %call_test = tail call float @test(float %fneg, float %call) + br label %common.ret18 + +if.end: ; preds = %entry + %mul = fmul float %x, %scale + br label %common.ret18 +}