From c219bb03e15a82b638ebf96005667017331fc4c4 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi Date: Wed, 14 May 2025 03:50:01 +0000 Subject: [PATCH 1/6] [ValueTracking][NFC]: Use injected condition to compute known FPClass --- llvm/include/llvm/Analysis/SimplifyQuery.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h index d1d34f22a2fc5..7c13880ab0bd6 100644 --- a/llvm/include/llvm/Analysis/SimplifyQuery.h +++ b/llvm/include/llvm/Analysis/SimplifyQuery.h @@ -63,6 +63,8 @@ struct InstrInfoQuery { struct CondContext { Value *Cond; bool Invert = false; + // Condition is true if CxtI is in the true successor of Cond. + bool CondIsTrue = false; SmallPtrSet AffectedValues; CondContext(Value *Cond) : Cond(Cond) {} From d4d36df22ee56c90a2f43afa4050b3d73358e92a Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi Date: Fri, 30 May 2025 16:44:40 +0000 Subject: [PATCH 2/6] Use !CondContext.Invert instead of CondIsTrue --- llvm/include/llvm/Analysis/SimplifyQuery.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h index 7c13880ab0bd6..d1d34f22a2fc5 100644 --- a/llvm/include/llvm/Analysis/SimplifyQuery.h +++ b/llvm/include/llvm/Analysis/SimplifyQuery.h @@ -63,8 +63,6 @@ struct InstrInfoQuery { struct CondContext { Value *Cond; bool Invert = false; - // Condition is true if CxtI is in the true successor of Cond. - bool CondIsTrue = false; SmallPtrSet AffectedValues; CondContext(Value *Cond) : Cond(Cond) {} From c4004e3e923de5836dc167022e531aed297d5414 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi Date: Wed, 14 May 2025 22:15:34 +0000 Subject: [PATCH 3/6] [InlineCost][precommit]: Add test file --- .../Transforms/Inline/inline-recursive-fn2.ll | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 llvm/test/Transforms/Inline/inline-recursive-fn2.ll diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll new file mode 100644 index 0000000000000..cda08b613ddb8 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll @@ -0,0 +1,39 @@ +; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s + +; CHECK: Inlining calls in: test +; CHECK: Function size: 2 +; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale) +; CHECK: Size after inlining: 10 + +; CHECK: Inlining calls in: inline_rec_true_successor +; CHECK: Function size: 10 +; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %fneg, float %scale) +; CHECK: Size after inlining: 17 +; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i) + + +define float @test(float %x, float %scale) noinline { +entry: + %call = tail call float @inline_rec_true_successor(float %x, float %scale) + ret float %call +} + +define float @inline_rec_true_successor(float %x, float %scale) { +entry: + %cmp = fcmp olt float %x, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +common.ret18: ; preds = %if.then, %if.end + %common.ret18.op = phi float [ %call_test, %if.then ], [ %mul, %if.end ] + ret float %common.ret18.op + +if.then: ; preds = %entry + %fneg = fneg float %x + %call = tail call float @inline_rec_true_successor(float %fneg, float %scale) + %call_test = tail call float @test(float %fneg, float %call) + br label %common.ret18 + +if.end: ; preds = %entry + %mul = fmul float %x, %scale + br label %common.ret18 +} From ae2a97f7d395a4805d0b1382bb78f59c3fdd4eee Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi Date: Wed, 14 May 2025 22:50:26 +0000 Subject: [PATCH 4/6] [InlineCost]: Optimize inlining of recursive function. - Consider inlining recursive function of depth 1 only when the caller is the function itself instead of inlining it for each callsite so that we avoid redundant work. - Use CondContext instead of DomTree for better compilation time. --- llvm/lib/Analysis/InlineCost.cpp | 102 ++++++++---------- .../Transforms/Inline/inline-recursive-fn2.ll | 10 +- 2 files changed, 52 insertions(+), 60 deletions(-) diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 8ddfa1e4eb6f7..c51bc12acc168 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1688,66 +1688,52 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) { if (!isa(Cmp.getOperand(0)) || !isa(Cmp.getOperand(1))) return false; auto *CmpOp = Cmp.getOperand(0); - Function *F = Cmp.getFunction(); - // Iterate over the users of the function to check if it's a recursive - // function: - for (auto *U : F->users()) { - CallInst *Call = dyn_cast(U); - if (!Call || Call->getFunction() != F || Call->getCalledFunction() != F) - continue; - auto *CallBB = Call->getParent(); - auto *Predecessor = CallBB->getSinglePredecessor(); - // Only handle the case when the callsite has a single predecessor: - if (!Predecessor) - continue; + // Make sure that the callsite is recursive: + if (CandidateCall.getCaller() != &F) + return false; + CallInst *CallInstr = dyn_cast(&CandidateCall); + // Only handle the case when the callsite has a single predecessor: + auto *CallBB = CallInstr->getParent(); + auto *Predecessor = CallBB->getSinglePredecessor(); + if (!Predecessor) + return false; + // Check if the callsite is guarded by the same Cmp instruction: + auto *Br = dyn_cast(Predecessor->getTerminator()); + if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp) + return false; - auto *Br = dyn_cast(Predecessor->getTerminator()); - if (!Br || Br->isUnconditional()) - continue; - // Check if the Br condition is the same Cmp instr we are investigating: - if (Br->getCondition() != &Cmp) - continue; - // Check if there are any arg of the recursive callsite is affecting the cmp - // instr: - bool ArgFound = false; - Value *FuncArg = nullptr, *CallArg = nullptr; - for (unsigned ArgNum = 0; - ArgNum < F->arg_size() && ArgNum < Call->arg_size(); ArgNum++) { - FuncArg = F->getArg(ArgNum); - CallArg = Call->getArgOperand(ArgNum); - if (FuncArg == CmpOp && CallArg != CmpOp) { - ArgFound = true; - break; - } - } - if (!ArgFound) - continue; - // Now we have a recursive call that is guarded by a cmp instruction. - // Check if this cmp can be simplified: - SimplifyQuery SQ(DL, dyn_cast(CallArg)); - DomConditionCache DC; - DC.registerBranch(Br); - SQ.DC = &DC; - if (DT.root_size() == 0) { - // Dominator tree was never constructed for any function yet. - DT.recalculate(*F); - } else if (DT.getRoot()->getParent() != F) { - // Dominator tree was constructed for a different function, recalculate - // it for the current function. - DT.recalculate(*F); + // Check if there is any arg of the recursive callsite is affecting the cmp + // instr: + bool ArgFound = false; + Value *FuncArg = nullptr, *CallArg = nullptr; + for (unsigned ArgNum = 0; + ArgNum < F.arg_size() && ArgNum < CallInstr->arg_size(); ArgNum++) { + FuncArg = F.getArg(ArgNum); + CallArg = CallInstr->getArgOperand(ArgNum); + if (FuncArg == CmpOp && CallArg != CmpOp) { + ArgFound = true; + break; } - SQ.DT = &DT; - Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands( - cast(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ); - if (auto *ConstVal = dyn_cast_or_null(SimplifiedInstruction)) { - bool IsTrueSuccessor = CallBB == Br->getSuccessor(0); - // Make sure that the BB of the recursive call is NOT the next successor - // of the icmp. In other words, make sure that the recursion depth is 1. - if ((ConstVal->isOne() && !IsTrueSuccessor) || - (ConstVal->isZero() && IsTrueSuccessor)) { - SimplifiedValues[&Cmp] = ConstVal; - return true; - } + } + if (!ArgFound) + return false; + + // Now we have a recursive call that is guarded by a cmp instruction. + // Check if this cmp can be simplified: + SimplifyQuery SQ(DL, dyn_cast(CallArg)); + CondContext CC(cast(&Cmp)); + CC.CondIsTrue = CallBB == Br->getSuccessor(0); + SQ.CC = &CC; + CC.AffectedValues.insert(FuncArg); + Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands( + cast(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ); + if (auto *ConstVal = dyn_cast_or_null(SimplifiedInstruction)) { + // Make sure that the BB of the recursive call is NOT the true successor + // of the icmp. In other words, make sure that the recursion depth is 1. + if ((ConstVal->isOne() && !CC.CondIsTrue) || + (ConstVal->isZero() && CC.CondIsTrue)) { + SimplifiedValues[&Cmp] = ConstVal; + return true; } } return false; diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll index cda08b613ddb8..0323a6ee3a75a 100644 --- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll +++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll @@ -2,15 +2,21 @@ ; CHECK: Inlining calls in: test ; CHECK: Function size: 2 -; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale) -; CHECK: Size after inlining: 10 +; CHECK: NOT Inlining (cost=never): recursive, Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale) ; CHECK: Inlining calls in: inline_rec_true_successor ; CHECK: Function size: 10 ; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %fneg, float %scale) ; CHECK: Size after inlining: 17 ; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i) +; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test.i = tail call float @test(float %x, float %call.i) +; CHECK: Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor +; CHECK: Updated inlining SCC: (test, inline_rec_true_successor) +; CHECK: Inlining calls in: test +; CHECK: Function size: 2 +; CHECK: Inlining (cost=25, threshold=225), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale) +; CHECK: Size after inlining: 10 define float @test(float %x, float %scale) noinline { entry: From 07181585fcc6cb24923acbc4329d4636af9ebb7a Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi Date: Sun, 1 Jun 2025 18:17:25 +0000 Subject: [PATCH 5/6] resolve comments --- llvm/lib/Analysis/InlineCost.cpp | 17 +++++++---------- .../Transforms/Inline/inline-recursive-fn2.ll | 1 + 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index c51bc12acc168..7bd1f18004580 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -263,8 +263,6 @@ class CallAnalyzer : public InstVisitor { // Cache the DataLayout since we use it a lot. const DataLayout &DL; - DominatorTree DT; - /// The OptimizationRemarkEmitter available for this compilation. OptimizationRemarkEmitter *ORE; @@ -1691,9 +1689,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) { // Make sure that the callsite is recursive: if (CandidateCall.getCaller() != &F) return false; - CallInst *CallInstr = dyn_cast(&CandidateCall); // Only handle the case when the callsite has a single predecessor: - auto *CallBB = CallInstr->getParent(); + auto *CallBB = CandidateCall.getParent(); auto *Predecessor = CallBB->getSinglePredecessor(); if (!Predecessor) return false; @@ -1707,9 +1704,9 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) { bool ArgFound = false; Value *FuncArg = nullptr, *CallArg = nullptr; for (unsigned ArgNum = 0; - ArgNum < F.arg_size() && ArgNum < CallInstr->arg_size(); ArgNum++) { + ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) { FuncArg = F.getArg(ArgNum); - CallArg = CallInstr->getArgOperand(ArgNum); + CallArg = CandidateCall.getArgOperand(ArgNum); if (FuncArg == CmpOp && CallArg != CmpOp) { ArgFound = true; break; @@ -1721,8 +1718,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) { // Now we have a recursive call that is guarded by a cmp instruction. // Check if this cmp can be simplified: SimplifyQuery SQ(DL, dyn_cast(CallArg)); - CondContext CC(cast(&Cmp)); - CC.CondIsTrue = CallBB == Br->getSuccessor(0); + CondContext CC(&Cmp); + CC.Invert = (CallBB != Br->getSuccessor(0)); SQ.CC = &CC; CC.AffectedValues.insert(FuncArg); Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands( @@ -1730,8 +1727,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) { if (auto *ConstVal = dyn_cast_or_null(SimplifiedInstruction)) { // Make sure that the BB of the recursive call is NOT the true successor // of the icmp. In other words, make sure that the recursion depth is 1. - if ((ConstVal->isOne() && !CC.CondIsTrue) || - (ConstVal->isZero() && CC.CondIsTrue)) { + if ((ConstVal->isOne() && CC.Invert) || + (ConstVal->isZero() && !CC.Invert)) { SimplifiedValues[&Cmp] = ConstVal; return true; } diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll index 0323a6ee3a75a..80e43733112be 100644 --- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll +++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s ; CHECK: Inlining calls in: test From 6f7b1a4db1f383911c540ecfa2eeaf1f958ecc52 Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi Date: Mon, 2 Jun 2025 13:43:06 +0000 Subject: [PATCH 6/6] Rebase. Add explanation comment to the test file --- llvm/test/Transforms/Inline/inline-recursive-fn2.ll | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll index 80e43733112be..52d7b21902e8e 100644 --- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll +++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll @@ -1,6 +1,9 @@ ; REQUIRES: asserts ; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s +; This test shows that the recursive function will not get simplified +; unless the caller is the function itself, not another different caller. + ; CHECK: Inlining calls in: test ; CHECK: Function size: 2 ; CHECK: NOT Inlining (cost=never): recursive, Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)