diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 871a6e97861e2..2880553817ebd 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -74,7 +74,8 @@ class InlineAdvisor; class InlineAdvice { public: InlineAdvice(InlineAdvisor *Advisor, CallBase &CB, - OptimizationRemarkEmitter &ORE, bool IsInliningRecommended); + OptimizationRemarkEmitter &ORE, bool IsInliningRecommended, + std::optional<int> InliningCost = std::nullopt); InlineAdvice(InlineAdvice &&) = delete; InlineAdvice(const InlineAdvice &) = delete; @@ -108,6 +109,7 @@ class InlineAdvice { /// Get the inlining recommendation. bool isInliningRecommended() const { return IsInliningRecommended; } + std::optional<int> inliningCost() const { return InliningCost; } const DebugLoc &getOriginalCallSiteDebugLoc() const { return DLoc; } const BasicBlock *getOriginalCallSiteBasicBlock() const { return Block; } @@ -129,6 +131,7 @@ class InlineAdvice { const BasicBlock *const Block; OptimizationRemarkEmitter &ORE; const bool IsInliningRecommended; + const std::optional<int> InliningCost; private: void markRecorded() { @@ -145,8 +148,11 @@ class DefaultInlineAdvice : public InlineAdvice { DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB, std::optional OIC, OptimizationRemarkEmitter &ORE, bool EmitRemarks = true) - : InlineAdvice(Advisor, CB, ORE, OIC.has_value()), OriginalCB(&CB), - OIC(OIC), EmitRemarks(EmitRemarks) {} + : InlineAdvice(Advisor, CB, ORE, OIC.has_value(), + OIC && OIC->isVariable() + ? 
std::optional(OIC->getCost()) + : std::nullopt), + OriginalCB(&CB), OIC(OIC), EmitRemarks(EmitRemarks) {} private: void recordUnsuccessfulInliningImpl(const InlineResult &Result) override; diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index c5978ce54fc18..1190308dba419 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -58,6 +58,9 @@ const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536; const char FunctionInlineCostMultiplierAttributeName[] = "function-inline-cost-multiplier"; +/// Call-site attribute carrying additional cost accumulated from inlining. +const char FunctionInlineAdditionalCostAttributeName[] = + "function-inline-additional-cost"; const char MaxInlineStackSizeAttributeName[] = "inline-max-stacksize"; } // namespace InlineConstants diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index c6907cb128bb4..9706dd212d7bc 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -175,10 +175,12 @@ DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) { InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB, OptimizationRemarkEmitter &ORE, - bool IsInliningRecommended) + bool IsInliningRecommended, + std::optional<int> InliningCost) : Advisor(Advisor), Caller(CB.getCaller()), Callee(CB.getCalledFunction()), DLoc(CB.getDebugLoc()), Block(CB.getParent()), ORE(ORE), - IsInliningRecommended(IsInliningRecommended) {} + IsInliningRecommended(IsInliningRecommended), InliningCost(InliningCost) { +} void InlineAdvice::recordInlineStatsIfNeeded() { if (Advisor->ImportedFunctionsStats) diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index d2c329ba748e5..e45423f2130d8 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1017,6 +1017,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { 
InlineConstants::FunctionInlineCostMultiplierAttributeName)) Cost *= *AttrCostMult; + if (std::optional AttrAdditionalCost = getStringFnAttrAsInt( + CandidateCall, + InlineConstants::FunctionInlineAdditionalCostAttributeName)) + Cost += *AttrAdditionalCost; + if (std::optional AttrThreshold = getStringFnAttrAsInt(CandidateCall, "function-inline-threshold")) Threshold = *AttrThreshold; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 23ee23eb047f5..977bbcd35f73e 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -376,6 +376,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, getStringFnAttrAsInt( *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName) .value_or(1); + int CBInliningAdditionalCost = + getStringFnAttrAsInt( + *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName) + .value_or(0); + std::optional InliningCost = Advice->inliningCost(); // Setup the data structure used to plumb customization into the // `InlineFunction` routine. @@ -435,6 +440,16 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, InlineConstants::FunctionInlineCostMultiplierAttributeName, itostr(CBCostMult * IntraSCCCostMultiplier)); ICB->addFnAttr(NewCBCostMult); + } else if (InliningCost && *InliningCost > 0) { + // Hot call site thresholds can cause the inliner to inline + // numerous functions, causing compile time issues. To mitigate + // this, 1/16 of the inlining cost is carried over to ICB. 
+ Attribute NewCBAdditionalCost = Attribute::get( + M.getContext(), + InlineConstants::FunctionInlineAdditionalCostAttributeName, + itostr(CBInliningAdditionalCost + + (*InliningCost - CBInliningAdditionalCost) / 16)); + ICB->addFnAttr(NewCBAdditionalCost); } } } diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index dbc733826944b..c196be9d6dd16 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -225,6 +225,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, Advice->recordUnattemptedInlining(); continue; } + int CBInliningAdditionalCost = + getStringFnAttrAsInt( + *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName) + .value_or(0); + std::optional InliningCost = Advice->inliningCost(); // Setup the data structure used to plumb customization into the // `InlineFunction` routine. @@ -265,8 +270,20 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, NewCallee = ICB->getCalledFunction(); } if (NewCallee) - if (!NewCallee->isDeclaration()) + if (!NewCallee->isDeclaration()) { Calls->push({ICB, NewHistoryID}); + if (InliningCost && *InliningCost > 0) { + // Hot call site thresholds can cause the inliner to inline numerous + // functions, causing compile time issues. To mitigate this, 1/16 of + // the inlining cost is accumulated on ICB as additional cost. 
+ Attribute NewCBAdditionalCost = Attribute::get( + M.getContext(), + InlineConstants::FunctionInlineAdditionalCostAttributeName, + itostr(CBInliningAdditionalCost + + (*InliningCost - CBInliningAdditionalCost) / 16)); + ICB->addFnAttr(NewCBAdditionalCost); + } + } } } diff --git a/llvm/test/Transforms/Inline/inline-history-noinline.ll b/llvm/test/Transforms/Inline/inline-history-noinline.ll index 742bd25ecd9bb..fbe633fc3c797 100644 --- a/llvm/test/Transforms/Inline/inline-history-noinline.ll +++ b/llvm/test/Transforms/Inline/inline-history-noinline.ll @@ -29,4 +29,4 @@ define internal void @a() { ret void } -; CHECK: [[NOINLINE]] = { noinline } +; CHECK: [[NOINLINE]] = { noinline {{.*}}} diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll new file mode 100644 index 0000000000000..a1730d76cd547 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; This tests that a hot callsite gets the (higher) inlinehint-threshold even +; without inline hints and gets inlined because the cost is less than the threshold. +; RUN: opt < %s -passes=inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s +; RUN: opt < %s -passes=module-inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s + +; Due to the hot call site, foo0 inlined foo1, foo2, and foo3, +; but foo4 is not inlined due to the accumulated cost. 
+ +declare void @bar(ptr) + +define void @foo0(ptr %p) { +; CHECK-LABEL: define void @foo0( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[HEADER:.*:]] +; CHECK-NEXT: [[I_I2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I_I1:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I]]) +; CHECK-NEXT: call void @bar(ptr [[I_I]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I1]]) +; CHECK-NEXT: call void @bar(ptr [[I_I1]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I2]]) +; CHECK-NEXT: call void @bar(ptr [[I_I2]]) +; CHECK-NEXT: call void @foo4(ptr [[P]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I2]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I]]) +; CHECK-NEXT: br label %[[LOOP]] +; +header: + br label %loop + +loop: + call void @foo1(ptr %p) + br label %loop +} + +define void @foo1(ptr %p) { +; CHECK-LABEL: define void @foo1( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @bar(ptr [[I]]) +; CHECK-NEXT: call void @foo2(ptr [[P]]) +; CHECK-NEXT: ret void +; + %i = alloca i32 + call void @bar(ptr %i) + call void @foo2(ptr %p) + ret void +} + +define void @foo2(ptr %p) { +; CHECK-LABEL: define void @foo2( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @bar(ptr [[I]]) +; CHECK-NEXT: call void @foo3(ptr [[P]]) +; CHECK-NEXT: ret void +; + %i = alloca i32 + call void @bar(ptr %i) + call void @foo3(ptr %p) + ret void +} + +define void @foo3(ptr %p) { +; CHECK-LABEL: define void @foo3( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @bar(ptr [[I]]) +; CHECK-NEXT: call void @foo4(ptr [[P]]) +; CHECK-NEXT: 
ret void +; + %i = alloca i32 + call void @bar(ptr %i) + call void @foo4(ptr %p) + ret void +} + +define void @foo4(ptr %p) { +; CHECK-LABEL: define void @foo4( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @bar(ptr [[I]]) +; CHECK-NEXT: call void @foo5(ptr [[P]]) +; CHECK-NEXT: ret void +; + %i = alloca i32 + call void @bar(ptr %i) + call void @foo5(ptr %p) + ret void +} + +define void @foo5(ptr %p) { +; CHECK-LABEL: define void @foo5( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @bar(ptr [[I]]) +; CHECK-NEXT: call void @bar(ptr [[I]]) +; CHECK-NEXT: ret void +; + %i = alloca i32 + call void @bar(ptr %i) + call void @bar(ptr %i) + ret void +}