FunctionSpecialization: accumulate code-size growth only for created clones.

Spec gains a CodeSize field, filled with FuncSize minus the code-size savings
computed for the candidate. findSpecializations() no longer adds to
FunctionGrowth[F] while it is still analysing candidates; it compares
FunctionGrowth[F] + SpecSize against MaxCodeSizeGrowth instead, and run() adds
S.CodeSize to FunctionGrowth[S.F] right before it creates each selected clone.
getCostValue() is moved earlier in the file with an unchanged body. The new
maxgrowth.ll test checks that a specialization is still created after earlier
candidates of the same function were rejected.

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5920dde9d77df..651fa970ee3d5 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -131,13 +131,16 @@ struct Spec {
   // Profitability of the specialization.
   unsigned Score;
 
+  // Number of instructions in the specialization.
+  unsigned CodeSize;
+
   // List of call sites, matching this specialization.
   SmallVector<CallBase *> CallSites;
 
-  Spec(Function *F, const SpecSig &S, unsigned Score)
-      : F(F), Sig(S), Score(Score) {}
-  Spec(Function *F, const SpecSig &&S, unsigned Score)
-      : F(F), Sig(S), Score(Score) {}
+  Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
+      : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
+  Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
+      : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
 };
 
 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 20249a20a37e4..a94bb9c80bca6 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -646,6 +646,18 @@ FunctionSpecializer::~FunctionSpecializer() {
   cleanUpSSA();
 }
 
+/// Get the unsigned Value of given Cost object. Assumes the Cost is always
+/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
+/// always Valid.
+static unsigned getCostValue(const Cost &C) {
+  int64_t Value = *C.getValue();
+
+  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
+  // It is safe to down cast since we know the arguments cannot be negative and
+  // Cost is of type int64_t.
+  return static_cast<unsigned>(Value);
+}
+
 /// Attempt to specialize functions in the module to enable constant
 /// propagation across function boundaries.
 ///
@@ -759,6 +771,11 @@ bool FunctionSpecializer::run() {
   SmallVector<Function *> Clones;
   for (unsigned I = 0; I < NSpecs; ++I) {
     Spec &S = AllSpecs[BestSpecs[I]];
+
+    // Accumulate the codesize growth for the function, now we are creating the
+    // specialization.
+    FunctionGrowth[S.F] += S.CodeSize;
+
     S.Clone = createSpecialization(S.F, S.Sig);
 
     // Update the known call sites to call the clone.
@@ -837,18 +854,6 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
   return Clone;
 }
 
-/// Get the unsigned Value of given Cost object. Assumes the Cost is always
-/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
-/// always Valid.
-static unsigned getCostValue(const Cost &C) {
-  int64_t Value = *C.getValue();
-
-  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
-  // It is safe to down cast since we know the arguments cannot be negative and
-  // Cost is of type int64_t.
-  return static_cast<unsigned>(Value);
-}
-
 bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                                               SmallVectorImpl<Spec> &AllSpecs,
                                               SpecMap &SM) {
@@ -924,16 +929,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
       }
       CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
 
+      unsigned CodeSizeSavings = getCostValue(CodeSize);
+      unsigned SpecSize = FuncSize - CodeSizeSavings;
+
       auto IsProfitable = [&]() -> bool {
         // No check required.
         if (ForceSpecialization)
           return true;
 
-        unsigned CodeSizeSavings = getCostValue(CodeSize);
-        // TODO: We should only accumulate codesize increase of specializations
-        // that are actually created.
-        FunctionGrowth[F] += FuncSize - CodeSizeSavings;
-
         LLVM_DEBUG(
             dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
                    << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
@@ -964,7 +967,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
         if (LatencySavings < MinLatencySavings * FuncSize / 100)
           return false;
         // Maximum codesize growth.
-        if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth)
+        if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
           return false;
 
         Score += std::max(CodeSizeSavings, LatencySavings);
@@ -976,7 +979,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
         continue;
 
       // Create a new specialisation entry.
-      auto &Spec = AllSpecs.emplace_back(F, S, Score);
+      auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
       if (CS.getFunction() != F)
         Spec.CallSites.push_back(&CS);
       const unsigned Index = AllSpecs.size() - 1;
diff --git a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
new file mode 100644
index 0000000000000..82d1f7ae4a6e1
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \
+; RUN:   -funcspec-for-literal-constant=true \
+; RUN:   -funcspec-min-codesize-savings=50 \
+; RUN:   -funcspec-min-latency-savings=50 \
+; RUN:   -funcspec-max-codesize-growth=1 \
+; RUN:   -S < %s | FileCheck %s
+
+; Verify that we are able to specialize a function successfully after analysis
+; of other specializations that are found to not be profitable.
+define void @test_specialize_after_failed_analysis(i32 %n) {
+entry:
+  %notspec0 = call i32 @add(i32 0, i32 %n)
+  %notspec1 = call i32 @add(i32 1, i32 %n)
+  %spec = call i32 @add(i32 1, i32 1)
+  ret void
+}
+
+define i32 @add(i32 %x, i32 %y) {
+entry:
+  %res = add i32 %x, %y
+  ret i32 %res
+}
+; CHECK-LABEL: define void @test_specialize_after_failed_analysis(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]])
+; CHECK-NEXT:    [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]])
+; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1)
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define i32 @add(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @add.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i32 poison
+;