From ae956b475e091de4b90542ca422ae9117c1a3866 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 22 Oct 2024 22:52:37 +0000 Subject: [PATCH 1/4] [FuncSpec] Add tests for MaxCodeSizeGrowth --- .../function-specialization-maxgrowth.ll | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll new file mode 100644 index 0000000000000..4f557c0ac78ba --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \ +; RUN: -funcspec-for-literal-constant=true \ +; RUN: -funcspec-min-codesize-savings=50 \ +; RUN: -funcspec-min-latency-savings=50 \ +; RUN: -funcspec-max-codesize-growth=1 \ +; RUN: -S < %s | FileCheck %s + +; Verify that we are able to specialize a function successfully after analysis +; of other specializations that are found to not be profitable. +define void @test_specialize_after_failed_analysis(i32 %n) { +entry: + %notspec0 = call i32 @add0(i32 0, i32 %n) + %notspec1 = call i32 @add0(i32 1, i32 %n) + %spec = call i32 @add0(i32 1, i32 1) + ret void +} + +define i32 @add0(i32 %x, i32 %y) { +entry: + %res = add i32 %x, %y + ret i32 %res +} + +; Verify that we do not specialize once maximum codesize growth has been +; exceeded. 
+define void @test_max_codesize_growth_exceeded(i32 %n) { +entry: + %spec0 = call i32 @add1(i32 0, i32 0) + %spec1 = call i32 @add1(i32 1, i32 1) + %spec2 = call i32 @add1(i32 2, i32 2) + %notspec = call i32 @add1(i32 3, i32 3) + ret void +} + +define i32 @add1(i32 %x, i32 %y) { +entry: + %res = add i32 %x, %y + ret i32 %res +} + +; CHECK-LABEL: define void @test_specialize_after_failed_analysis( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add0(i32 0, i32 [[N]]) +; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add0(i32 1, i32 [[N]]) +; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add0(i32 1, i32 1) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define i32 @add0( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[RES]] +; +; +; CHECK-LABEL: define void @test_max_codesize_growth_exceeded( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SPEC0:%.*]] = call i32 @add1.specialized.1(i32 0, i32 0) +; CHECK-NEXT: [[SPEC1:%.*]] = call i32 @add1.specialized.2(i32 1, i32 1) +; CHECK-NEXT: [[SPEC2:%.*]] = call i32 @add1.specialized.3(i32 2, i32 2) +; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @add1(i32 3, i32 3) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define i32 @add1( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[RES]] +; +; +; CHECK-LABEL: define internal i32 @add1.specialized.1( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 poison +; +; +; CHECK-LABEL: define internal i32 @add1.specialized.2( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 poison +; +; +; CHECK-LABEL: define internal i32 @add1.specialized.3( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; 
CHECK-NEXT: ret i32 poison +; From a2d352b0c703983dabe3655ed34812b8ab0f626c Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 22 Oct 2024 17:00:40 +0000 Subject: [PATCH 2/4] [FuncSpec] Improve accounting of specialization codesize growth Only accumulate the codesize increase of functions that are actually specialized, rather than for every candidate specialization that we analyse. This fixes a subtle bug where prior analysis of candidate specializations that were deemed unprofitable could prevent subsequent profitable candidates from being recognised. --- .../Transforms/IPO/FunctionSpecialization.h | 11 +++-- .../Transforms/IPO/FunctionSpecialization.cpp | 44 +++++++++++-------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index 5920dde9d77df..4d0be553aa6ed 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -131,13 +131,16 @@ struct Spec { // Profitability of the specialization. unsigned Score; + // Cost of the specialization, in terms of codesize. + unsigned CodeSizeCost; + // List of call sites, matching this specialization. 
SmallVector<CallBase *> CallSites; - Spec(Function *F, const SpecSig &S, unsigned Score) - : F(F), Sig(S), Score(Score) {} - Spec(Function *F, const SpecSig &&S, unsigned Score) - : F(F), Sig(S), Score(Score) {} + Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSizeCost) + : F(F), Sig(S), Score(Score), CodeSizeCost(CodeSizeCost) {} + Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSizeCost) + : F(F), Sig(S), Score(Score), CodeSizeCost(CodeSizeCost) {} }; class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> { diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 20249a20a37e4..35865d7213acf 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -646,6 +646,18 @@ FunctionSpecializer::~FunctionSpecializer() { cleanUpSSA(); } +/// Get the unsigned Value of given Cost object. Assumes the Cost is always +/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and +/// always Valid. +static unsigned getCostValue(const Cost &C) { + int64_t Value = *C.getValue(); + + assert(Value >= 0 && "CodeSize and Latency cannot be negative"); + // It is safe to down cast since we know the arguments cannot be negative and + // Cost is of type int64_t. + return static_cast<unsigned>(Value); +} + /// Attempt to specialize functions in the module to enable constant /// propagation across function boundaries. /// @@ -759,6 +771,14 @@ bool FunctionSpecializer::run() { SmallVector<Function *> Clones; for (unsigned I = 0; I < NSpecs; ++I) { Spec &S = AllSpecs[BestSpecs[I]]; + + // Check that creating this specialization doesn't exceed the maximum + // codesize growth. 
+ unsigned FuncSize = getCostValue(FunctionMetrics[S.F].NumInsts); + if ((FunctionGrowth[S.F] + S.CodeSizeCost) / FuncSize > MaxCodeSizeGrowth) + continue; + FunctionGrowth[S.F] += S.CodeSizeCost; + S.Clone = createSpecialization(S.F, S.Sig); // Update the known call sites to call the clone. @@ -837,18 +857,6 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) { return Clone; } -/// Get the unsigned Value of given Cost object. Assumes the Cost is always -/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and -/// always Valid. -static unsigned getCostValue(const Cost &C) { - int64_t Value = *C.getValue(); - - assert(Value >= 0 && "CodeSize and Latency cannot be negative"); - // It is safe to down cast since we know the arguments cannot be negative and - // Cost is of type int64_t. - return static_cast<unsigned>(Value); -} - bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM) { @@ -924,16 +932,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, } CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs(); + unsigned CodeSizeSavings = getCostValue(CodeSize); + unsigned CodeSizeCost = FuncSize - CodeSizeSavings; + auto IsProfitable = [&]() -> bool { // No check required. if (ForceSpecialization) return true; - unsigned CodeSizeSavings = getCostValue(CodeSize); - // TODO: We should only accumulate codesize increase of specializations - // that are actually created. - FunctionGrowth[F] += FuncSize - CodeSizeSavings; - LLVM_DEBUG( dbgs() << "FnSpecialization: Specialization bonus {Inlining = " << Score << " (" << (Score * 100 / FuncSize) << "%)}\n"); @@ -964,7 +970,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, if (LatencySavings < MinLatencySavings * FuncSize / 100) return false; // Maximum codesize growth. 
- if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth) + if ((FunctionGrowth[F] + CodeSizeCost) / FuncSize > MaxCodeSizeGrowth) return false; Score += std::max(CodeSizeSavings, LatencySavings); @@ -976,7 +982,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, continue; // Create a new specialisation entry. - auto &Spec = AllSpecs.emplace_back(F, S, Score); + auto &Spec = AllSpecs.emplace_back(F, S, Score, CodeSizeCost); if (CS.getFunction() != F) Spec.CallSites.push_back(&CS); const unsigned Index = AllSpecs.size() - 1; From d289ffa1c5dfb356bc0f45895943f4599cc323ad Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 22 Oct 2024 23:12:52 +0000 Subject: [PATCH 3/4] Update tests --- .../function-specialization-maxgrowth.ll | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll index 4f557c0ac78ba..b4aea30f6b34d 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll @@ -44,7 +44,7 @@ entry: ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add0(i32 0, i32 [[N]]) ; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add0(i32 1, i32 [[N]]) -; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add0(i32 1, i32 1) +; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add0.specialized.1(i32 1, i32 1) ; CHECK-NEXT: ret void ; ; @@ -58,9 +58,9 @@ entry: ; CHECK-LABEL: define void @test_max_codesize_growth_exceeded( ; CHECK-SAME: i32 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SPEC0:%.*]] = call i32 @add1.specialized.1(i32 0, i32 0) -; CHECK-NEXT: [[SPEC1:%.*]] = call i32 @add1.specialized.2(i32 1, i32 1) -; CHECK-NEXT: [[SPEC2:%.*]] = call i32 @add1.specialized.3(i32 2, i32 2) +; CHECK-NEXT: [[SPEC0:%.*]] = call i32 
@add1.specialized.2(i32 0, i32 0) +; CHECK-NEXT: [[SPEC1:%.*]] = call i32 @add1.specialized.3(i32 1, i32 1) +; CHECK-NEXT: [[SPEC2:%.*]] = call i32 @add1.specialized.4(i32 2, i32 2) ; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @add1(i32 3, i32 3) ; CHECK-NEXT: ret void ; @@ -72,7 +72,7 @@ entry: ; CHECK-NEXT: ret i32 [[RES]] ; ; -; CHECK-LABEL: define internal i32 @add1.specialized.1( +; CHECK-LABEL: define internal i32 @add0.specialized.1( ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: ret i32 poison @@ -89,3 +89,9 @@ entry: ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: ret i32 poison ; +; +; CHECK-LABEL: define internal i32 @add1.specialized.4( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 poison +; From 5c572334e308f1fb89490690fb396440645d4b35 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Mon, 28 Oct 2024 10:37:27 +0000 Subject: [PATCH 4/4] Address review comments - Rename local variable (s/CodeSizeCost/SpecSize) - Rename struct member (s/CodeSizeCost/CodeSize) and update comment - Remove early exit from specialization creation loop - Remove redundant prefix from regression test name - Remove regression test for exceeding FuncGrowth --- .../Transforms/IPO/FunctionSpecialization.h | 12 +-- .../Transforms/IPO/FunctionSpecialization.cpp | 15 ++- .../function-specialization-maxgrowth.ll | 97 ------------------- .../FunctionSpecialization/maxgrowth.ll | 44 +++++++++ 4 files changed, 56 insertions(+), 112 deletions(-) delete mode 100644 llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll create mode 100644 llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index 4d0be553aa6ed..651fa970ee3d5 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ 
b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -131,16 +131,16 @@ struct Spec { // Profitability of the specialization. unsigned Score; - // Cost of the specialization, in terms of codesize. - unsigned CodeSizeCost; + // Number of instructions in the specialization. + unsigned CodeSize; // List of call sites, matching this specialization. SmallVector<CallBase *> CallSites; - Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSizeCost) - : F(F), Sig(S), Score(Score), CodeSizeCost(CodeSizeCost) {} - Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSizeCost) - : F(F), Sig(S), Score(Score), CodeSizeCost(CodeSizeCost) {} + Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize) + : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} + Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize) + : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} }; class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> { diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 35865d7213acf..a94bb9c80bca6 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -772,12 +772,9 @@ bool FunctionSpecializer::run() { for (unsigned I = 0; I < NSpecs; ++I) { Spec &S = AllSpecs[BestSpecs[I]]; - // Check that creating this specialization doesn't exceed the maximum - // codesize growth. - unsigned FuncSize = getCostValue(FunctionMetrics[S.F].NumInsts); - if ((FunctionGrowth[S.F] + S.CodeSizeCost) / FuncSize > MaxCodeSizeGrowth) - continue; - FunctionGrowth[S.F] += S.CodeSizeCost; + // Accumulate the codesize growth for the function, now we are creating the + // specialization. 
+ FunctionGrowth[S.F] += S.CodeSize; S.Clone = createSpecialization(S.F, S.Sig); @@ -933,7 +930,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs(); unsigned CodeSizeSavings = getCostValue(CodeSize); - unsigned CodeSizeCost = FuncSize - CodeSizeSavings; + unsigned SpecSize = FuncSize - CodeSizeSavings; auto IsProfitable = [&]() -> bool { // No check required. @@ -970,7 +967,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, if (LatencySavings < MinLatencySavings * FuncSize / 100) return false; // Maximum codesize growth. - if ((FunctionGrowth[F] + CodeSizeCost) / FuncSize > MaxCodeSizeGrowth) + if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth) return false; Score += std::max(CodeSizeSavings, LatencySavings); @@ -982,7 +979,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, continue; // Create a new specialisation entry. - auto &Spec = AllSpecs.emplace_back(F, S, Score, CodeSizeCost); + auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize); if (CS.getFunction() != F) Spec.CallSites.push_back(&CS); const unsigned Index = AllSpecs.size() - 1; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll deleted file mode 100644 index b4aea30f6b34d..0000000000000 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-maxgrowth.ll +++ /dev/null @@ -1,97 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 -; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \ -; RUN: -funcspec-for-literal-constant=true \ -; RUN: -funcspec-min-codesize-savings=50 \ -; RUN: -funcspec-min-latency-savings=50 \ -; RUN: -funcspec-max-codesize-growth=1 \ -; RUN: -S < %s | FileCheck %s - -; Verify that we are 
able to specialize a function successfully after analysis -; of other specializations that are found to not be profitable. -define void @test_specialize_after_failed_analysis(i32 %n) { -entry: - %notspec0 = call i32 @add0(i32 0, i32 %n) - %notspec1 = call i32 @add0(i32 1, i32 %n) - %spec = call i32 @add0(i32 1, i32 1) - ret void -} - -define i32 @add0(i32 %x, i32 %y) { -entry: - %res = add i32 %x, %y - ret i32 %res -} - -; Verify that we do not specialize once maximum codesize growth has been -; exceeded. -define void @test_max_codesize_growth_exceeded(i32 %n) { -entry: - %spec0 = call i32 @add1(i32 0, i32 0) - %spec1 = call i32 @add1(i32 1, i32 1) - %spec2 = call i32 @add1(i32 2, i32 2) - %notspec = call i32 @add1(i32 3, i32 3) - ret void -} - -define i32 @add1(i32 %x, i32 %y) { -entry: - %res = add i32 %x, %y - ret i32 %res -} - -; CHECK-LABEL: define void @test_specialize_after_failed_analysis( -; CHECK-SAME: i32 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add0(i32 0, i32 [[N]]) -; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add0(i32 1, i32 [[N]]) -; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add0.specialized.1(i32 1, i32 1) -; CHECK-NEXT: ret void -; -; -; CHECK-LABEL: define i32 @add0( -; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]] -; CHECK-NEXT: ret i32 [[RES]] -; -; -; CHECK-LABEL: define void @test_max_codesize_growth_exceeded( -; CHECK-SAME: i32 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SPEC0:%.*]] = call i32 @add1.specialized.2(i32 0, i32 0) -; CHECK-NEXT: [[SPEC1:%.*]] = call i32 @add1.specialized.3(i32 1, i32 1) -; CHECK-NEXT: [[SPEC2:%.*]] = call i32 @add1.specialized.4(i32 2, i32 2) -; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @add1(i32 3, i32 3) -; CHECK-NEXT: ret void -; -; -; CHECK-LABEL: define i32 @add1( -; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], 
[[Y]] -; CHECK-NEXT: ret i32 [[RES]] -; -; -; CHECK-LABEL: define internal i32 @add0.specialized.1( -; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret i32 poison -; -; -; CHECK-LABEL: define internal i32 @add1.specialized.2( -; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret i32 poison -; -; -; CHECK-LABEL: define internal i32 @add1.specialized.3( -; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret i32 poison -; -; -; CHECK-LABEL: define internal i32 @add1.specialized.4( -; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret i32 poison -; diff --git a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll new file mode 100644 index 0000000000000..82d1f7ae4a6e1 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \ +; RUN: -funcspec-for-literal-constant=true \ +; RUN: -funcspec-min-codesize-savings=50 \ +; RUN: -funcspec-min-latency-savings=50 \ +; RUN: -funcspec-max-codesize-growth=1 \ +; RUN: -S < %s | FileCheck %s + +; Verify that we are able to specialize a function successfully after analysis +; of other specializations that are found to not be profitable. 
+define void @test_specialize_after_failed_analysis(i32 %n) { +entry: + %notspec0 = call i32 @add(i32 0, i32 %n) + %notspec1 = call i32 @add(i32 1, i32 %n) + %spec = call i32 @add(i32 1, i32 1) + ret void +} + +define i32 @add(i32 %x, i32 %y) { +entry: + %res = add i32 %x, %y + ret i32 %res +} +; CHECK-LABEL: define void @test_specialize_after_failed_analysis( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]]) +; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]]) +; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define i32 @add( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[RES]] +; +; +; CHECK-LABEL: define internal i32 @add.specialized.1( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 poison +;