-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[FuncSpec] Improve accounting of specialization codesize growth #113448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -646,6 +646,18 @@ FunctionSpecializer::~FunctionSpecializer() { | |
| cleanUpSSA(); | ||
| } | ||
|
|
||
| /// Get the unsigned value of the given Cost object. Assumes the Cost is always | ||
| /// valid and non-negative; the latter holds for both TCK_CodeSize and | ||
| /// TCK_Latency. | ||
| static unsigned getCostValue(const Cost &C) { | ||
| int64_t Value = *C.getValue(); | ||
|
|
||
| assert(Value >= 0 && "CodeSize and Latency cannot be negative"); | ||
| // The value is known to be non-negative (asserted above). Narrowing from | ||
| // int64_t to unsigned additionally assumes the value fits in an unsigned. | ||
| return static_cast<unsigned>(Value); | ||
| } | ||
|
|
||
| /// Attempt to specialize functions in the module to enable constant | ||
| /// propagation across function boundaries. | ||
| /// | ||
|
|
@@ -759,6 +771,14 @@ bool FunctionSpecializer::run() { | |
| SmallVector<Function *> Clones; | ||
| for (unsigned I = 0; I < NSpecs; ++I) { | ||
| Spec &S = AllSpecs[BestSpecs[I]]; | ||
|
|
||
| // Check that creating this specialization doesn't exceed the maximum | ||
| // codesize growth. | ||
| unsigned FuncSize = getCostValue(FunctionMetrics[S.F].NumInsts); | ||
| if ((FunctionGrowth[S.F] + S.CodeSizeCost) / FuncSize > MaxCodeSizeGrowth) | ||
| continue; | ||
|
||
| FunctionGrowth[S.F] += S.CodeSizeCost; | ||
|
|
||
| S.Clone = createSpecialization(S.F, S.Sig); | ||
|
|
||
| // Update the known call sites to call the clone. | ||
|
|
@@ -837,18 +857,6 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) { | |
| return Clone; | ||
| } | ||
|
|
||
| /// Get the unsigned value of the given Cost object. Assumes the Cost is always | ||
| /// valid and non-negative; the latter holds for both TCK_CodeSize and | ||
| /// TCK_Latency. | ||
| static unsigned getCostValue(const Cost &C) { | ||
| int64_t Value = *C.getValue(); | ||
|
|
||
| assert(Value >= 0 && "CodeSize and Latency cannot be negative"); | ||
| // The value is known to be non-negative (asserted above). Narrowing from | ||
| // int64_t to unsigned additionally assumes the value fits in an unsigned. | ||
| return static_cast<unsigned>(Value); | ||
| } | ||
|
|
||
| bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, | ||
| SmallVectorImpl<Spec> &AllSpecs, | ||
| SpecMap &SM) { | ||
|
|
@@ -924,16 +932,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, | |
| } | ||
| CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs(); | ||
|
|
||
| unsigned CodeSizeSavings = getCostValue(CodeSize); | ||
| unsigned CodeSizeCost = FuncSize - CodeSizeSavings; | ||
|
|
||
labrinea marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| auto IsProfitable = [&]() -> bool { | ||
| // No check required. | ||
| if (ForceSpecialization) | ||
| return true; | ||
|
|
||
| unsigned CodeSizeSavings = getCostValue(CodeSize); | ||
| // TODO: We should only accumulate codesize increase of specializations | ||
| // that are actually created. | ||
| FunctionGrowth[F] += FuncSize - CodeSizeSavings; | ||
|
|
||
| LLVM_DEBUG( | ||
| dbgs() << "FnSpecialization: Specialization bonus {Inlining = " | ||
| << Score << " (" << (Score * 100 / FuncSize) << "%)}\n"); | ||
|
|
@@ -964,7 +970,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, | |
| if (LatencySavings < MinLatencySavings * FuncSize / 100) | ||
| return false; | ||
| // Maximum codesize growth. | ||
| if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth) | ||
| if ((FunctionGrowth[F] + CodeSizeCost) / FuncSize > MaxCodeSizeGrowth) | ||
labrinea marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return false; | ||
|
|
||
labrinea marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Score += std::max(CodeSizeSavings, LatencySavings); | ||
|
|
@@ -976,7 +982,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, | |
| continue; | ||
|
|
||
| // Create a new specialisation entry. | ||
| auto &Spec = AllSpecs.emplace_back(F, S, Score); | ||
| auto &Spec = AllSpecs.emplace_back(F, S, Score, CodeSizeCost); | ||
| if (CS.getFunction() != F) | ||
| Spec.CallSites.push_back(&CS); | ||
| const unsigned Index = AllSpecs.size() - 1; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 | ||
| ; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \ | ||
| ; RUN: -funcspec-for-literal-constant=true \ | ||
| ; RUN: -funcspec-min-codesize-savings=50 \ | ||
| ; RUN: -funcspec-min-latency-savings=50 \ | ||
| ; RUN: -funcspec-max-codesize-growth=1 \ | ||
| ; RUN: -S < %s | FileCheck %s | ||
|
|
||
| ; Verify that a function can still be specialized after other candidate | ||
| ; specializations of it were analysed and found not to be profitable: the | ||
| ; rejected candidates must not count towards the codesize growth limit. | ||
| define void @test_specialize_after_failed_analysis(i32 %n) { | ||
| entry: | ||
| %notspec0 = call i32 @add0(i32 0, i32 %n) | ||
| %notspec1 = call i32 @add0(i32 1, i32 %n) | ||
| %spec = call i32 @add0(i32 1, i32 1) | ||
| ret void | ||
| } | ||
|
|
||
| define i32 @add0(i32 %x, i32 %y) { | ||
| entry: | ||
| %res = add i32 %x, %y | ||
| ret i32 %res | ||
| } | ||
|
|
||
| ; Verify that we stop specializing once a further specialization would | ||
| ; exceed the maximum codesize growth. | ||
| define void @test_max_codesize_growth_exceeded(i32 %n) { | ||
| entry: | ||
| %spec0 = call i32 @add1(i32 0, i32 0) | ||
| %spec1 = call i32 @add1(i32 1, i32 1) | ||
| %spec2 = call i32 @add1(i32 2, i32 2) | ||
| %notspec = call i32 @add1(i32 3, i32 3) | ||
| ret void | ||
| } | ||
|
|
||
| define i32 @add1(i32 %x, i32 %y) { | ||
| entry: | ||
| %res = add i32 %x, %y | ||
| ret i32 %res | ||
| } | ||
|
|
||
| ; CHECK-LABEL: define void @test_specialize_after_failed_analysis( | ||
| ; CHECK-SAME: i32 [[N:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add0(i32 0, i32 [[N]]) | ||
| ; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add0(i32 1, i32 [[N]]) | ||
| ; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add0.specialized.1(i32 1, i32 1) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| ; | ||
| ; CHECK-LABEL: define i32 @add0( | ||
| ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]] | ||
| ; CHECK-NEXT: ret i32 [[RES]] | ||
| ; | ||
| ; | ||
| ; CHECK-LABEL: define void @test_max_codesize_growth_exceeded( | ||
| ; CHECK-SAME: i32 [[N:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[SPEC0:%.*]] = call i32 @add1.specialized.2(i32 0, i32 0) | ||
| ; CHECK-NEXT: [[SPEC1:%.*]] = call i32 @add1.specialized.3(i32 1, i32 1) | ||
| ; CHECK-NEXT: [[SPEC2:%.*]] = call i32 @add1.specialized.4(i32 2, i32 2) | ||
| ; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @add1(i32 3, i32 3) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| ; | ||
| ; CHECK-LABEL: define i32 @add1( | ||
| ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]] | ||
| ; CHECK-NEXT: ret i32 [[RES]] | ||
| ; | ||
| ; | ||
| ; CHECK-LABEL: define internal i32 @add0.specialized.1( | ||
| ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: ret i32 poison | ||
| ; | ||
| ; | ||
| ; CHECK-LABEL: define internal i32 @add1.specialized.2( | ||
| ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: ret i32 poison | ||
| ; | ||
| ; | ||
| ; CHECK-LABEL: define internal i32 @add1.specialized.3( | ||
| ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: ret i32 poison | ||
| ; | ||
| ; | ||
| ; CHECK-LABEL: define internal i32 @add1.specialized.4( | ||
| ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: ret i32 poison | ||
| ; |
Uh oh!
There was an error while loading. Please reload this page.