Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,16 @@ struct Spec {
// Profitability of the specialization.
unsigned Score;

// Number of instructions in the specialization.
unsigned CodeSize;

// List of call sites, matching this specialization.
SmallVector<CallBase *> CallSites;

Spec(Function *F, const SpecSig &S, unsigned Score)
: F(F), Sig(S), Score(Score) {}
Spec(Function *F, const SpecSig &&S, unsigned Score)
: F(F), Sig(S), Score(Score) {}
Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
: F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
: F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
};

class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
Expand Down
41 changes: 22 additions & 19 deletions llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,18 @@ FunctionSpecializer::~FunctionSpecializer() {
cleanUpSSA();
}

/// Convert a Cost object into a plain unsigned value.
///
/// The caller must pass a Cost that is valid and non-negative, which is
/// expected to hold for both TCK_CodeSize and TCK_Latency. These
/// preconditions, and the requirement that the value is representable as
/// `unsigned`, are enforced with assertions only, so they are checked in
/// builds where `assert` is active.
///
/// \param C the cost to convert.
/// \returns the value of \p C narrowed to `unsigned`.
static unsigned getCostValue(const Cost &C) {
  // Keep the result of getValue() in a local so that it can be tested before
  // it is dereferenced (presumably an optional-like wrapper that is empty for
  // an invalid Cost -- confirm against the Cost type's definition).
  auto MaybeValue = C.getValue();
  assert(MaybeValue && "Expected a valid Cost");
  int64_t Value = *MaybeValue;

  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
  // Non-negativity alone does not make the narrowing lossless: int64_t can
  // hold values that do not fit in unsigned. Verify that the value survives a
  // round trip instead of silently truncating it.
  unsigned Narrowed = static_cast<unsigned>(Value);
  assert(static_cast<int64_t>(Narrowed) == Value &&
         "Cost value does not fit in unsigned");
  return Narrowed;
}

/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
Expand Down Expand Up @@ -759,6 +771,11 @@ bool FunctionSpecializer::run() {
SmallVector<Function *> Clones;
for (unsigned I = 0; I < NSpecs; ++I) {
Spec &S = AllSpecs[BestSpecs[I]];

// Accumulate the codesize growth for the function now that we are actually
// creating the specialization.
FunctionGrowth[S.F] += S.CodeSize;

S.Clone = createSpecialization(S.F, S.Sig);

// Update the known call sites to call the clone.
Expand Down Expand Up @@ -837,18 +854,6 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
return Clone;
}

/// Convert a Cost object into a plain unsigned value.
///
/// The caller must pass a Cost that is valid and non-negative, which is
/// expected to hold for both TCK_CodeSize and TCK_Latency. These
/// preconditions, and the requirement that the value is representable as
/// `unsigned`, are enforced with assertions only, so they are checked in
/// builds where `assert` is active.
///
/// \param C the cost to convert.
/// \returns the value of \p C narrowed to `unsigned`.
static unsigned getCostValue(const Cost &C) {
  // Keep the result of getValue() in a local so that it can be tested before
  // it is dereferenced (presumably an optional-like wrapper that is empty for
  // an invalid Cost -- confirm against the Cost type's definition).
  auto MaybeValue = C.getValue();
  assert(MaybeValue && "Expected a valid Cost");
  int64_t Value = *MaybeValue;

  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
  // Non-negativity alone does not make the narrowing lossless: int64_t can
  // hold values that do not fit in unsigned. Verify that the value survives a
  // round trip instead of silently truncating it.
  unsigned Narrowed = static_cast<unsigned>(Value);
  assert(static_cast<int64_t>(Narrowed) == Value &&
         "Cost value does not fit in unsigned");
  return Narrowed;
}

bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs,
SpecMap &SM) {
Expand Down Expand Up @@ -924,16 +929,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
}
CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();

unsigned CodeSizeSavings = getCostValue(CodeSize);
unsigned SpecSize = FuncSize - CodeSizeSavings;

auto IsProfitable = [&]() -> bool {
// No check required.
if (ForceSpecialization)
return true;

unsigned CodeSizeSavings = getCostValue(CodeSize);
// TODO: We should only accumulate codesize increase of specializations
// that are actually created.
FunctionGrowth[F] += FuncSize - CodeSizeSavings;

LLVM_DEBUG(
dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
<< Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
Expand Down Expand Up @@ -964,7 +967,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
if (LatencySavings < MinLatencySavings * FuncSize / 100)
return false;
// Maximum codesize growth.
if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth)
if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
return false;

Score += std::max(CodeSizeSavings, LatencySavings);
Expand All @@ -976,7 +979,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
continue;

// Create a new specialisation entry.
auto &Spec = AllSpecs.emplace_back(F, S, Score);
auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
if (CS.getFunction() != F)
Spec.CallSites.push_back(&CS);
const unsigned Index = AllSpecs.size() - 1;
Expand Down
44 changes: 44 additions & 0 deletions llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \
; RUN: -funcspec-for-literal-constant=true \
; RUN: -funcspec-min-codesize-savings=50 \
; RUN: -funcspec-min-latency-savings=50 \
; RUN: -funcspec-max-codesize-growth=1 \
; RUN: -S < %s | FileCheck %s

; Verify that we are able to specialize a function successfully after analysis
; of other specializations that are found to not be profitable.
define void @test_specialize_after_failed_analysis(i32 %n) {
entry:
; The first two calls pass the non-constant %n as the second argument; the
; expected output keeps both of them calling the original @add.
%notspec0 = call i32 @add(i32 0, i32 %n)
%notspec1 = call i32 @add(i32 1, i32 %n)
; This call passes only literal constants; the expected output redirects it to
; the specialized clone @add.specialized.1.
%spec = call i32 @add(i32 1, i32 1)
ret void
}

; Callee used as the specialization candidate: returns the sum of its two i32
; arguments.
define i32 @add(i32 %x, i32 %y) {
entry:
%res = add i32 %x, %y
ret i32 %res
}
; CHECK-LABEL: define void @test_specialize_after_failed_analysis(
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]])
; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]])
; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define i32 @add(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT: ret i32 [[RES]]
;
;
; CHECK-LABEL: define internal i32 @add.specialized.1(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret i32 poison
;
Loading