Skip to content

Commit 6b6e8e1

Browse files
authored
[FunctionSpecialization] Preserve call counts of specialized functions (#157768)
A function that has been specialized will have its function entry counts preserved as follows:
* Each specialization's count is the sum, over its call sites, of the profile count of the basic block containing the call site, as computed by `BlockFrequencyInfo`.
* The original function's count is decreased by the counts of its specializations.

Tracking issue: #147390
1 parent 671455a commit 6b6e8e1

File tree

2 files changed

+80
-1
lines changed

2 files changed

+80
-1
lines changed

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ static cl::opt<bool> SpecializeLiteralConstant(
8989
"Enable specialization of functions that take a literal constant as an "
9090
"argument"));
9191

92+
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
93+
9294
bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,
9395
BasicBlock *Succ) const {
9496
unsigned I = 0;
@@ -784,9 +786,31 @@ bool FunctionSpecializer::run() {
784786

785787
// Update the known call sites to call the clone.
786788
for (CallBase *Call : S.CallSites) {
  Function *Clone = S.Clone;
  LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
                    << " to call " << Clone->getName() << "\n");
  Call->setCalledFunction(Clone);

  // Transfer this call site's share of the profile from the original
  // function to the clone. The share is the profile count that
  // BlockFrequencyInfo reports for the basic block containing the call.
  auto &BFI = GetBFI(*Call->getFunction());
  std::optional<uint64_t> Count =
      BFI.getBlockProfileCount(Call->getParent());
  if (!Count || ProfcheckDisableMetadataFixes)
    continue;

  // The caller may carry profile data while the specialized function does
  // not (createSpecialization only initializes the clone's entry count when
  // the original function has one). In that case there is nothing to
  // redistribute, so skip instead of dereferencing an empty optional.
  std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
      Clone->getEntryCount();
  if (!MaybeCloneCount)
    continue;

  // Accumulate: a clone may be reached from several call sites.
  Clone->setEntryCount(MaybeCloneCount->getCount() + *Count);

  if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
          S.F->getEntryCount()) {
    uint64_t OriginalCount = MaybeOriginalCount->getCount();
    // Subtract only this call site's contribution. Subtracting the clone's
    // accumulated count would remove earlier call sites' counts again on
    // every subsequent iteration.
    if (OriginalCount < *Count) {
      // This should generally not happen as that would mean there are
      // more computed calls to the function than what was recorded.
      LLVM_DEBUG(dbgs() << "FnSpecialization: Call site count " << *Count
                        << " exceeds remaining entry count "
                        << OriginalCount << " of " << S.F->getName()
                        << "; clamping to 0\n");
    }
    // The clamp must run in every build, so it stays outside LLVM_DEBUG.
    S.F->setEntryCount(OriginalCount >= *Count ? OriginalCount - *Count : 0);
  }
}
791815

792816
Clones.push_back(S.Clone);
@@ -1043,6 +1067,9 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
10431067
// clone must.
10441068
Clone->setLinkage(GlobalValue::InternalLinkage);
10451069

1070+
if (F->getEntryCount() && !ProfcheckDisableMetadataFixes)
1071+
Clone->setEntryCount(0);
1072+
10461073
// Initialize the lattice state of the arguments of the function clone,
10471074
// marking the argument on which we specialized the function constant
10481075
// with the given value.
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
2+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
3+
4+
@A = external dso_local constant i32, align 4
5+
@B = external dso_local constant i32, align 4
6+
7+
; CHECK: define dso_local i32 @bar(i32 %x, i32 %y, ptr %z) !prof ![[BAR_PROF:[0-9]]] {
8+
; Caller used to exercise entry-count preservation. It is annotated with an
; entry count (!0) and branches on %x != 0 with branch weights !1, so the two
; conditional call sites of @foo sit in blocks with different profile counts.
; The calls passing the globals @A and @B are expected to be redirected to
; specializations; the call passing the unknown pointer %z stays on @foo.
define dso_local i32 @bar(i32 %x, i32 %y, ptr %z) !prof !0 {
9+
entry:
10+
%tobool = icmp ne i32 %x, 0
11+
; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof ![[BRANCH_PROF:[0-9]]]
12+
br i1 %tobool, label %if.then, label %if.else, !prof !1
13+
14+
; CHECK: if.then:
15+
; CHECK: call i32 @foo.specialized.1(i32 %x, ptr @A)
16+
if.then:
17+
%call = call i32 @foo(i32 %x, ptr @A)
18+
br label %return
19+
20+
; CHECK: if.else:
21+
; CHECK: call i32 @foo.specialized.2(i32 %y, ptr @B)
22+
if.else:
23+
%call1 = call i32 @foo(i32 %y, ptr @B)
24+
br label %return
25+
26+
; CHECK: return:
27+
; CHECK: %call2 = call i32 @foo(i32 %x, ptr %z)
28+
return:
29+
%retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
30+
%call2 = call i32 @foo(i32 %x, ptr %z);
31+
%add = add i32 %retval.0, %call2
32+
ret i32 %add
33+
}
34+
35+
; CHECK: define internal i32 @foo(i32 %x, ptr %b) !prof ![[FOO_UNSPEC_PROF:[0-9]]]
36+
; CHECK: define internal i32 @foo.specialized.1(i32 %x, ptr %b) !prof ![[FOO_SPEC_1_PROF:[0-9]]]
37+
; CHECK: define internal i32 @foo.specialized.2(i32 %x, ptr %b) !prof ![[FOO_SPEC_2_PROF:[0-9]]]
38+
; Callee under test: loads an i32 through %b and adds it to %x. It carries its
; own entry count (!2); the expectations in this file require that count to be
; split between @foo and its two specializations.
define internal i32 @foo(i32 %x, ptr %b) !prof !2 {
39+
entry:
40+
%0 = load i32, ptr %b, align 4
41+
%add = add nsw i32 %x, %0
42+
ret i32 %add
43+
}
44+
45+
; CHECK: ![[BAR_PROF]] = !{!"function_entry_count", i64 1000}
46+
; CHECK: ![[BRANCH_PROF]] = !{!"branch_weights", i32 1, i32 3}
47+
; CHECK: ![[FOO_UNSPEC_PROF]] = !{!"function_entry_count", i64 234}
48+
; CHECK: ![[FOO_SPEC_1_PROF]] = !{!"function_entry_count", i64 250}
49+
; CHECK: ![[FOO_SPEC_2_PROF]] = !{!"function_entry_count", i64 750}
50+
!0 = !{!"function_entry_count", i64 1000}
51+
!1 = !{!"branch_weights", i32 1, i32 3}
52+
!2 = !{!"function_entry_count", i64 1234}

0 commit comments

Comments
 (0)