Skip to content

Commit ceae28d

Browse files
committed
[FunctionSpecialization] Preserve call counts of specialized functions
A function that has been specialized will have its function entry counts preserved as follows:
* Each specialization's entry count is the sum, over the call sites redirected to it, of the number of entries of each call site's basic block as computed by `BlockFrequencyInfo`.
* The original function's entry count will be decreased by the counts of its specializations.

Tracking issue: #147390
1 parent 90e9c5e commit ceae28d

File tree

2 files changed

+72
-1
lines changed

2 files changed

+72
-1
lines changed

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -784,9 +784,25 @@ bool FunctionSpecializer::run() {
784784

785785
// Update the known call sites to call the clone.
786786
for (CallBase *Call : S.CallSites) {
787+
Function *Clone = S.Clone;
787788
LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
788-
<< " to call " << S.Clone->getName() << "\n");
789+
<< " to call " << Clone->getName() << "\n");
789790
Call->setCalledFunction(S.Clone);
791+
if (std::optional<uint64_t> Count =
792+
GetBFI(*Call->getFunction())
793+
.getBlockProfileCount(Call->getParent())) {
794+
uint64_t CallCount = *Count + Clone->getEntryCount()->getCount();
795+
Clone->setEntryCount(CallCount);
796+
if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
797+
S.F->getEntryCount()) {
798+
uint64_t OriginalCount = MaybeOriginalCount->getCount();
799+
if (OriginalCount > CallCount) {
800+
S.F->setEntryCount(OriginalCount - CallCount);
801+
} else {
802+
S.F->setEntryCount(0);
803+
}
804+
}
805+
}
790806
}
791807

792808
Clones.push_back(S.Clone);
@@ -1043,6 +1059,9 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
10431059
// clone must.
10441060
Clone->setLinkage(GlobalValue::InternalLinkage);
10451061

1062+
if (F->getEntryCount())
1063+
Clone->setEntryCount(0);
1064+
10461065
// Initialize the lattice state of the arguments of the function clone,
10471066
// marking the argument on which we specialized the function constant
10481067
// with the given value.
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
2+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
3+
4+
@A = external dso_local constant i32, align 4
5+
@B = external dso_local constant i32, align 4
6+
7+
; CHECK: define dso_local i32 @bar(i32 %x, i32 %y, ptr %z) !prof ![[BAR_PROF:[0-9]]] {
8+
define dso_local i32 @bar(i32 %x, i32 %y, ptr %z) !prof !0 {
9+
entry:
10+
%tobool = icmp ne i32 %x, 0
11+
; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof ![[BRANCH_PROF:[0-9]]]
12+
br i1 %tobool, label %if.then, label %if.else, !prof !1
13+
14+
if.then:
15+
; CHECK: if.then:
16+
; CHECK: call i32 @foo.specialized.1(i32 %x, ptr @A)
17+
%call = call i32 @foo(i32 %x, ptr @A)
18+
br label %return
19+
20+
if.else:
21+
; CHECK: if.else:
22+
; CHECK: call i32 @foo.specialized.2(i32 %y, ptr @B)
23+
%call1 = call i32 @foo(i32 %y, ptr @B)
24+
br label %return
25+
26+
; CHECK: return:
27+
; CHECK: %call2 = call i32 @foo(i32 %x, ptr %z)
28+
return:
29+
%retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
30+
%call2 = call i32 @foo(i32 %x, ptr %z);
31+
%add = add i32 %retval.0, %call2
32+
ret i32 %add
33+
}
34+
35+
; CHECK: define internal i32 @foo(i32 %x, ptr %b) !prof ![[FOO_UNSPEC_PROF:[0-9]]]
36+
; CHECK: define internal i32 @foo.specialized.1(i32 %x, ptr %b) !prof ![[FOO_SPEC_1_PROF:[0-9]]]
37+
; CHECK: define internal i32 @foo.specialized.2(i32 %x, ptr %b) !prof ![[FOO_SPEC_2_PROF:[0-9]]]
38+
define internal i32 @foo(i32 %x, ptr %b) !prof !2 {
39+
entry:
40+
%0 = load i32, ptr %b, align 4
41+
%add = add nsw i32 %x, %0
42+
ret i32 %add
43+
}
44+
45+
; CHECK: ![[BAR_PROF]] = !{!"function_entry_count", i64 1000}
46+
; CHECK: ![[BRANCH_PROF]] = !{!"branch_weights", i32 1, i32 3}
47+
; CHECK: ![[FOO_UNSPEC_PROF]] = !{!"function_entry_count", i64 234}
48+
; CHECK: ![[FOO_SPEC_1_PROF]] = !{!"function_entry_count", i64 250}
49+
; CHECK: ![[FOO_SPEC_2_PROF]] = !{!"function_entry_count", i64 750}
50+
!0 = !{!"function_entry_count", i64 1000}
51+
!1 = !{!"branch_weights", i32 1, i32 3}
52+
!2 = !{!"function_entry_count", i64 1234}

0 commit comments

Comments
 (0)