Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ static cl::opt<bool> SpecializeLiteralConstant(
"Enable specialization of functions that take a literal constant as an "
"argument"));

extern cl::opt<bool> ProfcheckDisableMetadataFixes;

bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,
BasicBlock *Succ) const {
unsigned I = 0;
Expand Down Expand Up @@ -784,9 +786,31 @@ bool FunctionSpecializer::run() {

// Update the known call sites to call the clone.
for (CallBase *Call : S.CallSites) {
Function *Clone = S.Clone;
LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
<< " to call " << S.Clone->getName() << "\n");
<< " to call " << Clone->getName() << "\n");
Call->setCalledFunction(S.Clone);
auto &BFI = GetBFI(*Call->getFunction());
std::optional<uint64_t> Count =
BFI.getBlockProfileCount(Call->getParent());
if (Count && !ProfcheckDisableMetadataFixes) {
std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
Clone->getEntryCount();
assert(MaybeCloneCount && "Clone entry count was not set!");
uint64_t CallCount = *Count + MaybeCloneCount->getCount();
Clone->setEntryCount(CallCount);
if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
S.F->getEntryCount()) {
uint64_t OriginalCount = MaybeOriginalCount->getCount();
if (OriginalCount >= CallCount) {
S.F->setEntryCount(OriginalCount - CallCount);
} else {
// This should generally not happen as that would mean there are
// more computed calls to the function than what was recorded.
LLVM_DEBUG(S.F->setEntryCount(0));
}
}
}
}

Clones.push_back(S.Clone);
Expand Down Expand Up @@ -1043,6 +1067,9 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
// clone must.
Clone->setLinkage(GlobalValue::InternalLinkage);

if (F->getEntryCount() && !ProfcheckDisableMetadataFixes)
Clone->setEntryCount(0);

// Initialize the lattice state of the arguments of the function clone,
// marking the argument on which we specialized the function constant
// with the given value.
Expand Down
52 changes: 52 additions & 0 deletions llvm/test/Transforms/FunctionSpecialization/profile-counts.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

@A = external dso_local constant i32, align 4
@B = external dso_local constant i32, align 4

; CHECK: define dso_local i32 @bar(i32 %x, i32 %y, ptr %z) !prof ![[BAR_PROF:[0-9]]] {
define dso_local i32 @bar(i32 %x, i32 %y, ptr %z) !prof !0 {
entry:
%tobool = icmp ne i32 %x, 0
; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof ![[BRANCH_PROF:[0-9]]]
br i1 %tobool, label %if.then, label %if.else, !prof !1

; CHECK: if.then:
; CHECK: call i32 @foo.specialized.1(i32 %x, ptr @A)
if.then:
%call = call i32 @foo(i32 %x, ptr @A)
br label %return

; CHECK: if.else:
; CHECK: call i32 @foo.specialized.2(i32 %y, ptr @B)
if.else:
%call1 = call i32 @foo(i32 %y, ptr @B)
br label %return

; CHECK: return:
; CHECK: %call2 = call i32 @foo(i32 %x, ptr %z)
return:
%retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
%call2 = call i32 @foo(i32 %x, ptr %z);
%add = add i32 %retval.0, %call2
ret i32 %add
}

; CHECK: define internal i32 @foo(i32 %x, ptr %b) !prof ![[FOO_UNSPEC_PROF:[0-9]]]
; CHECK: define internal i32 @foo.specialized.1(i32 %x, ptr %b) !prof ![[FOO_SPEC_1_PROF:[0-9]]]
; CHECK: define internal i32 @foo.specialized.2(i32 %x, ptr %b) !prof ![[FOO_SPEC_2_PROF:[0-9]]]
define internal i32 @foo(i32 %x, ptr %b) !prof !2 {
entry:
%0 = load i32, ptr %b, align 4
%add = add nsw i32 %x, %0
ret i32 %add
}

; CHECK: ![[BAR_PROF]] = !{!"function_entry_count", i64 1000}
; CHECK: ![[BRANCH_PROF]] = !{!"branch_weights", i32 1, i32 3}
; CHECK: ![[FOO_UNSPEC_PROF]] = !{!"function_entry_count", i64 234}
; CHECK: ![[FOO_SPEC_1_PROF]] = !{!"function_entry_count", i64 250}
; CHECK: ![[FOO_SPEC_2_PROF]] = !{!"function_entry_count", i64 750}
!0 = !{!"function_entry_count", i64 1000}
!1 = !{!"branch_weights", i32 1, i32 3}
!2 = !{!"function_entry_count", i64 1234}