diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index a459a9eddbcfc..30459caee1609 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -89,6 +89,8 @@ static cl::opt<bool> SpecializeLiteralConstant(
         "Enable specialization of functions that take a literal constant as an "
         "argument"));
 
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
 bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,
                                             BasicBlock *Succ) const {
   unsigned I = 0;
@@ -784,9 +786,39 @@ bool FunctionSpecializer::run() {
 
     // Update the known call sites to call the clone.
     for (CallBase *Call : S.CallSites) {
+      Function *Clone = S.Clone;
       LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
-                        << " to call " << S.Clone->getName() << "\n");
+                        << " to call " << Clone->getName() << "\n");
       Call->setCalledFunction(S.Clone);
+      // Move this call site's share of the profile from the original
+      // function to the clone it now targets.
+      auto &BFI = GetBFI(*Call->getFunction());
+      std::optional<uint64_t> Count =
+          BFI.getBlockProfileCount(Call->getParent());
+      if (Count && !ProfcheckDisableMetadataFixes) {
+        std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
+            Clone->getEntryCount();
+        // createSpecialization() only seeds the clone's entry count when the
+        // original function has one, so there may be nothing to update.
+        if (!MaybeCloneCount)
+          continue;
+        Clone->setEntryCount(*Count + MaybeCloneCount->getCount());
+        if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
+                S.F->getEntryCount()) {
+          uint64_t OriginalCount = MaybeOriginalCount->getCount();
+          // Subtract only this call site's count; the clone's running total
+          // also contains call sites already accounted for.
+          if (OriginalCount >= *Count) {
+            S.F->setEntryCount(OriginalCount - *Count);
+          } else {
+            // This should generally not happen as that would mean there are
+            // more computed calls to the function than what was recorded.
+            // Clamp unconditionally: LLVM_DEBUG bodies run only in debug
+            // builds, so the update must not live inside one.
+            S.F->setEntryCount(0);
+          }
+        }
+      }
     }
 
     Clones.push_back(S.Clone);
@@ -1043,6 +1075,11 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
   // clone must.
   Clone->setLinkage(GlobalValue::InternalLinkage);
 
+  // Start the clone's profile at zero; run() accumulates the counts of the
+  // call sites redirected to it.
+  if (F->getEntryCount() && !ProfcheckDisableMetadataFixes)
+    Clone->setEntryCount(0);
+
   // Initialize the lattice state of the arguments of the function clone,
   // marking the argument on which we specialized the function constant
   // with the given value.
diff --git a/llvm/test/Transforms/FunctionSpecialization/profile-counts.ll b/llvm/test/Transforms/FunctionSpecialization/profile-counts.ll
new file mode 100644
index 0000000000000..d5b2e35feb118
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/profile-counts.ll
@@ -0,0 +1,52 @@
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+@A = external dso_local constant i32, align 4
+@B = external dso_local constant i32, align 4
+
+; CHECK: define dso_local i32 @bar(i32 %x, i32 %y, ptr %z) !prof ![[BAR_PROF:[0-9]+]] {
+define dso_local i32 @bar(i32 %x, i32 %y, ptr %z) !prof !0 {
+entry:
+  %tobool = icmp ne i32 %x, 0
+; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof ![[BRANCH_PROF:[0-9]+]]
+  br i1 %tobool, label %if.then, label %if.else, !prof !1
+
+; CHECK: if.then:
+; CHECK: call i32 @foo.specialized.1(i32 %x, ptr @A)
+if.then:
+  %call = call i32 @foo(i32 %x, ptr @A)
+  br label %return
+
+; CHECK: if.else:
+; CHECK: call i32 @foo.specialized.2(i32 %y, ptr @B)
+if.else:
+  %call1 = call i32 @foo(i32 %y, ptr @B)
+  br label %return
+
+; CHECK: return:
+; CHECK: %call2 = call i32 @foo(i32 %x, ptr %z)
+return:
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
+  %call2 = call i32 @foo(i32 %x, ptr %z);
+  %add = add i32 %retval.0, %call2
+  ret i32 %add
+}
+
+; CHECK: define internal i32 @foo(i32 %x, ptr %b) !prof ![[FOO_UNSPEC_PROF:[0-9]+]]
+; CHECK: define internal i32 @foo.specialized.1(i32 %x, ptr %b) !prof ![[FOO_SPEC_1_PROF:[0-9]+]]
+; CHECK: define internal i32 @foo.specialized.2(i32 %x, ptr %b) !prof ![[FOO_SPEC_2_PROF:[0-9]+]]
+define internal i32 @foo(i32 %x, ptr %b) !prof !2 {
+entry:
+  %0 = load i32, ptr %b, align 4
+  %add = add nsw i32 %x, %0
+  ret i32 %add
+}
+
+; CHECK: ![[BAR_PROF]] = !{!"function_entry_count", i64 1000}
+; CHECK: ![[BRANCH_PROF]] = !{!"branch_weights", i32 1, i32 3}
+; CHECK: ![[FOO_UNSPEC_PROF]] = !{!"function_entry_count", i64 234}
+; CHECK: ![[FOO_SPEC_1_PROF]] = !{!"function_entry_count", i64 250}
+; CHECK: ![[FOO_SPEC_2_PROF]] = !{!"function_entry_count", i64 750}
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 1, i32 3}
+!2 = !{!"function_entry_count", i64 1234}