-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[ctxprof] Flatten indirect call info in pre-thinlink compilation #134766
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ctxprof] Flatten indirect call info in pre-thinlink compilation #134766
Conversation
a492868 to
31ec58f
Compare
|
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-pgo Author: Mircea Trofin (mtrofin) Changes: Same idea as in #134723 - flatten indirect call info in "VP" MD_prof metadata for the thinlinker, for cases that aren't covered by a contextual profile. Full diff: https://github.com/llvm/llvm-project/pull/134766.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 023b5a9bdb848..6f1c3696ca78c 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -21,6 +21,10 @@ namespace llvm {
class CtxProfAnalysis;
+using FlatIndirectTargets = DenseMap<GlobalValue::GUID, uint64_t>;
+using CtxProfFlatIndirectCallProfile =
+ DenseMap<GlobalValue::GUID, DenseMap<uint32_t, FlatIndirectTargets>>;
+
/// The instrumented contextual profile, produced by the CtxProfAnalysis.
class PGOContextualProfile {
friend class CtxProfAnalysis;
@@ -101,6 +105,7 @@ class PGOContextualProfile {
void visit(ConstVisitor, const Function *F = nullptr) const;
const CtxProfFlatProfile flatten() const;
+ const CtxProfFlatIndirectCallProfile flattenVirtCalls() const;
bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &) {
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 4042c87369462..304a77014f407 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -334,6 +334,20 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const {
return Flat;
}
+const CtxProfFlatIndirectCallProfile
+PGOContextualProfile::flattenVirtCalls() const { // Builds a map: context GUID -> callsite ID -> target GUID -> summed entry count.
+ CtxProfFlatIndirectCallProfile Ret;
+ preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
+ const PGOCtxProfContext>(
+ Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
+ auto &Targets = Ret[Ctx.guid()]; // operator[] creates the per-function entry on first visit
+ for (const auto &[ID, SubctxSet] : Ctx.callsites()) // every callsite is recorded; no direct/indirect filtering happens here
+ for (const auto &Subctx : SubctxSet)
+ Targets[ID][Subctx.first] += Subctx.second.getEntrycount(); // accumulate, so the same (function, callsite, target) seen in several contexts is summed
+ });
+ return Ret;
+}
+
void CtxProfAnalysis::collectIndirectCallPromotionList(
CallBase &IC, Result &Profile,
SetVector<std::pair<CallBase *, Function *>> &Candidates) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index ffe0f385047c3..9b44d61726fa1 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -36,9 +36,12 @@
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <deque>
+#include <functional>
using namespace llvm;
+#define DEBUG_TYPE "ctx_prof_flatten"
+
namespace {
class ProfileAnnotator final {
@@ -414,6 +417,58 @@ void removeInstrumentation(Function &F) {
I.eraseFromParent();
}
+void annotateIndirectCall( // Attaches indirect-call-target value profile data to CB from the flattened profile of its enclosing function.
+ Module &M, CallBase &CB,
+ const DenseMap<uint32_t, FlatIndirectTargets> &FlatProf, // callsite index -> (target GUID -> count) for one function
+ const InstrProfCallsite &Ins) { // the callsite instrumentation associated with CB; supplies the callsite index
+ auto Idx = Ins.getIndex()->getZExtValue();
+ auto FIt = FlatProf.find(Idx);
+ if (FIt == FlatProf.end())
+ return; // no flattened data for this callsite index: leave CB unannotated
+ const auto &Targets = FIt->second;
+ SmallVector<InstrProfValueData, 2> Data;
+ uint64_t Sum = 0; // total count over all targets of this callsite
+ for (auto &[Guid, Count] : Targets) {
+ Data.push_back({/*.Value=*/Guid, /*.Count=*/Count});
+ Sum += Count;
+ }
+ struct InstrProfValueDataGTComparer {
+ bool operator()(const InstrProfValueData &A, const InstrProfValueData &B) {
+ return A.Count > B.Count; // descending by count. NOTE(review): equal counts have no tie-break - confirm the resulting order is deterministic
+ }
+ };
+ llvm::sort(Data, InstrProfValueDataGTComparer());
+ llvm::annotateValueSite(M, CB, Data, Sum,
+ InstrProfValueKind::IPVK_IndirectCallTarget,
+ Data.size()); // presumably the maximum number of entries to emit, i.e. all of them - verify against annotateValueSite
+ LLVM_DEBUG(dbgs() << "[ctxprof] flat indirect call prof: " << CB
+ << CB.getMetadata(LLVMContext::MD_prof) << "\n"); // NOTE(review): streams the result of getMetadata directly; if that is a pointer this may print an address - confirm
+}
+
+// Annotates the instrumented indirect calls of M using the flattened contextual profile. This returns void instead of the
+// usual "Changed" bool: the calling pass' run assumes something will change - some profile will be added - so returning
+// false when nothing was annotated would not add much.
+void annotateIndCalls(Module &M, const CtxProfAnalysis::Result &CtxProf) {
+ const auto FlatIndCalls = CtxProf.flattenVirtCalls(); // flattened once, then looked up per function
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue; // declarations are skipped
+ auto FlatProfIter = FlatIndCalls.find(AssignGUIDPass::getGUID(F));
+ if (FlatProfIter == FlatIndCalls.end())
+ continue; // the flattened profile has no entry for this function's GUID
+ const auto &FlatProf = FlatProfIter->second;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB || !CB->isIndirectCall())
+ continue; // only indirect calls are annotated
+ if (auto *Ins = CtxProfAnalysis::getCallsiteInstrumentation(*CB)) // calls for which no callsite instrumentation is found are left alone
+ annotateIndirectCall(M, *CB, FlatProf, *Ins);
+ }
+ }
+ }
+}
+
} // namespace
PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
@@ -437,6 +492,8 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
return PreservedAnalyses::none();
+ if (IsPreThinlink)
+ annotateIndCalls(M, CtxProf);
const auto FlattenedProfile = CtxProf.flatten();
for (auto &F : M) {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
new file mode 100644
index 0000000000000..13beddc05c7a2
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
@@ -0,0 +1,50 @@
+; REQUIRES:x86_64-linux
+
+; Test flattening indirect calls into "VP" MD_prof metadata, in prelink.
+
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromYAML --input %t/profile.yaml --output %t/profile.ctxprofdata
+; RUN: opt -passes=ctx-prof-flatten-prethinlink %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=PRELINK
+
+; PRELINK: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
+; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+; PRELINK-NEXT: call void @bar(){{$}}
+; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
+
+; RUN: cp %t/example.ll %t/1234.ll
+; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+
+; POSTLINK-NOT: call void %p(), !prof
+;--- example.ll
+
+declare !guid !0 void @bar()
+
+define void @foo(ptr %p) !guid !1 {
+ call void @llvm.instrprof.increment(ptr @foo, i64 1234, i32 1, i32 0)
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+ call void %p()
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+ call void @bar()
+ ret void
+}
+
+!0 = !{i64 8888}
+!1 = !{i64 1234}
+
+;--- profile.yaml
+Contexts:
+ - Guid: 1234
+ TotalRootEntryCount: 5
+ Counters: [5]
+ Callsites:
+ - - Guid: 5555
+ Counters: [1]
+ - Guid: 5678
+ Counters: [4]
+ - - Guid: 8888
+ Counters: [5]
|
|
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) Changes: Same idea as in #134723 - flatten indirect call info in "VP" MD_prof metadata for the thinlinker, for cases that aren't covered by a contextual profile. Full diff: https://github.com/llvm/llvm-project/pull/134766.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 023b5a9bdb848..6f1c3696ca78c 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -21,6 +21,10 @@ namespace llvm {
class CtxProfAnalysis;
+using FlatIndirectTargets = DenseMap<GlobalValue::GUID, uint64_t>;
+using CtxProfFlatIndirectCallProfile =
+ DenseMap<GlobalValue::GUID, DenseMap<uint32_t, FlatIndirectTargets>>;
+
/// The instrumented contextual profile, produced by the CtxProfAnalysis.
class PGOContextualProfile {
friend class CtxProfAnalysis;
@@ -101,6 +105,7 @@ class PGOContextualProfile {
void visit(ConstVisitor, const Function *F = nullptr) const;
const CtxProfFlatProfile flatten() const;
+ const CtxProfFlatIndirectCallProfile flattenVirtCalls() const;
bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &) {
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 4042c87369462..304a77014f407 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -334,6 +334,20 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const {
return Flat;
}
+const CtxProfFlatIndirectCallProfile
+PGOContextualProfile::flattenVirtCalls() const { // Builds a map: context GUID -> callsite ID -> target GUID -> summed entry count.
+ CtxProfFlatIndirectCallProfile Ret;
+ preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
+ const PGOCtxProfContext>(
+ Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
+ auto &Targets = Ret[Ctx.guid()]; // operator[] creates the per-function entry on first visit
+ for (const auto &[ID, SubctxSet] : Ctx.callsites()) // every callsite is recorded; no direct/indirect filtering happens here
+ for (const auto &Subctx : SubctxSet)
+ Targets[ID][Subctx.first] += Subctx.second.getEntrycount(); // accumulate, so the same (function, callsite, target) seen in several contexts is summed
+ });
+ return Ret;
+}
+
void CtxProfAnalysis::collectIndirectCallPromotionList(
CallBase &IC, Result &Profile,
SetVector<std::pair<CallBase *, Function *>> &Candidates) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index ffe0f385047c3..9b44d61726fa1 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -36,9 +36,12 @@
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <deque>
+#include <functional>
using namespace llvm;
+#define DEBUG_TYPE "ctx_prof_flatten"
+
namespace {
class ProfileAnnotator final {
@@ -414,6 +417,58 @@ void removeInstrumentation(Function &F) {
I.eraseFromParent();
}
+void annotateIndirectCall( // Attaches indirect-call-target value profile data to CB from the flattened profile of its enclosing function.
+ Module &M, CallBase &CB,
+ const DenseMap<uint32_t, FlatIndirectTargets> &FlatProf, // callsite index -> (target GUID -> count) for one function
+ const InstrProfCallsite &Ins) { // the callsite instrumentation associated with CB; supplies the callsite index
+ auto Idx = Ins.getIndex()->getZExtValue();
+ auto FIt = FlatProf.find(Idx);
+ if (FIt == FlatProf.end())
+ return; // no flattened data for this callsite index: leave CB unannotated
+ const auto &Targets = FIt->second;
+ SmallVector<InstrProfValueData, 2> Data;
+ uint64_t Sum = 0; // total count over all targets of this callsite
+ for (auto &[Guid, Count] : Targets) {
+ Data.push_back({/*.Value=*/Guid, /*.Count=*/Count});
+ Sum += Count;
+ }
+ struct InstrProfValueDataGTComparer {
+ bool operator()(const InstrProfValueData &A, const InstrProfValueData &B) {
+ return A.Count > B.Count; // descending by count. NOTE(review): equal counts have no tie-break - confirm the resulting order is deterministic
+ }
+ };
+ llvm::sort(Data, InstrProfValueDataGTComparer());
+ llvm::annotateValueSite(M, CB, Data, Sum,
+ InstrProfValueKind::IPVK_IndirectCallTarget,
+ Data.size()); // presumably the maximum number of entries to emit, i.e. all of them - verify against annotateValueSite
+ LLVM_DEBUG(dbgs() << "[ctxprof] flat indirect call prof: " << CB
+ << CB.getMetadata(LLVMContext::MD_prof) << "\n"); // NOTE(review): streams the result of getMetadata directly; if that is a pointer this may print an address - confirm
+}
+
+// Annotates the instrumented indirect calls of M using the flattened contextual profile. This returns void instead of the
+// usual "Changed" bool: the calling pass' run assumes something will change - some profile will be added - so returning
+// false when nothing was annotated would not add much.
+void annotateIndCalls(Module &M, const CtxProfAnalysis::Result &CtxProf) {
+ const auto FlatIndCalls = CtxProf.flattenVirtCalls(); // flattened once, then looked up per function
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue; // declarations are skipped
+ auto FlatProfIter = FlatIndCalls.find(AssignGUIDPass::getGUID(F));
+ if (FlatProfIter == FlatIndCalls.end())
+ continue; // the flattened profile has no entry for this function's GUID
+ const auto &FlatProf = FlatProfIter->second;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB || !CB->isIndirectCall())
+ continue; // only indirect calls are annotated
+ if (auto *Ins = CtxProfAnalysis::getCallsiteInstrumentation(*CB)) // calls for which no callsite instrumentation is found are left alone
+ annotateIndirectCall(M, *CB, FlatProf, *Ins);
+ }
+ }
+ }
+}
+
} // namespace
PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
@@ -437,6 +492,8 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
return PreservedAnalyses::none();
+ if (IsPreThinlink)
+ annotateIndCalls(M, CtxProf);
const auto FlattenedProfile = CtxProf.flatten();
for (auto &F : M) {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
new file mode 100644
index 0000000000000..13beddc05c7a2
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
@@ -0,0 +1,50 @@
+; REQUIRES:x86_64-linux
+
+; Test flattening indirect calls into "VP" MD_prof metadata, in prelink.
+
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromYAML --input %t/profile.yaml --output %t/profile.ctxprofdata
+; RUN: opt -passes=ctx-prof-flatten-prethinlink %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=PRELINK
+
+; PRELINK: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
+; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+; PRELINK-NEXT: call void @bar(){{$}}
+; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
+
+; RUN: cp %t/example.ll %t/1234.ll
+; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+
+; POSTLINK-NOT: call void %p(), !prof
+;--- example.ll
+
+declare !guid !0 void @bar()
+
+define void @foo(ptr %p) !guid !1 {
+ call void @llvm.instrprof.increment(ptr @foo, i64 1234, i32 1, i32 0)
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+ call void %p()
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+ call void @bar()
+ ret void
+}
+
+!0 = !{i64 8888}
+!1 = !{i64 1234}
+
+;--- profile.yaml
+Contexts:
+ - Guid: 1234
+ TotalRootEntryCount: 5
+ Counters: [5]
+ Callsites:
+ - - Guid: 5555
+ Counters: [1]
+ - Guid: 5678
+ Counters: [4]
+ - - Guid: 8888
+ Counters: [5]
|
d1edfa2 to
a31066a
Compare
a9b122b to
666db91
Compare
a31066a to
e1eeb79
Compare
666db91 to
7a97443
Compare
e1eeb79 to
f78fe14
Compare
snehasish
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
f78fe14 to
e465744
Compare
7a97443 to
d0be075
Compare
e465744 to
d443d0c
Compare
d0be075 to
01dbaf7
Compare

Same idea as in #134723 - flatten indirect call info in
"VP" MD_prof metadata for the thinlinker, for cases that aren't covered by a contextual profile. If we don't ICP an indirect call target in the specialized module, the call will fall to the copy of that target outside the specialized module. If the graph under that target also has some indirect calls, in the absence of this pass, we'd have a steeper performance regression - because none of those would have a chance to be ICPed.