From 229fa2852010778aa8c0f5f3f4abf491ef400c86 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 16 Apr 2025 13:11:04 -0700 Subject: [PATCH] [ctxprof] Scale up everything under a root by its `TotalRootEntryCount` --- llvm/lib/Analysis/CtxProfAnalysis.cpp | 59 +++++++++++-------- .../CtxProfAnalysis/flatten-and-annotate.ll | 26 ++++---- .../CtxProfAnalysis/flatten-check-path.ll | 4 +- .../flatten-insert-icp-mdprof.ll | 2 +- .../CtxProfAnalysis/flatten-zero-path.ll | 2 +- .../Analysis/CtxProfAnalysis/full-cycle.ll | 8 +-- llvm/test/Analysis/CtxProfAnalysis/inline.ll | 8 ++- llvm/test/Analysis/CtxProfAnalysis/load.ll | 8 +-- 8 files changed, 66 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 391631e15aa89..0f0999cae49e5 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -621,17 +621,23 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) { return nullptr; } -template -static void preorderVisit(ProfilesTy &Profiles, - function_ref Visitor) { +template +static void preorderVisitOneRoot(ProfTy &Profile, + function_ref Visitor) { std::function Traverser = [&](auto &Ctx) { Visitor(Ctx); for (auto &[_, SubCtxSet] : Ctx.callsites()) for (auto &[__, Subctx] : SubCtxSet) Traverser(Subctx); }; + Traverser(Profile); +} + +template +static void preorderVisit(ProfilesTy &Profiles, + function_ref Visitor) { for (auto &[_, P] : Profiles) - Traverser(P); + preorderVisitOneRoot(P, Visitor); } void PGOContextualProfile::initIndex() { @@ -683,40 +689,47 @@ void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const { const CtxProfFlatProfile PGOContextualProfile::flatten() const { CtxProfFlatProfile Flat; auto Accummulate = [](SmallVectorImpl &Into, - const SmallVectorImpl &From) { + const SmallVectorImpl &From, + uint64_t SamplingRate) { if (Into.empty()) Into.resize(From.size()); assert(Into.size() == From.size() && "All contexts corresponding to a function should have the exact " "same number of counters."); for (size_t I = 0, E = Into.size(); I < E; ++I) - Into[I] += From[I]; + Into[I] += From[I] * SamplingRate; }; - preorderVisit( - Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) { - Accummulate(Flat[Ctx.guid()], Ctx.counters()); - }); - for (const auto &[_, RC] : Profiles.Contexts) - for (const auto &[G, Unh] : RC.getUnhandled()) - Accummulate(Flat[G], Unh); + for (const auto &[_, CtxRoot] : Profiles.Contexts) { + const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount(); + preorderVisitOneRoot( + CtxRoot, [&](const PGOCtxProfContext &Ctx) { + Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor); + }); + + for (const auto &[G, Unh] : CtxRoot.getUnhandled()) + Accummulate(Flat[G], Unh, SamplingFactor); + } + // We don't sample "Flat" currently, so sampling rate is 1. for (const auto &[G, FC] : Profiles.FlatProfiles) - Accummulate(Flat[G], FC); + Accummulate(Flat[G], FC, /*SamplingRate=*/1); return Flat; } const CtxProfFlatIndirectCallProfile PGOContextualProfile::flattenVirtCalls() const { CtxProfFlatIndirectCallProfile Ret; - preorderVisit( - Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) { - auto &Targets = Ret[Ctx.guid()]; - for (const auto &[ID, SubctxSet] : Ctx.callsites()) - for (const auto &Subctx : SubctxSet) - Targets[ID][Subctx.first] += Subctx.second.getEntrycount(); - }); + for (const auto &[_, CtxRoot] : Profiles.Contexts) { + const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount(); + preorderVisitOneRoot( + CtxRoot, [&](const PGOCtxProfContext &Ctx) { + auto &Targets = Ret[Ctx.guid()]; + for (const auto &[ID, SubctxSet] : Ctx.callsites()) + for (const auto &Subctx : SubctxSet) + Targets[ID][Subctx.first] += + Subctx.second.getEntrycount() * TotalRootEntryCount; + }); + } return Ret; } diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll index 6daf4f5020043..d91f44047e739 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll @@ -23,13 +23,13 @@ ; PRELINK-LABEL: yes: ; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1) ; PRELINK: ![[#]] = !{i32 1, !"ProfileSummary", !1} -; PRELINK: ![[#]] = !{!"TotalCount", i64 3595} -; PRELINK: ![[#]] = !{!"MaxCount", i64 3000} -; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 3000} -; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 300} +; PRELINK: ![[#]] = !{!"TotalCount", i64 151600} +; PRELINK: ![[#]] = !{!"MaxCount", i64 102000} +; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 102000} +; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 20100} ; PRELINK: ![[#]] = !{!"NumCounts", i64 6} ; PRELINK: ![[#]] = !{!"NumFunctions", i64 3} -; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 40, i32 60} +; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 4000, i32 6000} ; Check that the output has: ; - no instrumentation @@ -49,10 +49,10 @@ ; The postlink summary is restricted to the stuff under the root - including the ; "unhandled" data. ; POSTLINK: ![[#]] = !{i32 1, !"ProfileSummary", !1} -; POSTLINK: ![[#]] = !{!"TotalCount", i64 1495} -; POSTLINK: ![[#]] = !{!"MaxCount", i64 1000} -; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 1000} -; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 200} +; POSTLINK: ![[#]] = !{!"TotalCount", i64 149500} +; POSTLINK: ![[#]] = !{!"MaxCount", i64 100000} +; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 100000} +; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 20000} ; POSTLINK: ![[#]] = !{!"NumCounts", i64 6} ; POSTLINK: ![[#]] = !{!"NumFunctions", i64 3} @@ -60,14 +60,14 @@ ; @foo will be called both unconditionally and conditionally, on the "yes" branch ; which has a count of 40. So 140 times. -; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 140} +; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 14000} ; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo). ; Which means its "yes" branch is taken 140 - 15 times. -; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15} -; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100} -; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60} +; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 12500, i32 1500} +; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 10000} +; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 4000, i32 6000} ;--- profile.yaml Contexts: diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll index bf672998c1e39..d69768d295907 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll @@ -13,8 +13,8 @@ ; CHECK: br i1 %x, label %b1, label %exit, !prof ![[PROF1:[0-9]+]] ; CHECK: br i1 %y, label %blk, label %exit, !prof ![[PROF2:[0-9]+]] -; CHECK: ![[PROF1]] = !{!"branch_weights", i32 1, i32 1} -; CHECK: ![[PROF2]] = !{!"branch_weights", i32 0, i32 1} +; CHECK: ![[PROF1]] = !{!"branch_weights", i32 2, i32 2} +; CHECK: ![[PROF2]] = !{!"branch_weights", i32 0, i32 2} ; ASSERTION: Assertion `allTakenPathsExit() ; b1->exit is the only way out from b1, but the exit block would have been diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll index 13beddc05c7a2..2943dce43e01f 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll @@ -11,7 +11,7 @@ ; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]] ; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar) ; PRELINK-NEXT: call void @bar(){{$}} -; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1} +; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 25, i64 5678, i64 20, i64 5555, i64 5} ; RUN: cp %t/example.ll %t/1234.ll ; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \ diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll index 558f385b4bb9d..3d349119cfd0c 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll @@ -14,7 +14,7 @@ ; CHECK-LABEL: yes: ; CHECK: br i1 %t3, label %yes1, label %yes2, !prof ![[C1]] ; CHECK-NOT: !prof -; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0} +; CHECK: ![[C1]] = !{!"branch_weights", i32 72, i32 0} ;--- 1234.ll define void @f1(i32 %cond) !guid !0 { diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll index 63abdd892bffb..8cadcae1654c9 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll @@ -108,7 +108,7 @@ Contexts: Counters: [ 1, 2 ] Flat Profile: -2072045998141807037 : 7 -3087265239403591524 : 11 9 -4197650231481825559 : 2 -10507721908651011566 : 1 +2072045998141807037 : 70 +3087265239403591524 : 110 90 +4197650231481825559 : 20 +10507721908651011566 : 10 diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll b/llvm/test/Analysis/CtxProfAnalysis/inline.ll index a069acee1c943..77880dbce1ab8 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/inline.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll @@ -44,11 +44,13 @@ ; PIPELINE-LABEL: loop: ; PIPELINE: br i1 %cond, label %loop, label %exit, !prof ![[LOOP_BW_ORIG:[0-9]+]] -; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 10} +; *Note* that all values are multiplied by the TotalRootEntryCount, which is 24 +; +; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 240} ; These are the weights of the inlined @a, where the counters were 2, 100 (2 for entry, 100 for loop) -; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 98, i32 2} +; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 2352, i32 48} ; These are the weights of the un-inlined @a, where the counters were 8, 500 (8 for entry, 500 for loop) -; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 492, i32 8} +; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 11808, i32 192} ;--- 1000.ll define i32 @entrypoint(i32 %x) !guid !0 { diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll index bd21a4b710630..92e7fdc40e229 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/load.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll @@ -60,10 +60,10 @@ Contexts: Counters: [ 5 ] Flat Profile: -12341 : 9 -728453322856651412 : 6 7 -11872291593386833696 : 1 -12074870348631550642 : 5 +12341 : 810 +728453322856651412 : 24 28 +11872291593386833696 : 4 +12074870348631550642 : 120 ;--- example.ll declare void @bar()