-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[memprof] Undrift MemProf profile even when some frames are missing #120500
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[memprof] Undrift MemProf profile even when some frames are missing #120500
Conversation
This patch makes the MemProf undrifting process a little more lenient. Consider an inlined call hierarchy: foo -> bar -> ::new If bar tail-calls ::new, the profile appears to indicate that foo directly calls ::new. This is a problem because the perceived call hierarchy in the profile looks different from what we can obtain from the inline stack in the IR. Recall that undrifting works by constructing and comparing a list of direct calls from the profile and that from the IR. This patch modifies the construction of the latter. Specifically, if foo calls bar in the IR, but bar is missing the profile, we pretend that foo directly calls some heap allocation function. We apply this transformation only in the inline stack leading to some heap allocation function.
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo Author: Kazu Hirata (kazutakahirata) ChangesThis patch makes the MemProf undrifting process a little more lenient. foo -> bar -> ::new If bar tail-calls ::new, the profile appears to indicate that foo Recall that undrifting works by constructing and comparing a list of Full diff: https://github.com/llvm/llvm-project/pull/120500.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index 344c9215fb822f..dcec60e512b630 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -67,7 +67,8 @@ namespace memprof {
// Extract all calls from the IR. Arrange them in a map from caller GUIDs to a
// list of call sites, each of the form {LineLocation, CalleeGUID}.
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
-extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI);
+extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
+ function_ref<bool(uint64_t)> IsPresentInProfile);
struct LineLocationHash {
uint64_t operator()(const LineLocation &Loc) const {
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index db8999911b7f92..c24fceb5ca27f7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -828,7 +828,8 @@ struct AllocMatchInfo {
};
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
-memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
+memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
+ function_ref<bool(uint64_t)> IsPresentInProfile) {
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
auto GetOffset = [](const DILocation *DIL) {
@@ -852,7 +853,12 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
continue;
StringRef CalleeName = CalledFunction->getName();
+ // True if we are calling a heap allocation function that supports
+ // hot/cold variants.
bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
+ // True for the first iteration below, indicating that we are looking at
+ // a leaf node.
+ bool IsLeaf = true;
for (const DILocation *DIL = I.getDebugLoc(); DIL;
DIL = DIL->getInlinedAt()) {
StringRef CallerName = DIL->getSubprogramLinkageName();
@@ -861,16 +867,27 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
// Pretend that we are calling a function with GUID == 0 if we are
- // calling a heap allocation function.
- if (IsAlloc)
- CalleeGUID = 0;
+ // in the inline stack leading to a heap allocation function.
+ if (IsAlloc) {
+ if (IsLeaf) {
+ // For leaf nodes, set CalleeGUID to 0 without consulting
+ // IsPresentInProfile.
+ CalleeGUID = 0;
+ } else if (!IsPresentInProfile(CalleeGUID)) {
+ // In addition to the leaf case above, continue to set CalleeGUID
+ // to 0 as long as we don't see CalleeGUID in the profile.
+ CalleeGUID = 0;
+ } else {
+ // Once we encounter a callee that exists in the profile, stop
+ // setting CalleeGUID to 0.
+ IsAlloc = false;
+ }
+ }
+
LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
CalleeName = CallerName;
- // FIXME: Recognize other frames that are associated with heap
- // allocation functions. It may be too early to reset IsAlloc to
- // false here.
- IsAlloc = false;
+ IsLeaf = false;
}
}
}
@@ -893,7 +910,9 @@ memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
MemProfReader->getMemProfCallerCalleePairs();
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
- extractCallsFromIR(M, TLI);
+ extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
+ return CallsFromProfile.contains(GUID);
+ });
// Compute an undrift map for each CallerGUID.
for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
diff --git a/llvm/test/Transforms/PGOProfile/memprof_undrift_missing_leaf.ll b/llvm/test/Transforms/PGOProfile/memprof_undrift_missing_leaf.ll
new file mode 100644
index 00000000000000..ca53b1dcedd7a7
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_undrift_missing_leaf.ll
@@ -0,0 +1,97 @@
+;; Tests memprof undrifting when the leaf frame is missing in the profile.
+;; This test case is taken from memprof_missing_leaf.ll with the profile
+;; drifted.
+
+;; Avoid failures on big-endian systems that can't read the profile properly
+; REQUIRES: x86_64-linux
+
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge %t/memprof_missing_leaf.yaml -o %t/memprof_missing_leaf.memprofdata
+; RUN: opt < %t/memprof_missing_leaf.ll -passes='memprof-use<profile-filename=%t/memprof_missing_leaf.memprofdata>' -memprof-salvage-stale-profile -S | FileCheck %s
+
+;--- memprof_missing_leaf.yaml
+---
+HeapProfileRecords:
+ - GUID: main
+ AllocSites:
+ - Callstack:
+ - { Function: main, LineOffset: 2, Column: 21, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 1
+ TotalLifetime: 0
+ TotalLifetimeAccessDensity: 0
+ CallSites: []
+...
+;--- memprof_missing_leaf.ll
+; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
+; CHECK: attributes #[[ATTR]] = {{.*}} "memprof"="notcold"
+
+; ModuleID = '<stdin>'
+source_filename = "memprof_missing_leaf.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znam(i64 noundef) #0
+
+; Function Attrs: mustprogress norecurse uwtable
+define dso_local noundef i32 @main() #1 !dbg !8 {
+entry:
+ %s.addr.i = alloca i64, align 8
+ %retval = alloca i32, align 4
+ %a = alloca ptr, align 8
+ store i32 0, ptr %retval, align 4
+ store i64 1, ptr %s.addr.i, align 8, !tbaa !11
+ %0 = load i64, ptr %s.addr.i, align 8, !dbg !15, !tbaa !11
+ %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef %0) #3, !dbg !18
+ store ptr %call.i, ptr %a, align 8, !dbg !19, !tbaa !20
+ %1 = load ptr, ptr %a, align 8, !dbg !22, !tbaa !20
+ %isnull = icmp eq ptr %1, null, !dbg !23
+ br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !23
+
+delete.notnull: ; preds = %entry
+ call void @_ZdlPv(ptr noundef %1) #4, !dbg !23
+ br label %delete.end, !dbg !23
+
+delete.end: ; preds = %delete.notnull, %entry
+ ret i32 0, !dbg !24
+}
+
+; Function Attrs: nobuiltin nounwind
+declare void @_ZdlPv(ptr noundef) #2
+
+attributes #0 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress norecurse uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nobuiltin nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { builtin allocsize(0) }
+attributes #4 = { builtin nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0 ([email protected]:llvm/llvm-project.git 71bf052ec90e77cb4aa66505d47cbc4b6016ac1d)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof_missing_leaf.cc", directory: ".", checksumkind: CSK_MD5, checksum: "f1445a8699406a6b826128704d257677")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 15, type: !9, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = !DISubroutineType(types: !10)
+!10 = !{}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"long", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C++ TBAA"}
+!15 = !DILocation(line: 11, column: 19, scope: !16, inlinedAt: !17)
+!16 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barm", scope: !1, file: !1, line: 7, type: !9, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!17 = distinct !DILocation(line: 16, column: 21, scope: !8)
+!18 = !DILocation(line: 11, column: 10, scope: !16, inlinedAt: !17)
+!19 = !DILocation(line: 16, column: 9, scope: !8)
+!20 = !{!21, !21, i64 0}
+!21 = !{!"any pointer", !13, i64 0}
+!22 = !DILocation(line: 17, column: 10, scope: !8)
+!23 = !DILocation(line: 17, column: 3, scope: !8)
+!24 = !DILocation(line: 18, column: 3, scope: !8)
diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
index c04f3c2a2863e1..1e96bf08f98cd0 100644
--- a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
+++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
@@ -92,7 +92,8 @@ declare !dbg !19 void @_Z2f3v()
TargetLibraryInfoWrapperPass WrapperPass;
auto &TLI = WrapperPass.getTLI(*F);
- auto Calls = extractCallsFromIR(*M, TLI);
+ auto IsPresentInProfile = [](uint64_t) { return true; };
+ auto Calls = extractCallsFromIR(*M, TLI, IsPresentInProfile);
// Expect exactly one caller.
ASSERT_THAT(Calls, SizeIs(1));
@@ -193,7 +194,8 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr
TargetLibraryInfoWrapperPass WrapperPass;
auto &TLI = WrapperPass.getTLI(*F);
- auto Calls = extractCallsFromIR(*M, TLI);
+ auto IsPresentInProfile = [](uint64_t) { return true; };
+ auto Calls = extractCallsFromIR(*M, TLI, IsPresentInProfile);
// Expect exactly 4 callers.
ASSERT_THAT(Calls, SizeIs(4));
@@ -288,7 +290,8 @@ attributes #2 = { builtin allocsize(0) }
TargetLibraryInfoWrapperPass WrapperPass;
auto &TLI = WrapperPass.getTLI(*F);
- auto Calls = extractCallsFromIR(*M, TLI);
+ auto IsPresentInProfile = [](uint64_t) { return true; };
+ auto Calls = extractCallsFromIR(*M, TLI, IsPresentInProfile);
// Expect exactly one caller.
ASSERT_THAT(Calls, SizeIs(1));
@@ -404,7 +407,8 @@ attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "t
TargetLibraryInfoWrapperPass WrapperPass;
auto &TLI = WrapperPass.getTLI(*F);
- auto Calls = extractCallsFromIR(*M, TLI);
+ auto IsPresentInProfile = [](uint64_t) { return true; };
+ auto Calls = extractCallsFromIR(*M, TLI, IsPresentInProfile);
uint64_t GUIDFoo = IndexedMemProfRecord::getGUID("_Z3foov");
uint64_t GUIDBar = IndexedMemProfRecord::getGUID("_Z3barv");
|
snehasish
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with some minor comments.
This patch makes the MemProf undrifting process a little more lenient.
Consider an inlined call hierarchy:
foo -> bar -> ::new
If bar tail-calls ::new, the profile appears to indicate that foo
directly calls ::new. This is a problem because the perceived call
hierarchy in the profile looks different from what we can obtain from
the inline stack in the IR.
Recall that undrifting works by constructing and comparing a list of
direct calls from the profile and that from the IR. This patch
modifies the construction of the latter. Specifically, if foo calls
bar in the IR, but bar is missing the profile, we pretend that foo
directly calls some heap allocation function. We apply this
transformation only in the inline stack leading to some heap
allocation function.