Skip to content

Commit a28f3cc

Browse files
[memprof] Undrift MemProf profile even when some frames are missing
This patch makes the MemProf undrifting process a little more lenient. Consider an inlined call hierarchy: foo -> bar -> ::new. If bar tail-calls ::new, the profile appears to indicate that foo directly calls ::new. This is a problem because the perceived call hierarchy in the profile looks different from what we can obtain from the inline stack in the IR. Recall that undrifting works by constructing a list of direct calls from the profile and another from the IR, and then comparing the two. This patch modifies the construction of the latter. Specifically, if foo calls bar in the IR, but bar is missing from the profile, we pretend that foo directly calls some heap allocation function. We apply this transformation only in the inline stack leading to some heap allocation function.
1 parent 2691b96 commit a28f3cc

File tree

4 files changed

+135
-14
lines changed

4 files changed

+135
-14
lines changed

llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ namespace memprof {
6767
// Extract all calls from the IR. Arrange them in a map from caller GUIDs to a
6868
// list of call sites, each of the form {LineLocation, CalleeGUID}.
6969
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
70-
extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI);
70+
extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
71+
function_ref<bool(uint64_t)> IsPresentInProfile);
7172

7273
struct LineLocationHash {
7374
uint64_t operator()(const LineLocation &Loc) const {

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,8 @@ struct AllocMatchInfo {
828828
};
829829

830830
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
831-
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
831+
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
832+
function_ref<bool(uint64_t)> IsPresentInProfile) {
832833
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
833834

834835
auto GetOffset = [](const DILocation *DIL) {
@@ -852,7 +853,12 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
852853
continue;
853854

854855
StringRef CalleeName = CalledFunction->getName();
856+
// True if we are calling a heap allocation function that supports
857+
// hot/cold variants.
855858
bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
859+
// True for the first iteration below, indicating that we are looking at
860+
// a leaf node.
861+
bool IsLeaf = true;
856862
for (const DILocation *DIL = I.getDebugLoc(); DIL;
857863
DIL = DIL->getInlinedAt()) {
858864
StringRef CallerName = DIL->getSubprogramLinkageName();
@@ -861,16 +867,27 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
861867
uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
862868
uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
863869
// Pretend that we are calling a function with GUID == 0 if we are
864-
// calling a heap allocation function.
865-
if (IsAlloc)
866-
CalleeGUID = 0;
870+
// in the inline stack leading to a heap allocation function.
871+
if (IsAlloc) {
872+
if (IsLeaf) {
873+
// For leaf nodes, set CalleeGUID to 0 without consulting
874+
// IsPresentInProfile.
875+
CalleeGUID = 0;
876+
} else if (!IsPresentInProfile(CalleeGUID)) {
877+
// In addition to the leaf case above, continue to set CalleeGUID
878+
// to 0 as long as we don't see CalleeGUID in the profile.
879+
CalleeGUID = 0;
880+
} else {
881+
// Once we encounter a callee that exists in the profile, stop
882+
// setting CalleeGUID to 0.
883+
IsAlloc = false;
884+
}
885+
}
886+
867887
LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
868888
Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
869889
CalleeName = CallerName;
870-
// FIXME: Recognize other frames that are associated with heap
871-
// allocation functions. It may be too early to reset IsAlloc to
872-
// false here.
873-
IsAlloc = false;
890+
IsLeaf = false;
874891
}
875892
}
876893
}
@@ -893,7 +910,9 @@ memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
893910
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
894911
MemProfReader->getMemProfCallerCalleePairs();
895912
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
896-
extractCallsFromIR(M, TLI);
913+
extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
914+
return CallsFromProfile.contains(GUID);
915+
});
897916

898917
// Compute an undrift map for each CallerGUID.
899918
for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
;; Tests memprof undrifting when the leaf frame is missing in the profile.
2+
;; This test case is taken from memprof_missing_leaf.ll with the profile
3+
;; drifted.
4+
5+
;; Avoid failures on big-endian systems that can't read the profile properly
6+
; REQUIRES: x86_64-linux
7+
8+
; RUN: split-file %s %t
9+
; RUN: llvm-profdata merge %t/memprof_missing_leaf.yaml -o %t/memprof_missing_leaf.memprofdata
10+
; RUN: opt < %t/memprof_missing_leaf.ll -passes='memprof-use<profile-filename=%t/memprof_missing_leaf.memprofdata>' -memprof-salvage-stale-profile -S | FileCheck %s
11+
12+
;--- memprof_missing_leaf.yaml
13+
---
14+
HeapProfileRecords:
15+
- GUID: main
16+
AllocSites:
17+
- Callstack:
18+
- { Function: main, LineOffset: 2, Column: 21, IsInlineFrame: false }
19+
MemInfoBlock:
20+
AllocCount: 1
21+
TotalSize: 1
22+
TotalLifetime: 0
23+
TotalLifetimeAccessDensity: 0
24+
CallSites: []
25+
...
26+
;--- memprof_missing_leaf.ll
27+
; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
28+
; CHECK: attributes #[[ATTR]] = {{.*}} "memprof"="notcold"
29+
30+
; ModuleID = '<stdin>'
31+
source_filename = "memprof_missing_leaf.cc"
32+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
33+
target triple = "x86_64-unknown-linux-gnu"
34+
35+
; Function Attrs: nobuiltin allocsize(0)
36+
declare noundef nonnull ptr @_Znam(i64 noundef) #0
37+
38+
; Function Attrs: mustprogress norecurse uwtable
39+
define dso_local noundef i32 @main() #1 !dbg !8 {
40+
entry:
41+
%s.addr.i = alloca i64, align 8
42+
%retval = alloca i32, align 4
43+
%a = alloca ptr, align 8
44+
store i32 0, ptr %retval, align 4
45+
store i64 1, ptr %s.addr.i, align 8, !tbaa !11
46+
%0 = load i64, ptr %s.addr.i, align 8, !dbg !15, !tbaa !11
47+
%call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef %0) #3, !dbg !18
48+
store ptr %call.i, ptr %a, align 8, !dbg !19, !tbaa !20
49+
%1 = load ptr, ptr %a, align 8, !dbg !22, !tbaa !20
50+
%isnull = icmp eq ptr %1, null, !dbg !23
51+
br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !23
52+
53+
delete.notnull: ; preds = %entry
54+
call void @_ZdlPv(ptr noundef %1) #4, !dbg !23
55+
br label %delete.end, !dbg !23
56+
57+
delete.end: ; preds = %delete.notnull, %entry
58+
ret i32 0, !dbg !24
59+
}
60+
61+
; Function Attrs: nobuiltin nounwind
62+
declare void @_ZdlPv(ptr noundef) #2
63+
64+
attributes #0 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
65+
attributes #1 = { mustprogress norecurse uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
66+
attributes #2 = { nobuiltin nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
67+
attributes #3 = { builtin allocsize(0) }
68+
attributes #4 = { builtin nounwind }
69+
70+
!llvm.dbg.cu = !{!0}
71+
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
72+
73+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0 ([email protected]:llvm/llvm-project.git 71bf052ec90e77cb4aa66505d47cbc4b6016ac1d)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
74+
!1 = !DIFile(filename: "memprof_missing_leaf.cc", directory: ".", checksumkind: CSK_MD5, checksum: "f1445a8699406a6b826128704d257677")
75+
!2 = !{i32 7, !"Dwarf Version", i32 5}
76+
!3 = !{i32 2, !"Debug Info Version", i32 3}
77+
!4 = !{i32 1, !"wchar_size", i32 4}
78+
!5 = !{i32 8, !"PIC Level", i32 2}
79+
!6 = !{i32 7, !"PIE Level", i32 2}
80+
!7 = !{i32 7, !"uwtable", i32 2}
81+
!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 15, type: !9, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
82+
!9 = !DISubroutineType(types: !10)
83+
!10 = !{}
84+
!11 = !{!12, !12, i64 0}
85+
!12 = !{!"long", !13, i64 0}
86+
!13 = !{!"omnipotent char", !14, i64 0}
87+
!14 = !{!"Simple C++ TBAA"}
88+
!15 = !DILocation(line: 11, column: 19, scope: !16, inlinedAt: !17)
89+
!16 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barm", scope: !1, file: !1, line: 7, type: !9, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
90+
!17 = distinct !DILocation(line: 16, column: 21, scope: !8)
91+
!18 = !DILocation(line: 11, column: 10, scope: !16, inlinedAt: !17)
92+
!19 = !DILocation(line: 16, column: 9, scope: !8)
93+
!20 = !{!21, !21, i64 0}
94+
!21 = !{!"any pointer", !13, i64 0}
95+
!22 = !DILocation(line: 17, column: 10, scope: !8)
96+
!23 = !DILocation(line: 17, column: 3, scope: !8)
97+
!24 = !DILocation(line: 18, column: 3, scope: !8)

llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ declare !dbg !19 void @_Z2f3v()
9292

9393
TargetLibraryInfoWrapperPass WrapperPass;
9494
auto &TLI = WrapperPass.getTLI(*F);
95-
auto Calls = extractCallsFromIR(*M, TLI);
95+
auto IsPresentInProfile = [](uint64_t) { return true; };
96+
auto Calls = extractCallsFromIR(*M, TLI, IsPresentInProfile);
9697

9798
// Expect exactly one caller.
9899
ASSERT_THAT(Calls, SizeIs(1));
@@ -193,7 +194,8 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr
193194

194195
TargetLibraryInfoWrapperPass WrapperPass;
195196
auto &TLI = WrapperPass.getTLI(*F);
196-
auto Calls = extractCallsFromIR(*M, TLI);
197+
auto IsPresentInProfile = [](uint64_t) { return true; };
198+
auto Calls = extractCallsFromIR(*M, TLI, IsPresentInProfile);
197199

198200
// Expect exactly 4 callers.
199201
ASSERT_THAT(Calls, SizeIs(4));
@@ -288,7 +290,8 @@ attributes #2 = { builtin allocsize(0) }
288290

289291
TargetLibraryInfoWrapperPass WrapperPass;
290292
auto &TLI = WrapperPass.getTLI(*F);
291-
auto Calls = extractCallsFromIR(*M, TLI);
293+
auto IsPresentInProfile = [](uint64_t) { return true; };
294+
auto Calls = extractCallsFromIR(*M, TLI, IsPresentInProfile);
292295

293296
// Expect exactly one caller.
294297
ASSERT_THAT(Calls, SizeIs(1));
@@ -404,7 +407,8 @@ attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "t
404407

405408
TargetLibraryInfoWrapperPass WrapperPass;
406409
auto &TLI = WrapperPass.getTLI(*F);
407-
auto Calls = extractCallsFromIR(*M, TLI);
410+
auto IsPresentInProfile = [](uint64_t) { return true; };
411+
auto Calls = extractCallsFromIR(*M, TLI, IsPresentInProfile);
408412

409413
uint64_t GUIDFoo = IndexedMemProfRecord::getGUID("_Z3foov");
410414
uint64_t GUIDBar = IndexedMemProfRecord::getGUID("_Z3barv");

0 commit comments

Comments
 (0)