Skip to content

Commit adf0c81

Browse files
[memprof] Undrift MemProf profile even when some frames are missing (#120500)
This patch makes the MemProf undrifting process a little more lenient. Consider an inlined call hierarchy: foo -> bar -> ::new. If bar tail-calls ::new, the profile appears to indicate that foo directly calls ::new. This is a problem because the perceived call hierarchy in the profile looks different from what we can obtain from the inline stack in the IR. Recall that undrifting works by constructing the list of direct calls from the profile and the list of direct calls from the IR, and then comparing the two. This patch modifies the construction of the latter. Specifically, if foo calls bar in the IR, but bar is missing from the profile, we pretend that foo directly calls some heap allocation function. We apply this transformation only in the inline stack leading to some heap allocation function.
1 parent e5de2a2 commit adf0c81

File tree

3 files changed

+125
-11
lines changed

3 files changed

+125
-11
lines changed

llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,11 @@ namespace memprof {
6666

6767
// Extract all calls from the IR. Arrange them in a map from caller GUIDs to a
6868
// list of call sites, each of the form {LineLocation, CalleeGUID}.
69-
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
70-
extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI);
69+
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> extractCallsFromIR(
70+
Module &M, const TargetLibraryInfo &TLI,
71+
function_ref<bool(uint64_t)> IsPresentInProfile = [](uint64_t) {
72+
return true;
73+
});
7174

7275
struct LineLocationHash {
7376
uint64_t operator()(const LineLocation &Loc) const {

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,8 @@ struct AllocMatchInfo {
832832
};
833833

834834
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
835-
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
835+
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
836+
function_ref<bool(uint64_t)> IsPresentInProfile) {
836837
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
837838

838839
auto GetOffset = [](const DILocation *DIL) {
@@ -856,7 +857,12 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
856857
continue;
857858

858859
StringRef CalleeName = CalledFunction->getName();
860+
// True if we are calling a heap allocation function that supports
861+
// hot/cold variants.
859862
bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
863+
// True for the first iteration below, indicating that we are looking at
864+
// a leaf node.
865+
bool IsLeaf = true;
860866
for (const DILocation *DIL = I.getDebugLoc(); DIL;
861867
DIL = DIL->getInlinedAt()) {
862868
StringRef CallerName = DIL->getSubprogramLinkageName();
@@ -865,16 +871,27 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
865871
uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
866872
uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
867873
// Pretend that we are calling a function with GUID == 0 if we are
868-
// calling a heap allocation function.
869-
if (IsAlloc)
870-
CalleeGUID = 0;
874+
// in the inline stack leading to a heap allocation function.
875+
if (IsAlloc) {
876+
if (IsLeaf) {
877+
// For leaf nodes, set CalleeGUID to 0 without consulting
878+
// IsPresentInProfile.
879+
CalleeGUID = 0;
880+
} else if (!IsPresentInProfile(CalleeGUID)) {
881+
// In addition to the leaf case above, continue to set CalleeGUID
882+
// to 0 as long as we don't see CalleeGUID in the profile.
883+
CalleeGUID = 0;
884+
} else {
885+
// Once we encounter a callee that exists in the profile, stop
886+
// setting CalleeGUID to 0.
887+
IsAlloc = false;
888+
}
889+
}
890+
871891
LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
872892
Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
873893
CalleeName = CallerName;
874-
// FIXME: Recognize other frames that are associated with heap
875-
// allocation functions. It may be too early to reset IsAlloc to
876-
// false here.
877-
IsAlloc = false;
894+
IsLeaf = false;
878895
}
879896
}
880897
}
@@ -897,7 +914,9 @@ memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
897914
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
898915
MemProfReader->getMemProfCallerCalleePairs();
899916
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
900-
extractCallsFromIR(M, TLI);
917+
extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
918+
return CallsFromProfile.contains(GUID);
919+
});
901920

902921
// Compute an undrift map for each CallerGUID.
903922
for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
;; Tests memprof undrifting when the leaf frame is missing in the profile.
2+
;; This test case is taken from memprof_missing_leaf.ll with the profile
3+
;; drifted.
4+
5+
; RUN: split-file %s %t
6+
; RUN: llvm-profdata merge %t/memprof_missing_leaf.yaml -o %t/memprof_missing_leaf.memprofdata
7+
; RUN: opt < %t/memprof_missing_leaf.ll -passes='memprof-use<profile-filename=%t/memprof_missing_leaf.memprofdata>' -memprof-salvage-stale-profile -S | FileCheck %s
8+
9+
;--- memprof_missing_leaf.yaml
10+
---
11+
HeapProfileRecords:
12+
- GUID: main
13+
AllocSites:
14+
- Callstack:
15+
- { Function: main, LineOffset: 2, Column: 21, IsInlineFrame: false }
16+
MemInfoBlock:
17+
AllocCount: 1
18+
TotalSize: 1
19+
CallSites: []
20+
...
21+
;--- memprof_missing_leaf.ll
22+
; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
23+
; CHECK: attributes #[[ATTR]] = {{.*}} "memprof"="notcold"
24+
25+
; ModuleID = '<stdin>'
26+
source_filename = "memprof_missing_leaf.cc"
27+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
28+
target triple = "x86_64-unknown-linux-gnu"
29+
30+
; Function Attrs: nobuiltin allocsize(0)
31+
declare noundef nonnull ptr @_Znam(i64 noundef) #0
32+
33+
; Function Attrs: mustprogress norecurse uwtable
34+
define dso_local noundef i32 @main() #1 !dbg !8 {
35+
entry:
36+
%s.addr.i = alloca i64, align 8
37+
%retval = alloca i32, align 4
38+
%a = alloca ptr, align 8
39+
store i32 0, ptr %retval, align 4
40+
store i64 1, ptr %s.addr.i, align 8, !tbaa !11
41+
%0 = load i64, ptr %s.addr.i, align 8, !dbg !15, !tbaa !11
42+
%call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef %0) #3, !dbg !18
43+
store ptr %call.i, ptr %a, align 8, !dbg !19, !tbaa !20
44+
%1 = load ptr, ptr %a, align 8, !dbg !22, !tbaa !20
45+
%isnull = icmp eq ptr %1, null, !dbg !23
46+
br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !23
47+
48+
delete.notnull: ; preds = %entry
49+
call void @_ZdlPv(ptr noundef %1) #4, !dbg !23
50+
br label %delete.end, !dbg !23
51+
52+
delete.end: ; preds = %delete.notnull, %entry
53+
ret i32 0, !dbg !24
54+
}
55+
56+
; Function Attrs: nobuiltin nounwind
57+
declare void @_ZdlPv(ptr noundef) #2
58+
59+
attributes #0 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
60+
attributes #1 = { mustprogress norecurse uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
61+
attributes #2 = { nobuiltin nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
62+
attributes #3 = { builtin allocsize(0) }
63+
attributes #4 = { builtin nounwind }
64+
65+
!llvm.dbg.cu = !{!0}
66+
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
67+
68+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0 (git@github.com:llvm/llvm-project.git 71bf052ec90e77cb4aa66505d47cbc4b6016ac1d)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
69+
!1 = !DIFile(filename: "memprof_missing_leaf.cc", directory: ".", checksumkind: CSK_MD5, checksum: "f1445a8699406a6b826128704d257677")
70+
!2 = !{i32 7, !"Dwarf Version", i32 5}
71+
!3 = !{i32 2, !"Debug Info Version", i32 3}
72+
!4 = !{i32 1, !"wchar_size", i32 4}
73+
!5 = !{i32 8, !"PIC Level", i32 2}
74+
!6 = !{i32 7, !"PIE Level", i32 2}
75+
!7 = !{i32 7, !"uwtable", i32 2}
76+
!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 15, type: !9, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
77+
!9 = !DISubroutineType(types: !10)
78+
!10 = !{}
79+
!11 = !{!12, !12, i64 0}
80+
!12 = !{!"long", !13, i64 0}
81+
!13 = !{!"omnipotent char", !14, i64 0}
82+
!14 = !{!"Simple C++ TBAA"}
83+
!15 = !DILocation(line: 11, column: 19, scope: !16, inlinedAt: !17)
84+
!16 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barm", scope: !1, file: !1, line: 7, type: !9, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
85+
!17 = distinct !DILocation(line: 16, column: 21, scope: !8)
86+
!18 = !DILocation(line: 11, column: 10, scope: !16, inlinedAt: !17)
87+
!19 = !DILocation(line: 16, column: 9, scope: !8)
88+
!20 = !{!21, !21, i64 0}
89+
!21 = !{!"any pointer", !13, i64 0}
90+
!22 = !DILocation(line: 17, column: 10, scope: !8)
91+
!23 = !DILocation(line: 17, column: 3, scope: !8)
92+
!24 = !DILocation(line: 18, column: 3, scope: !8)

0 commit comments

Comments
 (0)