Skip to content

Commit 421e8a7

Browse files
[memprof] Print alloc site matches immediately
Without this patch, we buffer alloc site matches in FullStackIdToAllocMatchInfo and then print them out at the end of MemProfUsePass. This practice is problematic when we have multiple matches per alloc site. Consider: char *f1() { return new char[3]; } char *f2() { return f1(); } __attribute__((noinline)) char *f3() { return f2(); } In this example, f1 contains an alloc site, of course, but so do f2 and f3 via inlining. When something like this happens, FullStackIdToAllocMatchInfo gets updated multiple times for the same full stack ID at the assignment: FullStackIdToAllocMatchInfo[FullStackId] = { ... }; with a different InlinedCallStack.size() each time, so only the last match survives. This patch changes the behavior by printing out alloc site matches immediately, potentially printing out multiple matches for the same FullStackId. It is up to the consumer of the messages to figure out, for example, the length of the longest match. For the test, this patch adjusts an existing one, memprof-dump-matched-alloc-site.ll. Specifically, this patch "restores" the IR and corresponding profile for f2 and f1 so that the compiler generates a "MemProf notcold" message for each of f1, f2, and f3.
1 parent f1886b1 commit 421e8a7

File tree

3 files changed

+76
-48
lines changed

3 files changed

+76
-48
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -810,13 +810,6 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
810810
}
811811
}
812812

813-
struct AllocMatchInfo {
814-
uint64_t TotalSize = 0;
815-
size_t NumFramesMatched = 0;
816-
AllocationType AllocType = AllocationType::None;
817-
bool Matched = false;
818-
};
819-
820813
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
821814
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
822815
function_ref<bool(uint64_t)> IsPresentInProfile) {
@@ -952,13 +945,12 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
952945
UndriftCallStack(CS.Frames);
953946
}
954947

955-
static void
956-
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
957-
const TargetLibraryInfo &TLI,
958-
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
959-
std::set<std::vector<uint64_t>> &MatchedCallSites,
960-
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
961-
OptimizationRemarkEmitter &ORE) {
948+
static void readMemprof(Module &M, Function &F,
949+
IndexedInstrProfReader *MemProfReader,
950+
const TargetLibraryInfo &TLI,
951+
std::set<std::vector<uint64_t>> &MatchedCallSites,
952+
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
953+
OptimizationRemarkEmitter &ORE) {
962954
auto &Ctx = M.getContext();
963955
// Previously we used getIRPGOFuncName() here. If F is local linkage,
964956
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1146,9 +1138,11 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
11461138
// was requested.
11471139
if (ClPrintMemProfMatchInfo) {
11481140
assert(FullStackId != 0);
1149-
FullStackIdToAllocMatchInfo[FullStackId] = {
1150-
AllocInfo->Info.getTotalSize(), InlinedCallStack.size(),
1151-
AllocType, /*Matched=*/true};
1141+
errs() << "MemProf " << getAllocTypeAttributeString(AllocType)
1142+
<< " context with id " << FullStackId
1143+
<< " has total profiled size "
1144+
<< AllocInfo->Info.getTotalSize() << " is matched with "
1145+
<< InlinedCallStack.size() << " frames\n";
11521146
}
11531147
}
11541148
}
@@ -1258,11 +1252,6 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12581252
if (SalvageStaleProfile)
12591253
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
12601254

1261-
// Map from the stack has of each allocation context in the function profiles
1262-
// to the total profiled size (bytes), allocation type, and whether we matched
1263-
// it to an allocation in the IR.
1264-
std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
1265-
12661255
// Set of the matched call sites, each expressed as a sequence of an inline
12671256
// call stack.
12681257
std::set<std::vector<uint64_t>> MatchedCallSites;
@@ -1273,17 +1262,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12731262

12741263
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
12751264
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1276-
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1277-
MatchedCallSites, UndriftMaps, ORE);
1265+
readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps,
1266+
ORE);
12781267
}
12791268

12801269
if (ClPrintMemProfMatchInfo) {
1281-
for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
1282-
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
1283-
<< " context with id " << Id << " has total profiled size "
1284-
<< Info.TotalSize << (Info.Matched ? " is" : " not")
1285-
<< " matched with " << Info.NumFramesMatched << " frames\n";
1286-
12871270
for (const auto &CallStack : MatchedCallSites) {
12881271
errs() << "MemProf callsite match for inline call stack";
12891272
for (uint64_t StackId : CallStack)

llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,41 @@
3131
;--- memprof-dump-matched-alloc-site.yaml
3232
---
3333
HeapProfileRecords:
34+
- GUID: _Z2f2v
35+
AllocSites:
36+
- Callstack:
37+
- { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
38+
- { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
39+
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
40+
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
41+
MemInfoBlock:
42+
AllocCount: 1
43+
TotalSize: 3
44+
TotalLifetime: 0
45+
TotalLifetimeAccessDensity: 0
46+
CallSites:
47+
- Frames:
48+
- { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
49+
- { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
50+
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
51+
- GUID: _Z2f1v
52+
AllocSites:
53+
- Callstack:
54+
- { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
55+
- { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
56+
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
57+
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
58+
MemInfoBlock:
59+
AllocCount: 1
60+
TotalSize: 3
61+
TotalLifetime: 0
62+
TotalLifetimeAccessDensity: 0
63+
CallSites: []
3464
- GUID: _Z2f3v
3565
AllocSites:
3666
- Callstack:
37-
- { Function: _ZL2f1v, LineOffset: 0, Column: 35, IsInlineFrame: true }
38-
- { Function: _ZL2f2v, LineOffset: 0, Column: 35, IsInlineFrame: true }
67+
- { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
68+
- { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
3969
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
4070
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
4171
MemInfoBlock:
@@ -47,32 +77,47 @@ HeapProfileRecords:
4777
# Kept empty here because this section is irrelevant for this test.
4878
...
4979
;--- memprof-dump-matched-alloc-site.ll
50-
; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames
80+
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
81+
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
82+
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
5183

5284
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5385
target triple = "x86_64-unknown-linux-gnu"
5486

55-
define ptr @_Z2f3v() {
87+
define ptr @_Z2f1v() {
5688
entry:
57-
%call.i.i = call ptr @_Znam(i64 0), !dbg !3
58-
ret ptr null
89+
%call = call ptr @_Znam(i64 0), !dbg !3
90+
ret ptr %call
5991
}
6092

6193
declare ptr @_Znam(i64)
6294

95+
define ptr @_Z2f2v() {
96+
entry:
97+
%call.i = call ptr @_Znam(i64 0), !dbg !7
98+
ret ptr %call.i
99+
}
100+
101+
define ptr @_Z2f3v() {
102+
entry:
103+
%call.i.i = call ptr @_Znam(i64 0), !dbg !10
104+
ret ptr %call.i.i
105+
}
106+
63107
!llvm.dbg.cu = !{!0}
64108
!llvm.module.flags = !{!2}
65109

66110
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
67111
!1 = !DIFile(filename: "memprof-dump-matched-alloc-site.cc", directory: "/")
68112
!2 = !{i32 2, !"Debug Info Version", i32 3}
69-
!3 = !DILocation(line: 1, column: 35, scope: !4, inlinedAt: !7)
70-
!4 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
113+
!3 = !DILocation(line: 1, column: 21, scope: !4)
114+
!4 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
71115
!5 = !DISubroutineType(types: !6)
72116
!6 = !{}
73-
!7 = distinct !DILocation(line: 2, column: 35, scope: !8, inlinedAt: !9)
74-
!8 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
75-
!9 = distinct !DILocation(line: 3, column: 47, scope: !10)
76-
!10 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
77-
!11 = !DILocation(line: 6, column: 3, scope: !12)
78-
!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !5, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
117+
!7 = !DILocation(line: 1, column: 21, scope: !4, inlinedAt: !8)
118+
!8 = distinct !DILocation(line: 2, column: 21, scope: !9)
119+
!9 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
120+
!10 = !DILocation(line: 1, column: 21, scope: !4, inlinedAt: !11)
121+
!11 = distinct !DILocation(line: 2, column: 21, scope: !9, inlinedAt: !12)
122+
!12 = distinct !DILocation(line: 3, column: 47, scope: !13)
123+
!13 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,13 @@
111111
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
112112

113113
; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
114-
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
115-
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
116114
; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
117-
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
118-
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
119115
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
120116
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
117+
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
118+
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
119+
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
120+
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
121121
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
122122
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
123123
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691

0 commit comments

Comments
 (0)