Skip to content

Commit ec5c64f

Browse files
Address comments.
1 parent a919358 commit ec5c64f

File tree

3 files changed

+152
-18
lines changed

3 files changed

+152
-18
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -966,11 +966,12 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
966966
UndriftCallStack(CS);
967967
}
968968

969-
static void
970-
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
971-
const TargetLibraryInfo &TLI,
972-
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
973-
DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
969+
static void readMemprof(
970+
Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
971+
const TargetLibraryInfo &TLI,
972+
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
973+
std::set<std::vector<uint64_t>> &MatchedCallSites,
974+
DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
974975
auto &Ctx = M.getContext();
975976
// Previously we used getIRPGOFuncName() here. If F is local linkage,
976977
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1034,15 +1035,13 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10341035
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
10351036
// A hash function for std::unordered_set<ArrayRef<Frame>> to work.
10361037
struct CallStackHash {
1037-
size_t operator()(const std::pair<ArrayRef<Frame>, unsigned> &CS) const {
1038-
auto &[CallStack, Idx] = CS;
1039-
return computeFullStackId(ArrayRef<Frame>(CallStack).drop_front(Idx));
1038+
size_t operator()(ArrayRef<Frame> CS) const {
1039+
return computeFullStackId(CS);
10401040
}
10411041
};
10421042
// For the callsites we need to record slices of the frame array (see comments
10431043
// below where the map entries are added).
1044-
std::map<uint64_t, std::unordered_set<std::pair<ArrayRef<Frame>, unsigned>,
1045-
CallStackHash>>
1044+
std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
10461045
LocHashToCallSites;
10471046
for (auto &AI : MemProfRec->AllocSites) {
10481047
NumOfMemProfAllocContextProfiles++;
@@ -1060,7 +1059,7 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10601059
unsigned Idx = 0;
10611060
for (auto &StackFrame : CS) {
10621061
uint64_t StackId = computeStackId(StackFrame);
1063-
LocHashToCallSites[StackId].emplace(CS, Idx++);
1062+
LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++));
10641063
ProfileHasColumns |= StackFrame.Column;
10651064
// Once we find this function, we can stop recording.
10661065
if (StackFrame.Function == FuncGUID)
@@ -1203,21 +1202,21 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
12031202
// instruction's leaf location in the callsites map and not the allocation
12041203
// map.
12051204
assert(CallSitesIter != LocHashToCallSites.end());
1206-
for (auto &[ProfileCallStack, Idx] : CallSitesIter->second) {
1205+
for (auto CallStackIdx : CallSitesIter->second) {
12071206
// If we found and thus matched all frames on the call, create and
12081207
// attach call stack metadata.
1209-
if (stackFrameIncludesInlinedCallStack(ProfileCallStack.drop_front(Idx),
1208+
if (stackFrameIncludesInlinedCallStack(CallStackIdx,
12101209
InlinedCallStack)) {
12111210
NumOfMemProfMatchedCallSites++;
12121211
addCallsiteMetadata(I, InlinedCallStack, Ctx);
12131212
// Only need to find one with a matching call stack and add a single
12141213
// callsite metadata.
12151214

1216-
// Dump call site matching information upon request.
1215+
// Accumulate call site matching information upon request.
12171216
if (ClPrintMemProfMatchInfo) {
1218-
uint64_t FullStackId = computeFullStackId(ProfileCallStack);
1219-
errs() << "MemProf callsite " << FullStackId << " " << Idx << " "
1220-
<< InlinedCallStack.size() << "\n";
1217+
std::vector<uint64_t> CallStack;
1218+
append_range(CallStack, InlinedCallStack);
1219+
MatchedCallSites.insert(std::move(CallStack));
12211220
}
12221221
break;
12231222
}
@@ -1275,13 +1274,17 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12751274
// it to an allocation in the IR.
12761275
std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
12771276

1277+
// Set of the matched call sites, each expressed as the sequence of stack IDs
1278+
// that make up its inline call stack.
1279+
std::set<std::vector<uint64_t>> MatchedCallSites;
1280+
12781281
for (auto &F : M) {
12791282
if (F.isDeclaration())
12801283
continue;
12811284

12821285
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
12831286
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1284-
UndriftMaps);
1287+
MatchedCallSites, UndriftMaps);
12851288
}
12861289

12871290
if (ClPrintMemProfMatchInfo) {
@@ -1290,6 +1293,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12901293
<< " context with id " << Id << " has total profiled size "
12911294
<< Info.TotalSize << (Info.Matched ? " is" : " not")
12921295
<< " matched\n";
1296+
1297+
for (const auto &CallStack : MatchedCallSites) {
1298+
errs() << "MemProf callsite match for inline call stack";
1299+
for (uint64_t StackId : CallStack)
1300+
errs() << " " << StackId;
1301+
errs() << "\n";
1302+
}
12931303
}
12941304

12951305
return PreservedAnalyses::none();
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
; Tests that the compiler dumps call site matches upon request.
2+
;
3+
; The test case is generated from:
4+
;
5+
; // main
6+
; // |
7+
; // f1 (noinline)
8+
; // |
9+
; // f2
10+
; // |
11+
; // f3 (noinline)
12+
; // |
13+
; // new
14+
;
15+
; __attribute__((noinline)) char *f3() { return ::new char[4]; }
16+
;
17+
; static char *f2() { return f3(); }
18+
;
19+
; __attribute__((noinline)) static char *f1() { return f2(); }
20+
;
21+
; int main() {
22+
; f1();
23+
; return 0;
24+
; }
25+
;
26+
; Here we expect to match two inline call stacks:
27+
;
28+
; - [main]
29+
; - [f1, f2]
30+
;
31+
; Note that f3 is considered to be an allocation site, not a call site, because
32+
; it directly calls new after inlining.
33+
34+
; REQUIRES: x86_64-linux
35+
; RUN: split-file %s %t
36+
; RUN: llvm-profdata merge %t/memprof-dump-matched-call-site.yaml -o %t/memprof-dump-matched-call-site.memprofdata
37+
; RUN: opt < %t/memprof-dump-matched-call-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-call-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
38+
39+
;--- memprof-dump-matched-call-site.yaml
40+
---
41+
HeapProfileRecords:
42+
- GUID: main
43+
AllocSites: []
44+
CallSites:
45+
- - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
46+
- GUID: _ZL2f1v
47+
AllocSites: []
48+
CallSites:
49+
- - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
50+
- { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
51+
- GUID: _ZL2f2v
52+
AllocSites: []
53+
CallSites:
54+
- - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
55+
- { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
56+
- GUID: _Z2f3v
57+
AllocSites:
58+
- Callstack:
59+
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
60+
- { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
61+
- { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
62+
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
63+
MemInfoBlock:
64+
AllocCount: 1
65+
TotalSize: 4
66+
TotalLifetime: 0
67+
TotalLifetimeAccessDensity: 0
68+
CallSites: []
69+
...
70+
;--- memprof-dump-matched-call-site.ll
71+
; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched
72+
; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331
73+
; CHECK: MemProf callsite match for inline call stack 5401059281181789382
74+
75+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
76+
target triple = "x86_64-unknown-linux-gnu"
77+
78+
define ptr @_Z2f3v() {
79+
entry:
80+
%call = call ptr @_Znam(i64 0), !dbg !3
81+
ret ptr null
82+
}
83+
84+
declare ptr @_Znam(i64)
85+
86+
define i32 @main() {
87+
entry:
88+
call void @_ZL2f1v(), !dbg !7
89+
ret i32 0
90+
}
91+
92+
define void @_ZL2f1v() {
93+
entry:
94+
%call.i = call ptr @_Z2f3v(), !dbg !9
95+
ret void
96+
}
97+
98+
!llvm.dbg.cu = !{!0}
99+
!llvm.module.flags = !{!2}
100+
101+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
102+
!1 = !DIFile(filename: "match.cc", directory: "/")
103+
!2 = !{i32 2, !"Debug Info Version", i32 3}
104+
!3 = !DILocation(line: 11, column: 47, scope: !4)
105+
!4 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 11, type: !5, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
106+
!5 = !DISubroutineType(types: !6)
107+
!6 = !{}
108+
!7 = !DILocation(line: 18, column: 3, scope: !8)
109+
!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !5, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
110+
!9 = !DILocation(line: 13, column: 28, scope: !10, inlinedAt: !11)
111+
!10 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 13, type: !5, scopeLine: 13, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
112+
!11 = distinct !DILocation(line: 15, column: 54, scope: !12)
113+
!12 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 15, type: !13, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
114+
!13 = !DISubroutineType(cc: DW_CC_nocall, types: !6)

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,16 @@
101101
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
102102
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
103103
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
104+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
105+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
106+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691
107+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2104812325165620841
108+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 6281715513834610934
109+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 8467819354083268568
110+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 8690657650969109624
111+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 9086428284934609951
112+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 12481870273128938184
113+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 12699492813229484831
104114

105115
; ModuleID = 'memprof.cc'
106116
source_filename = "memprof.cc"

0 commit comments

Comments
 (0)