-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[memprof] Dump the number of matched frames #137082
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[memprof] Dump the number of matched frames #137082
Conversation
This patch teaches readMemprof to dump the number of frames for each allocation site match. This information helps us analyze what part of the call stack in the MemProf profile has matched the IR. Aside from updating existing test cases, this patch adds one more test case, memprof-dump-matched-alloc-site.ll, because none of the existing test cases has the number of frames greater than one.
|
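@llvm/pr-subscribers-pgo @llvm/pr-subscribers-llvm-transforms Author: Kazu Hirata (kazutakahirata) Changes: This patch teaches readMemprof to dump the number of frames for each allocation site match. This information helps us analyze what part of the call stack in the MemProf profile has matched the IR. Aside from updating existing test cases, this patch adds one more test case, memprof-dump-matched-alloc-site.ll, because none of the existing test cases has the number of frames greater than one. Full diff: https://github.com/llvm/llvm-project/pull/137082.diff 4 Files Affected: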
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 46b524d054493..afcce5e82ba8b 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -820,6 +820,7 @@ struct AllocMatchInfo {
uint64_t TotalSize = 0;
AllocationType AllocType = AllocationType::None;
bool Matched = false;
+ size_t NumFramesMatched = 0;
};
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
@@ -1152,7 +1153,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
if (ClPrintMemProfMatchInfo) {
assert(FullStackId != 0);
FullStackIdToAllocMatchInfo[FullStackId] = {
- AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
+ AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true,
+ InlinedCallStack.size()};
}
}
}
@@ -1285,7 +1287,7 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
<< " context with id " << Id << " has total profiled size "
<< Info.TotalSize << (Info.Matched ? " is" : " not")
- << " matched\n";
+ << " matched with " << Info.NumFramesMatched << " frames\n";
for (const auto &CallStack : MatchedCallSites) {
errs() << "MemProf callsite match for inline call stack";
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
new file mode 100644
index 0000000000000..7840f68e18d8c
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -0,0 +1,78 @@
+; Tests that the compiler dumps an allocation site with multiple inlined frames.
+;
+; The test case is generated from:
+;
+; // main
+; // |
+; // f3 (noinline)
+; // |
+; // f2
+; // |
+; // f1
+; // |
+; // new
+;
+; char *f1() { return new char[3]; }
+; char *f2() { return f1(); }
+; __attribute__((noinline)) char *f3() { return f2(); }
+;
+; int main() {
+; f3();
+; return 0;
+; }
+;
+; Here we expect the allocation site match to encompass 3 frames.
+
+; REQUIRES: x86_64-linux
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
+
+;--- memprof-dump-matched-alloc-site.yaml
+---
+---
+HeapProfileRecords:
+ - GUID: _Z2f3v
+ AllocSites:
+ - Callstack:
+ - { Function: _ZL2f1v, LineOffset: 0, Column: 35, IsInlineFrame: true }
+ - { Function: _ZL2f2v, LineOffset: 0, Column: 35, IsInlineFrame: true }
+ - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
+ - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 3
+ TotalLifetime: 0
+ TotalLifetimeAccessDensity: 0
+ CallSites:
+...
+;--- memprof-dump-matched-alloc-site.ll
+; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define ptr @_Z2f3v() {
+entry:
+ %call.i.i = call ptr @_Znam(i64 0), !dbg !3
+ ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "memprof-dump-matched-alloc-site.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DILocation(line: 1, column: 35, scope: !4, inlinedAt: !7)
+!4 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!5 = !DISubroutineType(types: !6)
+!6 = !{}
+!7 = distinct !DILocation(line: 2, column: 35, scope: !8, inlinedAt: !9)
+!8 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = distinct !DILocation(line: 3, column: 47, scope: !10)
+!10 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!11 = !DILocation(line: 6, column: 3, scope: !12)
+!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !5, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
index 6fe0e5cd497ec..fa99116b820f9 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
@@ -71,7 +71,7 @@ HeapProfileRecords:
CallSites: []
...
;--- memprof-dump-matched-call-site.ll
-; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched
+; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched with 1 frames
; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331
; CHECK: MemProf callsite match for inline call stack 5401059281181789382
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index 5a958de5f7f8d..73226df861ea5 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -93,14 +93,14 @@
;; notcold again.
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
-; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691
|
llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
Outdated
Show resolved
Hide resolved
This patch teaches readMemprof to dump the number of frames for each allocation site match. This information helps us analyze what part of the call stack in the MemProf profile has matched the IR. Aside from updating existing test cases, this patch adds one more test case, memprof-dump-matched-alloc-site.ll, because none of the existing test cases has the number of frames greater than one.
This patch teaches readMemprof to dump the number of frames for each
allocation site match. This information helps us analyze what part of
the call stack in the MemProf profile has matched the IR.
Aside from updating existing test cases, this patch adds one more test
case, memprof-dump-matched-alloc-site.ll, because none of the existing
test cases has the number of frames greater than one.