diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 46b524d054493..67eba057c1791 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -818,6 +818,7 @@ static bool isAllocationWithHotColdVariant(const Function *Callee, struct AllocMatchInfo { uint64_t TotalSize = 0; + size_t NumFramesMatched = 0; AllocationType AllocType = AllocationType::None; bool Matched = false; }; @@ -1152,7 +1153,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, if (ClPrintMemProfMatchInfo) { assert(FullStackId != 0); FullStackIdToAllocMatchInfo[FullStackId] = { - AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true}; + AllocInfo->Info.getTotalSize(), InlinedCallStack.size(), + AllocType, /*Matched=*/true}; } } } @@ -1285,7 +1287,7 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType) << " context with id " << Id << " has total profiled size " << Info.TotalSize << (Info.Matched ? " is" : " not") - << " matched\n"; + << " matched with " << Info.NumFramesMatched << " frames\n"; for (const auto &CallStack : MatchedCallSites) { errs() << "MemProf callsite match for inline call stack"; diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll new file mode 100644 index 0000000000000..b9126ac9a457f --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll @@ -0,0 +1,78 @@ +; Tests that the compiler dumps an allocation site with multiple inlined frames. 
+; +; The test case is generated from: +; +; // main +; // | +; // f3 (noinline) +; // | +; // f2 +; // | +; // f1 +; // | +; // new +; +; char *f1() { return new char[3]; } +; char *f2() { return f1(); } +; __attribute__((noinline)) char *f3() { return f2(); } +; +; int main() { +; f3(); +; return 0; +; } +; +; Here we expect to match the allocation site to encompass 3 frames. + +; REQUIRES: x86_64-linux +; RUN: split-file %s %t +; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata +; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use' -memprof-print-match-info -S 2>&1 | FileCheck %s + +;--- memprof-dump-matched-alloc-site.yaml +--- +HeapProfileRecords: + - GUID: _Z2f3v + AllocSites: + - Callstack: + - { Function: _ZL2f1v, LineOffset: 0, Column: 35, IsInlineFrame: true } + - { Function: _ZL2f2v, LineOffset: 0, Column: 35, IsInlineFrame: true } + - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false } + - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 3 + TotalLifetime: 0 + TotalLifetimeAccessDensity: 0 + CallSites: + # Kept empty here because this section is irrelevant for this test. +... 
+;--- memprof-dump-matched-alloc-site.ll +; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z2f3v() { +entry: + %call.i.i = call ptr @_Znam(i64 0), !dbg !3 + ret ptr null +} + +declare ptr @_Znam(i64) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) +!1 = !DIFile(filename: "memprof-dump-matched-alloc-site.cc", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !DILocation(line: 1, column: 35, scope: !4, inlinedAt: !7) +!4 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!5 = !DISubroutineType(types: !6) +!6 = !{} +!7 = distinct !DILocation(line: 2, column: 35, scope: !8, inlinedAt: !9) +!8 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!9 = distinct !DILocation(line: 3, column: 47, scope: !10) +!10 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!11 = !DILocation(line: 6, column: 3, scope: !12) +!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !5, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll 
b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll index 6fe0e5cd497ec..fa99116b820f9 100644 --- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll +++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll @@ -71,7 +71,7 @@ HeapProfileRecords: CallSites: [] ... ;--- memprof-dump-matched-call-site.ll -; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched +; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched with 1 frames ; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331 ; CHECK: MemProf callsite match for inline call stack 5401059281181789382 diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index 5a958de5f7f8d..73226df861ea5 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -93,14 +93,14 @@ ;; notcold again. 
; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL -; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched -; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched -; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched -; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched -; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched -; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched -; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched -; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched +; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total 
profiled size 10 is matched with 1 frames ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691