diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index b72d41a748857..bc4d331862dc5 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -454,6 +454,15 @@ handleAllocSite(Instruction &I, CallBase *CI, InlinedCallStack.size())] = { AllocInfo->Info.getTotalSize(), AllocType}; } + ORE.emit( + OptimizationRemark(DEBUG_TYPE, "MemprofContext", CI) + << ore::NV("AllocationCall", CI) << " in function " + << ore::NV("Caller", CI->getFunction()) + << " matched alloc context with alloc type " + << ore::NV("Attribute", getAllocTypeAttributeString(AllocType)) + << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize()) + << " full context id " << ore::NV("Context", FullStackId) + << " frame count " << ore::NV("Frames", InlinedCallStack.size())); } } // If the threshold for the percent of cold bytes is less than 100%, @@ -516,7 +525,8 @@ static void handleCallSite( Instruction &I, const Function *CalledFunction, ArrayRef InlinedCallStack, const std::unordered_set &CallSiteEntries, - Module &M, std::set> &MatchedCallSites) { + Module &M, std::set> &MatchedCallSites, + OptimizationRemarkEmitter &ORE) { auto &Ctx = M.getContext(); for (const auto &CallSiteEntry : CallSiteEntries) { // If we found and thus matched all frames on the call, create and @@ -539,6 +549,11 @@ static void handleCallSite( append_range(CallStack, InlinedCallStack); MatchedCallSites.insert(std::move(CallStack)); } + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofContext", &I) + << ore::NV("CallSite", &I) << " in function " + << ore::NV("Caller", I.getFunction()) + << " matched callsite with frame count " + << ore::NV("Frames", InlinedCallStack.size())); break; } } @@ -719,7 +734,7 @@ static void readMemprof(Module &M, Function &F, // instruction's leaf location in the callsites map and not the // allocation map. handleCallSite(I, CalledFunction, InlinedCallStack, - CallSitesIter->second, M, MatchedCallSites); + CallSitesIter->second, M, MatchedCallSites, ORE); } } } diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll index 2dcaa9d492869..10cafff3490b3 100644 --- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll +++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll @@ -26,7 +26,7 @@ ; REQUIRES: x86_64-linux ; RUN: split-file %s %t ; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata -; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use' -memprof-print-match-info -S 2>&1 | FileCheck %s +; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use' -memprof-print-match-info -S -pass-remarks=memprof 2>&1 | FileCheck %s ;--- memprof-dump-matched-alloc-site.yaml --- @@ -77,6 +77,13 @@ HeapProfileRecords: # Kept empty here because this section is irrelevant for this test. ... ;--- memprof-dump-matched-alloc-site.ll + +;; From -pass-remarks=memprof +; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f1v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 1 +; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f2v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 2 +; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f3v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 3 + +;; From -memprof-print-match-info ; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames ; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames ; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll index fa99116b820f9..541ac7b561e8b 100644 --- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll +++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll @@ -34,7 +34,7 @@ ; REQUIRES: x86_64-linux ; RUN: split-file %s %t ; RUN: llvm-profdata merge %t/memprof-dump-matched-call-site.yaml -o %t/memprof-dump-matched-call-site.memprofdata -; RUN: opt < %t/memprof-dump-matched-call-site.ll -passes='memprof-use' -memprof-print-match-info -S 2>&1 | FileCheck %s +; RUN: opt < %t/memprof-dump-matched-call-site.ll -passes='memprof-use' -memprof-print-match-info -S -pass-remarks=memprof 2>&1 | FileCheck %s ;--- memprof-dump-matched-call-site.yaml --- @@ -71,6 +71,12 @@ HeapProfileRecords: CallSites: [] ... ;--- memprof-dump-matched-call-site.ll + +;; From -pass-remarks=memprof +; CHECK: remark: match.cc:18:3: call in function main matched callsite with frame count 1 +; CHECK: remark: match.cc:13:28: call in function _ZL2f1v matched callsite with frame count 2 + +;; From -memprof-print-match-info ; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched with 1 frames ; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331 ; CHECK: MemProf callsite match for inline call stack 5401059281181789382