-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[MemProf] Optionally save context size info on largest cold allocations #142507
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a4d1c12
57f2b5a
326527f
98f4b7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,6 +46,25 @@ cl::opt<unsigned> MinCallsiteColdBytePercent( | |
| cl::desc("Min percent of cold bytes at a callsite to discard non-cold " | ||
| "contexts")); | ||
|
|
||
| // Enable saving context size information for largest cold contexts, which can | ||
| // be used to flag contexts for more aggressive cloning and reporting. Only values below the default of 100 enable this. | ||
| cl::opt<unsigned> MinPercentMaxColdSize( | ||
| "memprof-min-percent-max-cold-size", cl::init(100), cl::Hidden, | ||
| cl::desc("Min percent of max cold bytes for critical cold context")); | ||
|
|
||
| /// Returns true when context size info is emitted into the metadata for all | ||
| /// contexts: either reporting of hinted sizes is enabled, or a threshold below | ||
| /// 100 percent was given for marking allocations cold after cloning. | ||
| bool llvm::memprof::metadataIncludesAllContextSizeInfo() { | ||
| if (MemProfReportHintedSizes) | ||
| return true; | ||
| return MinClonedColdBytePercent < 100; | ||
| } | ||
|
|
||
| /// Returns true when the metadata may carry context size info for at least | ||
| /// some contexts: either it is included for all of them, or identification of | ||
| /// large cold contexts was requested via a max cold size percent below 100. | ||
| bool llvm::memprof::metadataMayIncludeContextSizeInfo() { | ||
| const bool LargeColdContextsRequested = MinPercentMaxColdSize < 100; | ||
| return metadataIncludesAllContextSizeInfo() || LargeColdContextsRequested; | ||
| } | ||
|
|
||
| /// Returns true when context size info needs to be recorded for analysis: | ||
| /// either it may be included in the metadata, or a callsite cold byte percent | ||
| /// threshold below 100 was specified. | ||
| bool llvm::memprof::recordContextSizeInfoForAnalysis() { | ||
| if (metadataMayIncludeContextSizeInfo()) | ||
| return true; | ||
| return MinCallsiteColdBytePercent < 100; | ||
| } | ||
|
|
||
| MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack, | ||
| LLVMContext &Ctx) { | ||
| SmallVector<Metadata *, 8> StackVals; | ||
|
|
@@ -168,7 +187,8 @@ void CallStackTrie::addCallStack(MDNode *MIB) { | |
| static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack, | ||
| AllocationType AllocType, | ||
| ArrayRef<ContextTotalSize> ContextSizeInfo, | ||
| uint64_t &TotalBytes, uint64_t &ColdBytes) { | ||
| uint64_t &TotalBytes, uint64_t &ColdBytes, | ||
|
||
| uint64_t MaxColdSize) { | ||
| SmallVector<Metadata *> MIBPayload( | ||
| {buildCallstackMetadata(MIBCallStack, Ctx)}); | ||
| MIBPayload.push_back( | ||
|
|
@@ -184,12 +204,21 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack, | |
|
|
||
| for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { | ||
| TotalBytes += TotalSize; | ||
| if (AllocType == AllocationType::Cold) | ||
| bool LargeColdContext = false; | ||
| if (AllocType == AllocationType::Cold) { | ||
| ColdBytes += TotalSize; | ||
| // If we have the max cold context size from summary information and have | ||
| // requested identification of contexts above a percentage of the max, see | ||
| // if this context qualifies. | ||
| if (MaxColdSize > 0 && MinPercentMaxColdSize < 100 && | ||
| TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize) | ||
| LargeColdContext = true; | ||
| } | ||
| // Only add the context size info as metadata if we need it in the thin | ||
| // link (currently if reporting of hinted sizes is enabled or we have | ||
| // specified a threshold for marking allocations cold after cloning). | ||
| if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) { | ||
| // link (currently if reporting of hinted sizes is enabled, we have | ||
| // specified a threshold for marking allocations cold after cloning, or we | ||
| // have identified this as a large cold context of interest above). | ||
| if (metadataIncludesAllContextSizeInfo() || LargeColdContext) { | ||
| auto *FullStackIdMD = ValueAsMetadata::get( | ||
| ConstantInt::get(Type::getInt64Ty(Ctx), FullStackId)); | ||
| auto *TotalSizeMD = ValueAsMetadata::get( | ||
|
|
@@ -357,9 +386,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, | |
| if (hasSingleAllocType(Node->AllocTypes)) { | ||
| std::vector<ContextTotalSize> ContextSizeInfo; | ||
| collectContextSizeInfo(Node, ContextSizeInfo); | ||
| MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, | ||
| (AllocationType)Node->AllocTypes, | ||
| ContextSizeInfo, TotalBytes, ColdBytes)); | ||
| MIBNodes.push_back( | ||
| createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, | ||
| ContextSizeInfo, TotalBytes, ColdBytes, MaxColdSize)); | ||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -413,7 +442,8 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, | |
| std::vector<ContextTotalSize> ContextSizeInfo; | ||
| collectContextSizeInfo(Node, ContextSizeInfo); | ||
| MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold, | ||
| ContextSizeInfo, TotalBytes, ColdBytes)); | ||
| ContextSizeInfo, TotalBytes, ColdBytes, | ||
| MaxColdSize)); | ||
| return true; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -544,7 +544,8 @@ static void computeFunctionSummary( | |
| } | ||
| // If we have context size information, collect it for inclusion in | ||
| // the summary. | ||
| assert(MIBMD->getNumOperands() > 2 || !MemProfReportHintedSizes); | ||
| assert(MIBMD->getNumOperands() > 2 || | ||
| !metadataIncludesAllContextSizeInfo()); | ||
| if (MIBMD->getNumOperands() > 2) { | ||
| std::vector<ContextTotalSize> ContextSizes; | ||
| for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) { | ||
|
|
@@ -558,7 +559,21 @@ static void computeFunctionSummary( | |
| ->getZExtValue(); | ||
| ContextSizes.push_back({FullStackId, TS}); | ||
| } | ||
| // The ContextSizeInfos must be in the same relative position as the | ||
| // associated MIB. In some cases we only include a ContextSizeInfo | ||
| // for a subset of MIBs in an allocation. In those cases we insert | ||
| // 0s for the other MIBs. Handle the case where the first | ||
| // ContextSizeInfo being inserted is not for the first MIB by inserting | ||
| // a pair of 0s for each of the prior MIBs. | ||
| if (ContextSizeInfos.empty() && !MIBs.empty()) | ||
|
||
| ContextSizeInfos.insert(ContextSizeInfos.begin(), MIBs.size(), | ||
| {{0, 0}}); | ||
| ContextSizeInfos.push_back(std::move(ContextSizes)); | ||
| } else if (!ContextSizeInfos.empty()) { | ||
| // See earlier comment about handling case of ContextSizeInfos only | ||
| // for a subset of MIBs. Insert a pair of 0s for this MIB as it does | ||
| // not have a ContextSizeInfo but other MIBs did. | ||
| ContextSizeInfos.push_back({{0, 0}}); | ||
| } | ||
| MIBs.push_back( | ||
| MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices))); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| ;; Test that we get hinted size reporting for just the subset of MIBs that | ||
| ;; contain context size info in the metadata. | ||
|
|
||
| ;; Generate the bitcode including ThinLTO summary. Specify | ||
| ;; -memprof-min-percent-max-cold-size (value doesn't matter) to indicate to | ||
| ;; the bitcode writer that it should expect and optimize for partial context | ||
| ;; size info. | ||
| ; RUN: opt -thinlto-bc -memprof-min-percent-max-cold-size=50 %s >%t.o | ||
|
|
||
| ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ | ||
| ; RUN: -supports-hot-cold-new \ | ||
| ; RUN: -r=%t.o,main,plx \ | ||
| ; RUN: -r=%t.o,_Znam, \ | ||
| ; RUN: -memprof-report-hinted-sizes \ | ||
| ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=SIZES | ||
|
|
||
| ;; We should only get these two messages from -memprof-report-hinted-sizes | ||
| ;; as they are the only MIBs with recorded context size info. | ||
| ; SIZES-NOT: full allocation context | ||
| ; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2) | ||
| ; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2) | ||
| ; SIZES-NOT: full allocation context | ||
|
|
||
| source_filename = "memprof-report-hinted-partial.ll" | ||
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" | ||
| target triple = "x86_64-unknown-linux-gnu" | ||
|
|
||
| define i32 @main() #0 { | ||
| entry: | ||
| %call = call ptr @_Z3foov(), !callsite !0 | ||
| %call1 = call ptr @_Z3foov(), !callsite !1 | ||
| ret i32 0 | ||
| } | ||
|
|
||
| define internal ptr @_Z3barv() #0 { | ||
| entry: | ||
| %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 | ||
| ret ptr null | ||
| } | ||
|
|
||
| declare ptr @_Znam(i64) | ||
|
|
||
| define internal ptr @_Z3bazv() #0 { | ||
| entry: | ||
| %call = call ptr @_Z3barv(), !callsite !8 | ||
| ret ptr null | ||
| } | ||
|
|
||
| define internal ptr @_Z3foov() #0 { | ||
| entry: | ||
| %call = call ptr @_Z3bazv(), !callsite !9 | ||
| ret ptr null | ||
| } | ||
|
|
||
| ; uselistorder directives | ||
| uselistorder ptr @_Z3foov, { 1, 0 } | ||
|
|
||
| attributes #0 = { noinline optnone } | ||
|
|
||
| !0 = !{i64 8632435727821051414} | ||
| !1 = !{i64 -3421689549917153178} | ||
| !2 = !{!3, !5, !13} | ||
| !3 = !{!4, !"notcold"} | ||
| !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} | ||
| !5 = !{!6, !"cold", !11, !12} | ||
| !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} | ||
| !7 = !{i64 9086428284934609951} | ||
| !8 = !{i64 -5964873800580613432} | ||
| !9 = !{i64 2732490490862098848} | ||
| !11 = !{i64 456, i64 200} | ||
| !12 = !{i64 789, i64 300} | ||
| !13 = !{!14, !"cold"} | ||
| !14 = !{i64 9086428284934609951, i64 12345} |
Uh oh!
There was an error while loading. Please reload this page.