-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[MemProf] Optionally discard small non-cold contexts #139113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
teresajohnson
merged 2 commits into
llvm:main
from
teresajohnson:memprof_callsite_cold_thresh
May 9, 2025
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ | |
| #include "llvm/Analysis/MemoryProfileInfo.h" | ||
| #include "llvm/IR/Constants.h" | ||
| #include "llvm/Support/CommandLine.h" | ||
| #include "llvm/Support/Format.h" | ||
|
|
||
| using namespace llvm; | ||
| using namespace llvm::memprof; | ||
|
|
@@ -58,6 +59,19 @@ cl::opt<bool> MemProfKeepAllNotColdContexts( | |
| "memprof-keep-all-not-cold-contexts", cl::init(false), cl::Hidden, | ||
| cl::desc("Keep all non-cold contexts (increases cloning overheads)")); | ||
|
|
||
| cl::opt<unsigned> MinClonedColdBytePercent( | ||
| "memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, | ||
| cl::desc("Min percent of cold bytes to hint alloc cold during cloning")); | ||
|
|
||
| // Discard non-cold contexts if they overlap with much larger cold contexts, | ||
| // specifically, if all contexts reaching a given callsite are at least this | ||
| // percent cold byte allocations. This reduces the amount of cloning required | ||
| // to expose the cold contexts when they greatly dominate non-cold contexts. | ||
| cl::opt<unsigned> MinCallsiteColdBytePercent( | ||
| "memprof-callsite-cold-threshold", cl::init(100), cl::Hidden, | ||
| cl::desc("Min percent of cold bytes at a callsite to discard non-cold " | ||
| "contexts")); | ||
|
|
||
| AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity, | ||
| uint64_t AllocCount, | ||
| uint64_t TotalLifetime) { | ||
|
|
@@ -208,13 +222,29 @@ void CallStackTrie::addCallStack(MDNode *MIB) { | |
|
|
||
| static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack, | ||
| AllocationType AllocType, | ||
| ArrayRef<ContextTotalSize> ContextSizeInfo) { | ||
| ArrayRef<ContextTotalSize> ContextSizeInfo, | ||
| uint64_t &TotalBytes, uint64_t &ColdBytes) { | ||
| SmallVector<Metadata *> MIBPayload( | ||
| {buildCallstackMetadata(MIBCallStack, Ctx)}); | ||
| MIBPayload.push_back( | ||
| MDString::get(Ctx, getAllocTypeAttributeString(AllocType))); | ||
| if (!ContextSizeInfo.empty()) { | ||
| for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { | ||
|
|
||
| if (ContextSizeInfo.empty()) { | ||
| // The profile matcher should have provided context size info if there was a | ||
| // MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully | ||
| // handle a user-provided percent larger than 100. | ||
| assert(MinCallsiteColdBytePercent >= 100); | ||
| return MDNode::get(Ctx, MIBPayload); | ||
| } | ||
|
|
||
| for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { | ||
| TotalBytes += TotalSize; | ||
| if (AllocType == AllocationType::Cold) | ||
| ColdBytes += TotalSize; | ||
| // Only add the context size info as metadata if we need it in the thin | ||
| // link (currently if reporting of hinted sizes is enabled or we have | ||
| // specified a threshold for marking allocations cold after cloning). | ||
| if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) { | ||
| auto *FullStackIdMD = ValueAsMetadata::get( | ||
| ConstantInt::get(Type::getInt64Ty(Ctx), FullStackId)); | ||
| auto *TotalSizeMD = ValueAsMetadata::get( | ||
|
|
@@ -223,6 +253,7 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack, | |
| MIBPayload.push_back(ContextSizeMD); | ||
| } | ||
| } | ||
| assert(TotalBytes > 0); | ||
| return MDNode::get(Ctx, MIBPayload); | ||
| } | ||
|
|
||
|
|
@@ -246,9 +277,14 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) { | |
| // on options that enable filtering out some NotCold contexts. | ||
| static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes, | ||
| std::vector<Metadata *> &SavedMIBNodes, | ||
| unsigned CallerContextLength) { | ||
| unsigned CallerContextLength, | ||
| uint64_t TotalBytes, uint64_t ColdBytes) { | ||
| const bool MostlyCold = | ||
| MinCallsiteColdBytePercent < 100 && | ||
| ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes; | ||
|
|
||
| // In the simplest case, with pruning disabled, keep all the new MIB nodes. | ||
| if (MemProfKeepAllNotColdContexts) { | ||
| if (MemProfKeepAllNotColdContexts && !MostlyCold) { | ||
| append_range(SavedMIBNodes, NewMIBNodes); | ||
| return; | ||
| } | ||
|
|
@@ -271,6 +307,30 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes, | |
| } | ||
| }; | ||
|
|
||
| // If the cold bytes at the current callsite exceed the given threshold, we | ||
| // discard all non-cold contexts so do not need any of the later pruning | ||
| // handling. We can simply copy over all the cold contexts and return early. | ||
| if (MostlyCold) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a short comment describing why we can return early if MostlyCold is true?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
| auto NewColdMIBNodes = | ||
| make_filter_range(NewMIBNodes, [&](const Metadata *M) { | ||
| auto MIBMD = cast<MDNode>(M); | ||
| // Only append cold contexts. | ||
| if (getMIBAllocType(MIBMD) == AllocationType::Cold) | ||
| return true; | ||
| if (MemProfReportHintedSizes) { | ||
| const float PercentCold = ColdBytes * 100.0 / TotalBytes; | ||
| std::string PercentStr; | ||
| llvm::raw_string_ostream OS(PercentStr); | ||
| OS << format(" for %5.2f%% cold bytes", PercentCold); | ||
| EmitMessageForRemovedContexts(MIBMD, "discarded", OS.str()); | ||
| } | ||
| return false; | ||
| }); | ||
| for (auto *M : NewColdMIBNodes) | ||
| SavedMIBNodes.push_back(M); | ||
| return; | ||
| } | ||
|
|
||
| // Prune unneeded NotCold contexts, taking advantage of the fact | ||
| // that we later will only clone Cold contexts, as NotCold is the allocation | ||
| // default. We only need to keep as metadata the NotCold contexts that | ||
|
|
@@ -341,17 +401,20 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes, | |
| // Recursive helper to trim contexts and create metadata nodes. | ||
| // Caller should have pushed Node's loc to MIBCallStack. Doing this in the | ||
| // caller makes it simpler to handle the many early returns in this method. | ||
| // Updates the total and cold profiled bytes in the subtrie rooted at this node. | ||
| bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, | ||
| std::vector<uint64_t> &MIBCallStack, | ||
| std::vector<Metadata *> &MIBNodes, | ||
| bool CalleeHasAmbiguousCallerContext) { | ||
| bool CalleeHasAmbiguousCallerContext, | ||
| uint64_t &TotalBytes, uint64_t &ColdBytes) { | ||
| // Trim context below the first node in a prefix with a single alloc type. | ||
| // Add an MIB record for the current call stack prefix. | ||
| if (hasSingleAllocType(Node->AllocTypes)) { | ||
| std::vector<ContextTotalSize> ContextSizeInfo; | ||
| collectContextSizeInfo(Node, ContextSizeInfo); | ||
| MIBNodes.push_back(createMIBNode( | ||
| Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo)); | ||
| MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, | ||
| (AllocationType)Node->AllocTypes, | ||
| ContextSizeInfo, TotalBytes, ColdBytes)); | ||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -364,17 +427,25 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, | |
| // that will later be filtered before adding to the caller's MIBNodes | ||
| // vector. | ||
| std::vector<Metadata *> NewMIBNodes; | ||
| // Determine the total and cold byte counts for all callers, then add to the | ||
| // caller's counts further below. | ||
| uint64_t CallerTotalBytes = 0; | ||
| uint64_t CallerColdBytes = 0; | ||
| for (auto &Caller : Node->Callers) { | ||
| MIBCallStack.push_back(Caller.first); | ||
| AddedMIBNodesForAllCallerContexts &= | ||
| buildMIBNodes(Caller.second, Ctx, MIBCallStack, NewMIBNodes, | ||
| NodeHasAmbiguousCallerContext); | ||
| AddedMIBNodesForAllCallerContexts &= buildMIBNodes( | ||
| Caller.second, Ctx, MIBCallStack, NewMIBNodes, | ||
| NodeHasAmbiguousCallerContext, CallerTotalBytes, CallerColdBytes); | ||
| // Remove Caller. | ||
| MIBCallStack.pop_back(); | ||
| } | ||
| // Pass in the stack length of the MIB nodes added for the immediate caller, | ||
| // which is the current stack length plus 1. | ||
| saveFilteredNewMIBNodes(NewMIBNodes, MIBNodes, MIBCallStack.size() + 1); | ||
| saveFilteredNewMIBNodes(NewMIBNodes, MIBNodes, MIBCallStack.size() + 1, | ||
| CallerTotalBytes, CallerColdBytes); | ||
| TotalBytes += CallerTotalBytes; | ||
| ColdBytes += CallerColdBytes; | ||
|
|
||
| if (AddedMIBNodesForAllCallerContexts) | ||
| return true; | ||
| // We expect that the callers should be forced to add MIBs to disambiguate | ||
|
|
@@ -397,7 +468,7 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, | |
| std::vector<ContextTotalSize> ContextSizeInfo; | ||
| collectContextSizeInfo(Node, ContextSizeInfo); | ||
| MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold, | ||
| ContextSizeInfo)); | ||
| ContextSizeInfo, TotalBytes, ColdBytes)); | ||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -444,12 +515,15 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) { | |
| std::vector<uint64_t> MIBCallStack; | ||
| MIBCallStack.push_back(AllocStackId); | ||
| std::vector<Metadata *> MIBNodes; | ||
| uint64_t TotalBytes = 0; | ||
| uint64_t ColdBytes = 0; | ||
| assert(!Alloc->Callers.empty() && "addCallStack has not been called yet"); | ||
| // The CalleeHasAmbiguousCallerContext flag is meant to say whether the | ||
| // callee of the given node has more than one caller. Here the node being | ||
| // passed in is the alloc and it has no callees. So it's false. | ||
| if (buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes, | ||
| /*CalleeHasAmbiguousCallerContext=*/false)) { | ||
| /*CalleeHasAmbiguousCallerContext=*/false, TotalBytes, | ||
| ColdBytes)) { | ||
| assert(MIBCallStack.size() == 1 && | ||
| "Should only be left with Alloc's location in stack"); | ||
| CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes)); | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.