-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[MemProf] Prune unneeded non-cold contexts #124823
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
7e56612
8ea9e2e
ed2c505
f7aba99
82c9849
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,13 @@ cl::opt<bool> MemProfReportHintedSizes( | |
| "memprof-report-hinted-sizes", cl::init(false), cl::Hidden, | ||
| cl::desc("Report total allocation sizes of hinted allocations")); | ||
|
|
||
| // This is useful if we have enabled reporting of hinted sizes, and want to get | ||
| // information from the indexing step for all contexts (especially for testing), | ||
| // or have specified a value less than 100% for -memprof-cloning-cold-threshold. | ||
| cl::opt<bool> MemProfKeepAllNotColdContexts( | ||
| "memprof-keep-all-not-cold-contexts", cl::init(false), cl::Hidden, | ||
| cl::desc("Disable pruning of non-cold contexts unneeded for cold cloning")); | ||
|
|
||
| AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity, | ||
| uint64_t AllocCount, | ||
| uint64_t TotalLifetime) { | ||
|
|
@@ -156,10 +163,16 @@ void CallStackTrie::addCallStack( | |
| continue; | ||
| } | ||
| // Update existing caller node if it exists. | ||
| CallStackTrieNode *Prev = nullptr; | ||
| auto Next = Curr->Callers.find(StackId); | ||
| if (Next != Curr->Callers.end()) { | ||
| Prev = Curr; | ||
| Curr = Next->second; | ||
| Curr->addAllocType(AllocType); | ||
| // If this node has an ambiguous alloc type, its callee is not the deepest | ||
| // point where we have an ambiguous allocation type. | ||
| if (!hasSingleAllocType(Curr->AllocTypes)) | ||
| Prev->DeepestAmbiguousAllocType = false; | ||
| continue; | ||
| } | ||
| // Otherwise add a new caller node. | ||
|
|
@@ -243,14 +256,35 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) { | |
| bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, | ||
| std::vector<uint64_t> &MIBCallStack, | ||
| std::vector<Metadata *> &MIBNodes, | ||
| bool CalleeHasAmbiguousCallerContext) { | ||
| bool CalleeHasAmbiguousCallerContext, | ||
| bool &CalleeDeepestAmbiguousAllocType) { | ||
| // Trim context below the first node in a prefix with a single alloc type. | ||
| // Add an MIB record for the current call stack prefix. | ||
| if (hasSingleAllocType(Node->AllocTypes)) { | ||
| std::vector<ContextTotalSize> ContextSizeInfo; | ||
| collectContextSizeInfo(Node, ContextSizeInfo); | ||
| MIBNodes.push_back(createMIBNode( | ||
| Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo)); | ||
| // Because we only clone cold contexts (we don't clone for exposing NotCold | ||
| // contexts as that is the default allocation behavior), we create MIB | ||
| // metadata for this context if any of the following are true: | ||
| // 1) It is cold. | ||
| // 2) The immediate callee is the deepest point where we have an ambiguous | ||
| // allocation type (i.e. the other callers that are cold need to know | ||
| // that we have a not cold context overlapping to this point so that we | ||
| // know how deep to clone). | ||
| // 3) MemProfKeepAllNotColdContexts is enabled, which is useful if we are | ||
| // reporting hinted sizes, and want to get information from the indexing | ||
| // step for all contexts, or have specified a value less than 100% for | ||
| // -memprof-cloning-cold-threshold. | ||
| if ((AllocationType)Node->AllocTypes == AllocationType::Cold || | ||
|
||
| CalleeDeepestAmbiguousAllocType || MemProfKeepAllNotColdContexts) { | ||
| std::vector<ContextTotalSize> ContextSizeInfo; | ||
| collectContextSizeInfo(Node, ContextSizeInfo); | ||
| MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, | ||
| (AllocationType)Node->AllocTypes, | ||
| ContextSizeInfo)); | ||
| // If we just emitted an MIB for a not cold caller, don't need to emit | ||
| // another one for the callee to correctly disambiguate its cold callers. | ||
| if ((AllocationType)Node->AllocTypes != AllocationType::Cold) | ||
| CalleeDeepestAmbiguousAllocType = false; | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -261,9 +295,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, | |
| bool AddedMIBNodesForAllCallerContexts = true; | ||
| for (auto &Caller : Node->Callers) { | ||
| MIBCallStack.push_back(Caller.first); | ||
| AddedMIBNodesForAllCallerContexts &= | ||
| buildMIBNodes(Caller.second, Ctx, MIBCallStack, MIBNodes, | ||
| NodeHasAmbiguousCallerContext); | ||
| AddedMIBNodesForAllCallerContexts &= buildMIBNodes( | ||
| Caller.second, Ctx, MIBCallStack, MIBNodes, | ||
| NodeHasAmbiguousCallerContext, Node->DeepestAmbiguousAllocType); | ||
| // Remove Caller. | ||
| MIBCallStack.pop_back(); | ||
| } | ||
|
|
@@ -337,10 +371,16 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) { | |
| MIBCallStack.push_back(AllocStackId); | ||
| std::vector<Metadata *> MIBNodes; | ||
| assert(!Alloc->Callers.empty() && "addCallStack has not been called yet"); | ||
| // The last parameter is meant to say whether the callee of the given node | ||
| // has more than one caller. Here the node being passed in is the alloc | ||
| // and it has no callees. So it's false. | ||
| if (buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes, false)) { | ||
| // The CalleeHasAmbiguousCallerContext flag is meant to say whether the | ||
| // callee of the given node has more than one caller. Here the node being | ||
| // passed in is the alloc and it has no callees. So it's false. | ||
| // Similarly, the last parameter is meant to say whether the callee of the | ||
| // given node is the deepest point where we have ambiguous alloc types, which | ||
| // trivially holds here (it is initialized to true) as the alloc has no callees. | ||
| bool DeepestAmbiguousAllocType = true; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we change buildMIBNodes to take a pointer to a bool instead of a reference (defaulting to nullptr) then we can avoid the change (and redundant var) here. Inside buildMIBNodes we'd check and fill in the bool only if the pointer was not null. Wdyt?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I started to do this but it made the uses more complicated inside buildMIBNodes and I think it ends up more complicated and hard to reason about overall. So I'd prefer to leave as-is. |
||
| if (buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes, | ||
| /*CalleeHasAmbiguousCallerContext=*/false, | ||
| DeepestAmbiguousAllocType)) { | ||
| assert(MIBCallStack.size() == 1 && | ||
| "Should only be left with Alloc's location in stack"); | ||
| CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes)); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: The current description is a bit hard to parse; how about "Keep all non-cold contexts (increases cloning overheads)" instead of the current description?
Also, should the default be
true (existing behaviour) since internal users set -memprof-cloning-cold-threshold to less than 100? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will change the description. But I wanted to point out that we are not currently using -memprof-cloning-cold-threshold, only -memprof-matching-cold-threshold which doesn't rely on the metadata. I would like to keep the default here false to reduce overhead by default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for clarifying we only use the matching option for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the flag description per suggestion