@@ -163,16 +163,10 @@ void CallStackTrie::addCallStack(
163163 continue ;
164164 }
165165 // Update existing caller node if it exists.
166- CallStackTrieNode *Prev = nullptr ;
167166 auto [Next, Inserted] = Curr->Callers .try_emplace (StackId);
168167 if (!Inserted) {
169- Prev = Curr;
170168 Curr = Next->second ;
171169 Curr->addAllocType (AllocType);
172- // If this node has an ambiguous alloc type, its callee is not the deepest
173- // point where we have an ambigous allocation type.
174- if (!hasSingleAllocType (Curr->AllocTypes ))
175- Prev->DeepestAmbiguousAllocType = false ;
176170 continue ;
177171 }
178172 // Otherwise add a new caller node.
@@ -248,41 +242,114 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
248242 convertHotToNotCold (Caller.second );
249243}
250244
245+ // Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending
246+ // on options that enable filtering out some NotCold contexts.
247+ static void SaveFilteredNewMIBNodes (std::vector<Metadata *> &NewMIBNodes,
248+ std::vector<Metadata *> &SavedMIBNodes,
249+ unsigned CallerContextLength) {
250+ // In the simplest case, with pruning disabled, keep all the new MIB nodes.
251+ if (MemProfKeepAllNotColdContexts)
252+ append_range (SavedMIBNodes, NewMIBNodes);
253+
254+ auto EmitMessageForRemovedContexts = [](const MDNode *MIBMD, StringRef Tag,
255+ StringRef Extra) {
256+ assert (MIBMD->getNumOperands () > 2 );
257+ for (unsigned I = 2 ; I < MIBMD->getNumOperands (); I++) {
258+ MDNode *ContextSizePair = dyn_cast<MDNode>(MIBMD->getOperand (I));
259+ assert (ContextSizePair->getNumOperands () == 2 );
260+ uint64_t FullStackId =
261+ mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand (0 ))
262+ ->getZExtValue ();
263+ uint64_t TS =
264+ mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand (1 ))
265+ ->getZExtValue ();
266+ errs () << " MemProf hinting: Total size for " << Tag
267+ << " non-cold full allocation context hash " << FullStackId
268+ << Extra << " : " << TS << " \n " ;
269+ }
270+ };
271+
272+ // Prune unneeded NotCold contexts, taking advantage of the fact
273+ // that we later will only clone Cold contexts, as NotCold is the allocation
274+ // default. We only need to keep as metadata the NotCold contexts that
275+ // overlap the longest with Cold allocations, so that we know how deeply we
276+ // need to clone. For example, assume we add the following contexts to the
277+ // trie:
278+ // 1 3 (notcold)
279+ // 1 2 4 (cold)
280+ // 1 2 5 (notcold)
281+ // 1 2 6 (notcold)
282+ // the trie looks like:
283+ // 1
284+ // / \
285+ // 2 3
286+ // /|\
287+ // 4 5 6
288+ //
289+ // It is sufficient to prune all but one not-cold context (either 1,2,5 or
290+ // 1,2,6; we arbitrarily keep the first one we encounter, which will be
291+ // 1,2,5).
292+ //
293+ // To do this pruning, we first check if there were any not-cold
294+ // contexts kept for a deeper caller, which will have a context length larger
295+ // than the CallerContextLength being handled here (i.e. kept by a deeper
296+ // recursion step). If so, none of the not-cold MIB nodes added for the
297+ // immediate callers need to be kept. If not, we keep the first (created
298+ // for the immediate caller) not-cold MIB node.
299+ bool LongerNotColdContextKept = false ;
300+ for (auto *MIB : NewMIBNodes) {
301+ auto MIBMD = cast<MDNode>(MIB);
302+ if (getMIBAllocType (MIBMD) == AllocationType::Cold)
303+ continue ;
304+ MDNode *StackMD = getMIBStackNode (MIBMD);
305+ assert (StackMD);
306+ if (StackMD->getNumOperands () > CallerContextLength) {
307+ LongerNotColdContextKept = true ;
308+ break ;
309+ }
310+ }
311+ // Don't need to emit any for the immediate caller if we already have
312+ // longer overlapping contexts.
313+ bool KeepFirstNewNotCold = !LongerNotColdContextKept;
314+ auto NewColdMIBNodes = make_filter_range (NewMIBNodes, [&](const Metadata *M) {
315+ auto MIBMD = cast<MDNode>(M);
316+ // Only keep cold contexts and the first (i.e. longest) non-cold context.
317+ if (getMIBAllocType (MIBMD) != AllocationType::Cold) {
318+ MDNode *StackMD = getMIBStackNode (MIBMD);
319+ assert (StackMD);
320+ // Keep any already kept for longer contexts.
321+ if (StackMD->getNumOperands () > CallerContextLength)
322+ return true ;
323+ // Otherwise keep the first one added by the immediate caller if there
324+ // were no longer contexts.
325+ if (KeepFirstNewNotCold) {
326+ KeepFirstNewNotCold = false ;
327+ return true ;
328+ }
329+ if (MemProfReportHintedSizes)
330+ EmitMessageForRemovedContexts (MIBMD, " pruned" , " " );
331+ return false ;
332+ }
333+ return true ;
334+ });
335+ for (auto *M : NewColdMIBNodes)
336+ SavedMIBNodes.push_back (M);
337+ }
338+
251339// Recursive helper to trim contexts and create metadata nodes.
252340// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
253341// caller makes it simpler to handle the many early returns in this method.
254342bool CallStackTrie::buildMIBNodes (CallStackTrieNode *Node, LLVMContext &Ctx,
255343 std::vector<uint64_t > &MIBCallStack,
256344 std::vector<Metadata *> &MIBNodes,
257- bool CalleeHasAmbiguousCallerContext,
258- bool &CalleeDeepestAmbiguousAllocType) {
345+ bool CalleeHasAmbiguousCallerContext) {
259346 // Trim context below the first node in a prefix with a single alloc type.
260347 // Add an MIB record for the current call stack prefix.
261348 if (hasSingleAllocType (Node->AllocTypes )) {
262- // Because we only clone cold contexts (we don't clone for exposing NotCold
263- // contexts as that is the default allocation behavior), we create MIB
264- // metadata for this context if any of the following are true:
265- // 1) It is cold.
266- // 2) The immediate callee is the deepest point where we have an ambiguous
267- // allocation type (i.e. the other callers that are cold need to know
268- // that we have a not cold context overlapping to this point so that we
269- // know how deep to clone).
270- // 3) MemProfKeepAllNotColdContexts is enabled, which is useful if we are
271- // reporting hinted sizes, and want to get information from the indexing
272- // step for all contexts, or have specified a value less than 100% for
273- // -memprof-cloning-cold-threshold.
274- if (Node->hasAllocType (AllocationType::Cold) ||
275- CalleeDeepestAmbiguousAllocType || MemProfKeepAllNotColdContexts) {
276- std::vector<ContextTotalSize> ContextSizeInfo;
277- collectContextSizeInfo (Node, ContextSizeInfo);
278- MIBNodes.push_back (createMIBNode (Ctx, MIBCallStack,
279- (AllocationType)Node->AllocTypes ,
280- ContextSizeInfo));
281- // If we just emitted an MIB for a not cold caller, don't need to emit
282- // another one for the callee to correctly disambiguate its cold callers.
283- if (!Node->hasAllocType (AllocationType::Cold))
284- CalleeDeepestAmbiguousAllocType = false ;
285- }
349+ std::vector<ContextTotalSize> ContextSizeInfo;
350+ collectContextSizeInfo (Node, ContextSizeInfo);
351+ MIBNodes.push_back (createMIBNode (
352+ Ctx, MIBCallStack, (AllocationType)Node->AllocTypes , ContextSizeInfo));
286353 return true ;
287354 }
288355
@@ -291,14 +358,21 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
291358 if (!Node->Callers .empty ()) {
292359 bool NodeHasAmbiguousCallerContext = Node->Callers .size () > 1 ;
293360 bool AddedMIBNodesForAllCallerContexts = true ;
361+ // Accumulate all new MIB nodes by the recursive calls below into a vector
362+ // that will later be filtered before adding to the caller's MIBNodes
363+ // vector.
364+ std::vector<Metadata *> NewMIBNodes;
294365 for (auto &Caller : Node->Callers ) {
295366 MIBCallStack.push_back (Caller.first );
296- AddedMIBNodesForAllCallerContexts &= buildMIBNodes (
297- Caller.second , Ctx, MIBCallStack, MIBNodes ,
298- NodeHasAmbiguousCallerContext, Node-> DeepestAmbiguousAllocType );
367+ AddedMIBNodesForAllCallerContexts &=
368+ buildMIBNodes ( Caller.second , Ctx, MIBCallStack, NewMIBNodes ,
369+ NodeHasAmbiguousCallerContext );
299370 // Remove Caller.
300371 MIBCallStack.pop_back ();
301372 }
373+ // Pass in the stack length of the MIB nodes added for the immediate caller,
374+ // which is the current stack length plus 1.
375+ SaveFilteredNewMIBNodes (NewMIBNodes, MIBNodes, MIBCallStack.size () + 1 );
302376 if (AddedMIBNodesForAllCallerContexts)
303377 return true ;
304378 // We expect that the callers should be forced to add MIBs to disambiguate
@@ -372,13 +446,8 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
372446 // The CalleeHasAmbiguousCallerContext flag is meant to say whether the
373447 // callee of the given node has more than one caller. Here the node being
374448 // passed in is the alloc and it has no callees. So it's false.
375- // Similarly, the last parameter is meant to say whether the callee of the
376- // given node is the deepest point where we have ambiguous alloc types, which
377- // is also false as the alloc has no callees.
378- bool DeepestAmbiguousAllocType = true ;
379449 if (buildMIBNodes (Alloc, Ctx, MIBCallStack, MIBNodes,
380- /* CalleeHasAmbiguousCallerContext=*/ false ,
381- DeepestAmbiguousAllocType)) {
450+ /* CalleeHasAmbiguousCallerContext=*/ false )) {
382451 assert (MIBCallStack.size () == 1 &&
383452 " Should only be left with Alloc's location in stack" );
384453 CI->setMetadata (LLVMContext::MD_memprof, MDNode::get (Ctx, MIBNodes));
0 commit comments