@@ -51,6 +51,13 @@ cl::opt<bool> MemProfReportHintedSizes(
5151 " memprof-report-hinted-sizes" , cl::init(false ), cl::Hidden,
5252 cl::desc(" Report total allocation sizes of hinted allocations" ));
5353
54+ // This is useful if we have enabled reporting of hinted sizes, and want to get
55+ // information from the indexing step for all contexts (especially for testing),
56+ // or have specified a value less than 100% for -memprof-cloning-cold-threshold.
57+ cl::opt<bool > MemProfKeepAllNotColdContexts (
58+ " memprof-keep-all-not-cold-contexts" , cl::init(false ), cl::Hidden,
59+ cl::desc(" Keep all non-cold contexts (increases cloning overheads)" ));
60+
5461AllocationType llvm::memprof::getAllocType (uint64_t TotalLifetimeAccessDensity,
5562 uint64_t AllocCount,
5663 uint64_t TotalLifetime) {
@@ -156,10 +163,16 @@ void CallStackTrie::addCallStack(
156163 continue ;
157164 }
158165 // Update existing caller node if it exists.
166+ CallStackTrieNode *Prev = nullptr ;
159167 auto Next = Curr->Callers .find (StackId);
160168 if (Next != Curr->Callers .end ()) {
169+ Prev = Curr;
161170 Curr = Next->second ;
162171 Curr->addAllocType (AllocType);
172+ // If this node has an ambiguous alloc type, its callee is not the deepest
173+ // point where we have an ambiguous allocation type.
174+ if (!hasSingleAllocType (Curr->AllocTypes ))
175+ Prev->DeepestAmbiguousAllocType = false ;
163176 continue ;
164177 }
165178 // Otherwise add a new caller node.
@@ -243,14 +256,35 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
243256bool CallStackTrie::buildMIBNodes (CallStackTrieNode *Node, LLVMContext &Ctx,
244257 std::vector<uint64_t > &MIBCallStack,
245258 std::vector<Metadata *> &MIBNodes,
246- bool CalleeHasAmbiguousCallerContext) {
259+ bool CalleeHasAmbiguousCallerContext,
260+ bool &CalleeDeepestAmbiguousAllocType) {
247261 // Trim context below the first node in a prefix with a single alloc type.
248262 // Add an MIB record for the current call stack prefix.
249263 if (hasSingleAllocType (Node->AllocTypes )) {
250- std::vector<ContextTotalSize> ContextSizeInfo;
251- collectContextSizeInfo (Node, ContextSizeInfo);
252- MIBNodes.push_back (createMIBNode (
253- Ctx, MIBCallStack, (AllocationType)Node->AllocTypes , ContextSizeInfo));
264+ // Because we only clone cold contexts (we don't clone for exposing NotCold
265+ // contexts as that is the default allocation behavior), we create MIB
266+ // metadata for this context if any of the following are true:
267+ // 1) It is cold.
268+ // 2) The immediate callee is the deepest point where we have an ambiguous
269+ // allocation type (i.e. the other callers that are cold need to know
270+ // that we have a not cold context overlapping to this point so that we
271+ // know how deep to clone).
272+ // 3) MemProfKeepAllNotColdContexts is enabled, which is useful if we are
273+ // reporting hinted sizes, and want to get information from the indexing
274+ // step for all contexts, or have specified a value less than 100% for
275+ // -memprof-cloning-cold-threshold.
276+ if (Node->hasAllocType (AllocationType::Cold) ||
277+ CalleeDeepestAmbiguousAllocType || MemProfKeepAllNotColdContexts) {
278+ std::vector<ContextTotalSize> ContextSizeInfo;
279+ collectContextSizeInfo (Node, ContextSizeInfo);
280+ MIBNodes.push_back (createMIBNode (Ctx, MIBCallStack,
281+ (AllocationType)Node->AllocTypes ,
282+ ContextSizeInfo));
283+ // If we just emitted an MIB for a not cold caller, don't need to emit
284+ // another one for the callee to correctly disambiguate its cold callers.
285+ if (!Node->hasAllocType (AllocationType::Cold))
286+ CalleeDeepestAmbiguousAllocType = false ;
287+ }
254288 return true ;
255289 }
256290
@@ -261,9 +295,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
261295 bool AddedMIBNodesForAllCallerContexts = true ;
262296 for (auto &Caller : Node->Callers ) {
263297 MIBCallStack.push_back (Caller.first );
264- AddedMIBNodesForAllCallerContexts &=
265- buildMIBNodes ( Caller.second , Ctx, MIBCallStack, MIBNodes,
266- NodeHasAmbiguousCallerContext);
298+ AddedMIBNodesForAllCallerContexts &= buildMIBNodes (
299+ Caller.second , Ctx, MIBCallStack, MIBNodes,
300+ NodeHasAmbiguousCallerContext, Node-> DeepestAmbiguousAllocType );
267301 // Remove Caller.
268302 MIBCallStack.pop_back ();
269303 }
@@ -337,10 +371,16 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
337371 MIBCallStack.push_back (AllocStackId);
338372 std::vector<Metadata *> MIBNodes;
339373 assert (!Alloc->Callers .empty () && " addCallStack has not been called yet" );
340- // The last parameter is meant to say whether the callee of the given node
341- // has more than one caller. Here the node being passed in is the alloc
342- // and it has no callees. So it's false.
343- if (buildMIBNodes (Alloc, Ctx, MIBCallStack, MIBNodes, false )) {
374+ // The CalleeHasAmbiguousCallerContext flag is meant to say whether the
375+ // callee of the given node has more than one caller. Here the node being
376+ // passed in is the alloc and it has no callees. So it's false.
377+ // Similarly, the last parameter is meant to say whether the callee of the
378+ // given node is the deepest point where we have ambiguous alloc types. The
379+ // alloc has no callees, so a local flag initialized to true is passed.
380+ bool DeepestAmbiguousAllocType = true ;
381+ if (buildMIBNodes (Alloc, Ctx, MIBCallStack, MIBNodes,
382+ /* CalleeHasAmbiguousCallerContext=*/ false ,
383+ DeepestAmbiguousAllocType)) {
344384 assert (MIBCallStack.size () == 1 &&
345385 " Should only be left with Alloc's location in stack" );
346386 CI->setMetadata (LLVMContext::MD_memprof, MDNode::get (Ctx, MIBNodes));
0 commit comments