1313#include " llvm/Analysis/MemoryProfileInfo.h"
1414#include " llvm/IR/Constants.h"
1515#include " llvm/Support/CommandLine.h"
16+ #include " llvm/Support/Format.h"
1617
1718using namespace llvm ;
1819using namespace llvm ::memprof;
@@ -58,6 +59,19 @@ cl::opt<bool> MemProfKeepAllNotColdContexts(
5859 " memprof-keep-all-not-cold-contexts" , cl::init(false ), cl::Hidden,
5960 cl::desc(" Keep all non-cold contexts (increases cloning overheads)" ));
6061
// Minimum percent of cold bytes required to hint an allocation cold during
// cloning. Defaults to 100. createMIBNode attaches per-context size metadata
// when this is set below 100 (or when MemProfReportHintedSizes is enabled).
62+ cl::opt<unsigned > MinClonedColdBytePercent (
63+ " memprof-cloning-cold-threshold" , cl::init(100 ), cl::Hidden,
64+ cl::desc(" Min percent of cold bytes to hint alloc cold during cloning" ));
65+
66+ // Discard non-cold contexts if they overlap with much larger cold contexts:
67+ // specifically, if cold allocations make up at least this percent of the bytes
68+ // allocated by all contexts reaching a given callsite. This reduces the cloning
69+ // needed to expose cold contexts that greatly dominate the non-cold ones.
70+ cl::opt<unsigned > MinCallsiteColdBytePercent (
71+ " memprof-callsite-cold-threshold" , cl::init(100 ), cl::Hidden,
72+ cl::desc(" Min percent of cold bytes at a callsite to discard non-cold "
73+ " contexts" ));
74+
6175AllocationType llvm::memprof::getAllocType (uint64_t TotalLifetimeAccessDensity,
6276 uint64_t AllocCount,
6377 uint64_t TotalLifetime) {
@@ -208,13 +222,29 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
208222
209223static MDNode *createMIBNode (LLVMContext &Ctx, ArrayRef<uint64_t > MIBCallStack,
210224 AllocationType AllocType,
211- ArrayRef<ContextTotalSize> ContextSizeInfo) {
225+ ArrayRef<ContextTotalSize> ContextSizeInfo,
226+ uint64_t &TotalBytes, uint64_t &ColdBytes) {
212227 SmallVector<Metadata *> MIBPayload (
213228 {buildCallstackMetadata (MIBCallStack, Ctx)});
214229 MIBPayload.push_back (
215230 MDString::get (Ctx, getAllocTypeAttributeString (AllocType)));
216- if (!ContextSizeInfo.empty ()) {
217- for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
231+
232+ if (ContextSizeInfo.empty ()) {
233+ // The profile matcher should have provided context size info if there was a
234+ // MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully
235+ // handle a user-provided percent larger than 100.
236+ assert (MinCallsiteColdBytePercent >= 100 );
237+ return MDNode::get (Ctx, MIBPayload);
238+ }
239+
240+ for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
241+ TotalBytes += TotalSize;
242+ if (AllocType == AllocationType::Cold)
243+ ColdBytes += TotalSize;
244+ // Only add the context size info as metadata if we need it in the thin
245+ // link (currently if reporting of hinted sizes is enabled or we have
246+ // specified a threshold for marking allocations cold after cloning).
247+ if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100 ) {
218248 auto *FullStackIdMD = ValueAsMetadata::get (
219249 ConstantInt::get (Type::getInt64Ty (Ctx), FullStackId));
220250 auto *TotalSizeMD = ValueAsMetadata::get (
@@ -223,6 +253,7 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
223253 MIBPayload.push_back (ContextSizeMD);
224254 }
225255 }
256+ assert (TotalBytes > 0 );
226257 return MDNode::get (Ctx, MIBPayload);
227258}
228259
@@ -246,9 +277,14 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
246277// on options that enable filtering out some NotCold contexts.
247278static void saveFilteredNewMIBNodes (std::vector<Metadata *> &NewMIBNodes,
248279 std::vector<Metadata *> &SavedMIBNodes,
249- unsigned CallerContextLength) {
280+ unsigned CallerContextLength,
281+ uint64_t TotalBytes, uint64_t ColdBytes) {
282+ const bool MostlyCold =
283+ MinCallsiteColdBytePercent < 100 &&
284+ ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes;
285+
250286 // In the simplest case, with pruning disabled, keep all the new MIB nodes.
251- if (MemProfKeepAllNotColdContexts) {
287+ if (MemProfKeepAllNotColdContexts && !MostlyCold ) {
252288 append_range (SavedMIBNodes, NewMIBNodes);
253289 return ;
254290 }
@@ -271,6 +307,30 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
271307 }
272308 };
273309
310+ // If the percent of cold bytes at the current callsite meets or exceeds the
311+ // given threshold, we discard all non-cold contexts and so do not need any of
312+ // the later pruning handling. We simply copy over the cold contexts and return.
313+ if (MostlyCold) {
314+ auto NewColdMIBNodes =
315+ make_filter_range (NewMIBNodes, [&](const Metadata *M) {
316+ auto MIBMD = cast<MDNode>(M);
317+ // Only append cold contexts.
318+ if (getMIBAllocType (MIBMD) == AllocationType::Cold)
319+ return true ;
320+ if (MemProfReportHintedSizes) {
321+ const float PercentCold = ColdBytes * 100.0 / TotalBytes;
322+ std::string PercentStr;
323+ llvm::raw_string_ostream OS (PercentStr);
324+ OS << format (" for %5.2f%% cold bytes" , PercentCold);
325+ EmitMessageForRemovedContexts (MIBMD, " discarded" , OS.str ());
326+ }
327+ return false ;
328+ });
329+ for (auto *M : NewColdMIBNodes)
330+ SavedMIBNodes.push_back (M);
331+ return ;
332+ }
333+
274334 // Prune unneeded NotCold contexts, taking advantage of the fact
275335 // that we later will only clone Cold contexts, as NotCold is the allocation
276336 // default. We only need to keep as metadata the NotCold contexts that
@@ -341,17 +401,20 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
341401// Recursive helper to trim contexts and create metadata nodes.
342402// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
343403// caller makes it simpler to handle the many early returns in this method.
404+ // Updates the total and cold profiled bytes in the subtrie rooted at this node.
344405bool CallStackTrie::buildMIBNodes (CallStackTrieNode *Node, LLVMContext &Ctx,
345406 std::vector<uint64_t > &MIBCallStack,
346407 std::vector<Metadata *> &MIBNodes,
347- bool CalleeHasAmbiguousCallerContext) {
408+ bool CalleeHasAmbiguousCallerContext,
409+ uint64_t &TotalBytes, uint64_t &ColdBytes) {
348410 // Trim context below the first node in a prefix with a single alloc type.
349411 // Add an MIB record for the current call stack prefix.
350412 if (hasSingleAllocType (Node->AllocTypes )) {
351413 std::vector<ContextTotalSize> ContextSizeInfo;
352414 collectContextSizeInfo (Node, ContextSizeInfo);
353- MIBNodes.push_back (createMIBNode (
354- Ctx, MIBCallStack, (AllocationType)Node->AllocTypes , ContextSizeInfo));
415+ MIBNodes.push_back (createMIBNode (Ctx, MIBCallStack,
416+ (AllocationType)Node->AllocTypes ,
417+ ContextSizeInfo, TotalBytes, ColdBytes));
355418 return true ;
356419 }
357420
@@ -364,17 +427,25 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
364427 // that will later be filtered before adding to the caller's MIBNodes
365428 // vector.
366429 std::vector<Metadata *> NewMIBNodes;
430+ // Determine the total and cold byte counts for all callers, then add to the
431+ // caller's counts further below.
432+ uint64_t CallerTotalBytes = 0 ;
433+ uint64_t CallerColdBytes = 0 ;
367434 for (auto &Caller : Node->Callers ) {
368435 MIBCallStack.push_back (Caller.first );
369- AddedMIBNodesForAllCallerContexts &=
370- buildMIBNodes ( Caller.second , Ctx, MIBCallStack, NewMIBNodes,
371- NodeHasAmbiguousCallerContext );
436+ AddedMIBNodesForAllCallerContexts &= buildMIBNodes (
437+ Caller.second , Ctx, MIBCallStack, NewMIBNodes,
438+ NodeHasAmbiguousCallerContext, CallerTotalBytes, CallerColdBytes );
372439 // Remove Caller.
373440 MIBCallStack.pop_back ();
374441 }
375442 // Pass in the stack length of the MIB nodes added for the immediate caller,
376443 // which is the current stack length plus 1.
377- saveFilteredNewMIBNodes (NewMIBNodes, MIBNodes, MIBCallStack.size () + 1 );
444+ saveFilteredNewMIBNodes (NewMIBNodes, MIBNodes, MIBCallStack.size () + 1 ,
445+ CallerTotalBytes, CallerColdBytes);
446+ TotalBytes += CallerTotalBytes;
447+ ColdBytes += CallerColdBytes;
448+
378449 if (AddedMIBNodesForAllCallerContexts)
379450 return true ;
380451 // We expect that the callers should be forced to add MIBs to disambiguate
@@ -397,7 +468,7 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
397468 std::vector<ContextTotalSize> ContextSizeInfo;
398469 collectContextSizeInfo (Node, ContextSizeInfo);
399470 MIBNodes.push_back (createMIBNode (Ctx, MIBCallStack, AllocationType::NotCold,
400- ContextSizeInfo));
471+ ContextSizeInfo, TotalBytes, ColdBytes ));
401472 return true ;
402473}
403474
@@ -444,12 +515,15 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
444515 std::vector<uint64_t > MIBCallStack;
445516 MIBCallStack.push_back (AllocStackId);
446517 std::vector<Metadata *> MIBNodes;
518+ uint64_t TotalBytes = 0 ;
519+ uint64_t ColdBytes = 0 ;
447520 assert (!Alloc->Callers .empty () && " addCallStack has not been called yet" );
448521 // The CalleeHasAmbiguousCallerContext flag is meant to say whether the
449522 // callee of the given node has more than one caller. Here the node being
450523 // passed in is the alloc and it has no callees. So it's false.
451524 if (buildMIBNodes (Alloc, Ctx, MIBCallStack, MIBNodes,
452- /* CalleeHasAmbiguousCallerContext=*/ false )) {
525+ /* CalleeHasAmbiguousCallerContext=*/ false , TotalBytes,
526+ ColdBytes)) {
453527 assert (MIBCallStack.size () == 1 &&
454528 " Should only be left with Alloc's location in stack" );
455529 CI->setMetadata (LLVMContext::MD_memprof, MDNode::get (Ctx, MIBNodes));
0 commit comments