1313#include " llvm/Analysis/MemoryProfileInfo.h"
1414#include " llvm/IR/Constants.h"
1515#include " llvm/Support/CommandLine.h"
16+ #include " llvm/Support/Format.h"
1617
1718using namespace llvm ;
1819using namespace llvm ::memprof;
@@ -58,6 +59,19 @@ cl::opt<bool> MemProfKeepAllNotColdContexts(
5859 " memprof-keep-all-not-cold-contexts" , cl::init(false ), cl::Hidden,
5960 cl::desc(" Keep all non-cold contexts (increases cloning overheads)" ));
6061
62+ cl::opt<unsigned > MinClonedColdBytePercent (
63+ " memprof-cloning-cold-threshold" , cl::init(100 ), cl::Hidden,
64+ cl::desc(" Min percent of cold bytes to hint alloc cold during cloning" ));
65+
66+ // Discard non-cold contexts if they overlap with much larger cold contexts,
67+ // specifically, if all contexts reaching a given callsite are at least this
68+ // percent cold byte allocations. This reduces the amount of cloning required
69+ // to expose the cold contexts when they greatly dominate non-cold contexts.
70+ cl::opt<unsigned > MinCallsiteColdBytePercent (
71+ " memprof-callsite-cold-threshold" , cl::init(100 ), cl::Hidden,
72+ cl::desc(" Min percent of cold bytes at a callsite to discard non-cold "
73+ " contexts" ));
74+
6175AllocationType llvm::memprof::getAllocType (uint64_t TotalLifetimeAccessDensity,
6276 uint64_t AllocCount,
6377 uint64_t TotalLifetime) {
@@ -208,21 +222,32 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
208222
209223static MDNode *createMIBNode (LLVMContext &Ctx, ArrayRef<uint64_t > MIBCallStack,
210224 AllocationType AllocType,
211- ArrayRef<ContextTotalSize> ContextSizeInfo) {
225+ ArrayRef<ContextTotalSize> ContextSizeInfo,
226+ uint64_t &TotalBytes, uint64_t &ColdBytes) {
212227 SmallVector<Metadata *> MIBPayload (
213228 {buildCallstackMetadata (MIBCallStack, Ctx)});
214229 MIBPayload.push_back (
215230 MDString::get (Ctx, getAllocTypeAttributeString (AllocType)));
216231 if (!ContextSizeInfo.empty ()) {
217232 for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
218- auto *FullStackIdMD = ValueAsMetadata::get (
219- ConstantInt::get (Type::getInt64Ty (Ctx), FullStackId));
220- auto *TotalSizeMD = ValueAsMetadata::get (
221- ConstantInt::get (Type::getInt64Ty (Ctx), TotalSize));
222- auto *ContextSizeMD = MDNode::get (Ctx, {FullStackIdMD, TotalSizeMD});
223- MIBPayload.push_back (ContextSizeMD);
233+ TotalBytes += TotalSize;
234+ if (AllocType == AllocationType::Cold)
235+ ColdBytes += TotalSize;
236+ // Only add the context size info as metadata if we need it in the thin
237+ // link (currently if reporting of hinted sizes is enabled or we have
238+ // specified a threshold for marking allocations cold after cloning).
239+ if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100 ) {
240+ auto *FullStackIdMD = ValueAsMetadata::get (
241+ ConstantInt::get (Type::getInt64Ty (Ctx), FullStackId));
242+ auto *TotalSizeMD = ValueAsMetadata::get (
243+ ConstantInt::get (Type::getInt64Ty (Ctx), TotalSize));
244+ auto *ContextSizeMD = MDNode::get (Ctx, {FullStackIdMD, TotalSizeMD});
245+ MIBPayload.push_back (ContextSizeMD);
246+ }
224247 }
225248 }
249+ assert (MinCallsiteColdBytePercent >= 100 ||
250+ (!ContextSizeInfo.empty () && TotalBytes > 0 ));
226251 return MDNode::get (Ctx, MIBPayload);
227252}
228253
@@ -246,9 +271,13 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
246271// on options that enable filtering out some NotCold contexts.
247272static void saveFilteredNewMIBNodes (std::vector<Metadata *> &NewMIBNodes,
248273 std::vector<Metadata *> &SavedMIBNodes,
249- unsigned CallerContextLength) {
274+ unsigned CallerContextLength,
275+ uint64_t TotalBytes, uint64_t ColdBytes) {
276+ bool MostlyCold = MinCallsiteColdBytePercent < 100 &&
277+ ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes;
278+
250279 // In the simplest case, with pruning disabled, keep all the new MIB nodes.
251- if (MemProfKeepAllNotColdContexts) {
280+ if (MemProfKeepAllNotColdContexts && !MostlyCold ) {
252281 append_range (SavedMIBNodes, NewMIBNodes);
253282 return ;
254283 }
@@ -271,6 +300,27 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
271300 }
272301 };
273302
303+ if (MostlyCold) {
304+ auto NewColdMIBNodes =
305+ make_filter_range (NewMIBNodes, [&](const Metadata *M) {
306+ auto MIBMD = cast<MDNode>(M);
307+ // Only append cold contexts.
308+ if (getMIBAllocType (MIBMD) == AllocationType::Cold)
309+ return true ;
310+ if (MemProfReportHintedSizes) {
311+ float PercentCold = ColdBytes * 100.0 / TotalBytes;
312+ std::string PercentStr;
313+ llvm::raw_string_ostream OS (PercentStr);
314+ OS << format (" for %5.2f%% cold bytes" , PercentCold);
315+ EmitMessageForRemovedContexts (MIBMD, " discarded" , OS.str ());
316+ }
317+ return false ;
318+ });
319+ for (auto *M : NewColdMIBNodes)
320+ SavedMIBNodes.push_back (M);
321+ return ;
322+ }
323+
274324 // Prune unneeded NotCold contexts, taking advantage of the fact
275325 // that we later will only clone Cold contexts, as NotCold is the allocation
276326 // default. We only need to keep as metadata the NotCold contexts that
@@ -341,17 +391,20 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
341391// Recursive helper to trim contexts and create metadata nodes.
342392// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
343393// caller makes it simpler to handle the many early returns in this method.
394+ // Updates the total and cold profiled bytes in the subtrie rooted at this node.
344395bool CallStackTrie::buildMIBNodes (CallStackTrieNode *Node, LLVMContext &Ctx,
345396 std::vector<uint64_t > &MIBCallStack,
346397 std::vector<Metadata *> &MIBNodes,
347- bool CalleeHasAmbiguousCallerContext) {
398+ bool CalleeHasAmbiguousCallerContext,
399+ uint64_t &TotalBytes, uint64_t &ColdBytes) {
348400 // Trim context below the first node in a prefix with a single alloc type.
349401 // Add an MIB record for the current call stack prefix.
350402 if (hasSingleAllocType (Node->AllocTypes )) {
351403 std::vector<ContextTotalSize> ContextSizeInfo;
352404 collectContextSizeInfo (Node, ContextSizeInfo);
353- MIBNodes.push_back (createMIBNode (
354- Ctx, MIBCallStack, (AllocationType)Node->AllocTypes , ContextSizeInfo));
405+ MIBNodes.push_back (createMIBNode (Ctx, MIBCallStack,
406+ (AllocationType)Node->AllocTypes ,
407+ ContextSizeInfo, TotalBytes, ColdBytes));
355408 return true ;
356409 }
357410
@@ -364,17 +417,25 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
364417 // that will later be filtered before adding to the caller's MIBNodes
365418 // vector.
366419 std::vector<Metadata *> NewMIBNodes;
420+ // Determine the total and cold byte counts for all callers, then add to the
421+ // caller's counts further below.
422+ uint64_t CallerTotalBytes = 0 ;
423+ uint64_t CallerColdBytes = 0 ;
367424 for (auto &Caller : Node->Callers ) {
368425 MIBCallStack.push_back (Caller.first );
369- AddedMIBNodesForAllCallerContexts &=
370- buildMIBNodes ( Caller.second , Ctx, MIBCallStack, NewMIBNodes,
371- NodeHasAmbiguousCallerContext );
426+ AddedMIBNodesForAllCallerContexts &= buildMIBNodes (
427+ Caller.second , Ctx, MIBCallStack, NewMIBNodes,
428+ NodeHasAmbiguousCallerContext, CallerTotalBytes, CallerColdBytes );
372429 // Remove Caller.
373430 MIBCallStack.pop_back ();
374431 }
375432 // Pass in the stack length of the MIB nodes added for the immediate caller,
376433 // which is the current stack length plus 1.
377- saveFilteredNewMIBNodes (NewMIBNodes, MIBNodes, MIBCallStack.size () + 1 );
434+ saveFilteredNewMIBNodes (NewMIBNodes, MIBNodes, MIBCallStack.size () + 1 ,
435+ CallerTotalBytes, CallerColdBytes);
436+ TotalBytes += CallerTotalBytes;
437+ ColdBytes += CallerColdBytes;
438+
378439 if (AddedMIBNodesForAllCallerContexts)
379440 return true ;
380441 // We expect that the callers should be forced to add MIBs to disambiguate
@@ -397,7 +458,7 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
397458 std::vector<ContextTotalSize> ContextSizeInfo;
398459 collectContextSizeInfo (Node, ContextSizeInfo);
399460 MIBNodes.push_back (createMIBNode (Ctx, MIBCallStack, AllocationType::NotCold,
400- ContextSizeInfo));
461+ ContextSizeInfo, TotalBytes, ColdBytes ));
401462 return true ;
402463}
403464
@@ -444,12 +505,15 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
444505 std::vector<uint64_t > MIBCallStack;
445506 MIBCallStack.push_back (AllocStackId);
446507 std::vector<Metadata *> MIBNodes;
508+ uint64_t TotalBytes = 0 ;
509+ uint64_t ColdBytes = 0 ;
447510 assert (!Alloc->Callers .empty () && " addCallStack has not been called yet" );
448511 // The CalleeHasAmbiguousCallerContext flag is meant to say whether the
449512 // callee of the given node has more than one caller. Here the node being
450513 // passed in is the alloc and it has no callees. So it's false.
451514 if (buildMIBNodes (Alloc, Ctx, MIBCallStack, MIBNodes,
452- /* CalleeHasAmbiguousCallerContext=*/ false )) {
515+ /* CalleeHasAmbiguousCallerContext=*/ false , TotalBytes,
516+ ColdBytes)) {
453517 assert (MIBCallStack.size () == 1 &&
454518 " Should only be left with Alloc's location in stack" );
455519 CI->setMetadata (LLVMContext::MD_memprof, MDNode::get (Ctx, MIBNodes));
0 commit comments