diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 8cbb8673b69f5..b042a717e4e49 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -24,6 +24,18 @@ class OptimizationRemarkEmitter; namespace memprof { +/// Whether the alloc memeprof metadata will include context size info for all +/// MIBs. +LLVM_ABI bool metadataIncludesAllContextSizeInfo(); + +/// Whether the alloc memprof metadata may include context size info for some +/// MIBs (but possibly not all). +LLVM_ABI bool metadataMayIncludeContextSizeInfo(); + +/// Whether we need to record the context size info in the alloc trie used to +/// build metadata. +LLVM_ABI bool recordContextSizeInfoForAnalysis(); + /// Build callstack metadata from the provided list of call stack ids. Returns /// the resulting metadata node. LLVM_ABI MDNode *buildCallstackMetadata(ArrayRef CallStack, @@ -87,6 +99,9 @@ class CallStackTrie { // allocations for which we apply non-context sensitive allocation hints. OptimizationRemarkEmitter *ORE; + // The maximum size of a cold allocation context, from the profile summary. + uint64_t MaxColdSize; + void deleteTrieNode(CallStackTrieNode *Node) { if (!Node) return; @@ -113,7 +128,9 @@ class CallStackTrie { uint64_t &ColdBytes); public: - CallStackTrie(OptimizationRemarkEmitter *ORE = nullptr) : ORE(ORE) {} + CallStackTrie(OptimizationRemarkEmitter *ORE = nullptr, + uint64_t MaxColdSize = 0) + : ORE(ORE), MaxColdSize(MaxColdSize) {} ~CallStackTrie() { deleteTrieNode(Alloc); } bool empty() const { return Alloc == nullptr; } diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp index 347377522101a..c08024a38ffc2 100644 --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -46,6 +46,25 @@ cl::opt MinCallsiteColdBytePercent( cl::desc("Min percent of cold bytes at a callsite to discard non-cold " "contexts")); +// Enable saving context size information for largest cold contexts, which can +// be used to flag contexts for more aggressive cloning and reporting. +cl::opt MinPercentMaxColdSize( + "memprof-min-percent-max-cold-size", cl::init(100), cl::Hidden, + cl::desc("Min percent of max cold bytes for critical cold context")); + +bool llvm::memprof::metadataIncludesAllContextSizeInfo() { + return MemProfReportHintedSizes || MinClonedColdBytePercent < 100; +} + +bool llvm::memprof::metadataMayIncludeContextSizeInfo() { + return metadataIncludesAllContextSizeInfo() || MinPercentMaxColdSize < 100; +} + +bool llvm::memprof::recordContextSizeInfoForAnalysis() { + return metadataMayIncludeContextSizeInfo() || + MinCallsiteColdBytePercent < 100; +} + MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef CallStack, LLVMContext &Ctx) { SmallVector StackVals; @@ -168,7 +187,8 @@ void CallStackTrie::addCallStack(MDNode *MIB) { static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, AllocationType AllocType, ArrayRef ContextSizeInfo, - uint64_t &TotalBytes, uint64_t &ColdBytes) { + const uint64_t MaxColdSize, uint64_t &TotalBytes, + uint64_t &ColdBytes) { SmallVector MIBPayload( {buildCallstackMetadata(MIBCallStack, Ctx)}); MIBPayload.push_back( @@ -184,12 +204,21 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { TotalBytes += TotalSize; - if (AllocType == AllocationType::Cold) + bool LargeColdContext = false; + if (AllocType == AllocationType::Cold) { ColdBytes += TotalSize; + // If we have the max cold context size from summary information and have + // requested identification of contexts above a percentage of the max, see + // if this context qualifies. + if (MaxColdSize > 0 && MinPercentMaxColdSize < 100 && + TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize) + LargeColdContext = true; + } // Only add the context size info as metadata if we need it in the thin - // link (currently if reporting of hinted sizes is enabled or we have - // specified a threshold for marking allocations cold after cloning). - if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) { + // link (currently if reporting of hinted sizes is enabled, we have + // specified a threshold for marking allocations cold after cloning, or we + // have identified this as a large cold context of interest above). + if (metadataIncludesAllContextSizeInfo() || LargeColdContext) { auto *FullStackIdMD = ValueAsMetadata::get( ConstantInt::get(Type::getInt64Ty(Ctx), FullStackId)); auto *TotalSizeMD = ValueAsMetadata::get( @@ -357,9 +386,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, if (hasSingleAllocType(Node->AllocTypes)) { std::vector ContextSizeInfo; collectContextSizeInfo(Node, ContextSizeInfo); - MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, - (AllocationType)Node->AllocTypes, - ContextSizeInfo, TotalBytes, ColdBytes)); + MIBNodes.push_back( + createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, + ContextSizeInfo, MaxColdSize, TotalBytes, ColdBytes)); return true; } @@ -413,7 +442,8 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, std::vector ContextSizeInfo; collectContextSizeInfo(Node, ContextSizeInfo); MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold, - ContextSizeInfo, TotalBytes, ColdBytes)); + ContextSizeInfo, MaxColdSize, TotalBytes, + ColdBytes)); return true; } diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 59fa1a4b03c37..a317ac471a231 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -525,6 +525,7 @@ static void computeFunctionSummary( if (MemProfMD) { std::vector MIBs; std::vector> ContextSizeInfos; + bool HasNonZeroContextSizeInfos = false; for (auto &MDOp : MemProfMD->operands()) { auto *MIBMD = cast(MDOp); MDNode *StackNode = getMIBStackNode(MIBMD); @@ -544,7 +545,8 @@ static void computeFunctionSummary( } // If we have context size information, collect it for inclusion in // the summary. - assert(MIBMD->getNumOperands() > 2 || !MemProfReportHintedSizes); + assert(MIBMD->getNumOperands() > 2 || + !metadataIncludesAllContextSizeInfo()); if (MIBMD->getNumOperands() > 2) { std::vector ContextSizes; for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) { @@ -558,14 +560,31 @@ static void computeFunctionSummary( ->getZExtValue(); ContextSizes.push_back({FullStackId, TS}); } + // Flag that we need to keep the ContextSizeInfos array for this + // alloc as it now contains non-zero context info sizes. + HasNonZeroContextSizeInfos = true; ContextSizeInfos.push_back(std::move(ContextSizes)); + } else { + // The ContextSizeInfos must be in the same relative position as the + // associated MIB. In some cases we only include a ContextSizeInfo + // for a subset of MIBs in an allocation. To handle that, eagerly + // fill any MIB entries that don't have context size info metadata + // with a pair of 0s. Later on we will only use this array if it + // ends up containing any non-zero entries (see where we set + // HasNonZeroContextSizeInfos above). + ContextSizeInfos.push_back({{0, 0}}); } MIBs.push_back( MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices))); } Allocs.push_back(AllocInfo(std::move(MIBs))); - assert(!ContextSizeInfos.empty() || !MemProfReportHintedSizes); - if (!ContextSizeInfos.empty()) { + assert(HasNonZeroContextSizeInfos || + !metadataIncludesAllContextSizeInfo()); + // We eagerly build the ContextSizeInfos array, but it will be filled + // with sub arrays of pairs of 0s if no MIBs on this alloc actually + // contained context size info metadata. Only save it if any MIBs had + // any such metadata. + if (HasNonZeroContextSizeInfos) { assert(Allocs.back().MIBs.size() == ContextSizeInfos.size()); Allocs.back().ContextSizeInfos = std::move(ContextSizeInfos); } diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 47388c232b1ad..105edb943eb7f 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -8164,6 +8164,14 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { ContextSizes.reserve(NumContextSizeInfoEntries); for (unsigned J = 0; J < NumContextSizeInfoEntries; J++) { assert(ContextIdIndex < PendingContextIds.size()); + // Skip any 0 entries for MIBs without the context size info. + if (PendingContextIds[ContextIdIndex] == 0) { + // The size should also be 0 if the context was 0. + assert(!Record[I]); + ContextIdIndex++; + I++; + continue; + } // PendingContextIds read from the preceding FS_ALLOC_CONTEXT_IDS // should be in the same order as the total sizes. ContextSizes.push_back( diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 8789b3123cd60..fad8ebfad9f9a 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Bitcode/BitcodeCommon.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -4585,14 +4586,23 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { Stream.EmitRecord(bitc::FS_STACK_IDS, Vals, StackIdAbbvId); } - // n x context id - auto ContextIdAbbv = std::make_shared(); - ContextIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_ALLOC_CONTEXT_IDS)); - ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - // The context ids are hashes that are close to 64 bits in size, so emitting - // as a pair of 32-bit fixed-width values is more efficient than a VBR. - ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); - unsigned ContextIdAbbvId = Stream.EmitAbbrev(std::move(ContextIdAbbv)); + unsigned ContextIdAbbvId = 0; + if (metadataMayIncludeContextSizeInfo()) { + // n x context id + auto ContextIdAbbv = std::make_shared(); + ContextIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_ALLOC_CONTEXT_IDS)); + ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + // The context ids are hashes that are close to 64 bits in size, so emitting + // as a pair of 32-bit fixed-width values is more efficient than a VBR if we + // are emitting them for all MIBs. Otherwise we use VBR to better compress 0 + // values that are expected to more frequently occur in an alloc's memprof + // summary. + if (metadataIncludesAllContextSizeInfo()) + ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + else + ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + ContextIdAbbvId = Stream.EmitAbbrev(std::move(ContextIdAbbv)); + } // Abbrev for FS_PERMODULE_PROFILE. Abbv = std::make_shared(); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 5b4350845b726..cff38a8e68c6a 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -2232,9 +2232,8 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( CallStack::const_iterator> EmptyContext; unsigned I = 0; - assert( - (!MemProfReportHintedSizes && MinClonedColdBytePercent >= 100) || - AN.ContextSizeInfos.size() == AN.MIBs.size()); + assert(!metadataMayIncludeContextSizeInfo() || + AN.ContextSizeInfos.size() == AN.MIBs.size()); // Now add all of the MIBs and their stack nodes. for (auto &MIB : AN.MIBs) { CallStack::const_iterator> diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index e06add7cf313b..3f277b1b84ac1 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -184,10 +184,6 @@ static cl::opt ClMemProfAttachCalleeGuids( "Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden); -extern cl::opt MemProfReportHintedSizes; -extern cl::opt MinClonedColdBytePercent; -extern cl::opt MinCallsiteColdBytePercent; - static cl::opt MinMatchedColdBytePercent( "memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold")); @@ -299,13 +295,6 @@ class ModuleMemProfiler { Function *MemProfCtorFunction = nullptr; }; -// Options under which we need to record the context size info in the alloc trie -// used to build metadata. -bool recordContextSizeInfo() { - return MemProfReportHintedSizes || MinClonedColdBytePercent < 100 || - MinCallsiteColdBytePercent < 100; -} - } // end anonymous namespace MemProfilerPass::MemProfilerPass() = default; @@ -758,7 +747,7 @@ static AllocationType addCallStack(CallStackTrie &AllocTrie, AllocInfo->Info.getAllocCount(), AllocInfo->Info.getTotalLifetime()); std::vector ContextSizeInfo; - if (recordContextSizeInfo()) { + if (recordContextSizeInfoForAnalysis()) { auto TotalSize = AllocInfo->Info.getTotalSize(); assert(TotalSize); assert(FullStackId != 0); @@ -1003,7 +992,7 @@ static void readMemprof(Module &M, Function &F, &FullStackIdToAllocMatchInfo, std::set> &MatchedCallSites, DenseMap &UndriftMaps, - OptimizationRemarkEmitter &ORE) { + OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) { auto &Ctx = M.getContext(); // Previously we used getIRPGOFuncName() here. If F is local linkage, // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But @@ -1192,7 +1181,7 @@ static void readMemprof(Module &M, Function &F, // We may match this instruction's location list to multiple MIB // contexts. Add them to a Trie specialized for trimming the contexts to // the minimal needed to disambiguate contexts with unique behavior. - CallStackTrie AllocTrie(&ORE); + CallStackTrie AllocTrie(&ORE, MaxColdSize); uint64_t TotalSize = 0; uint64_t TotalColdSize = 0; for (auto *AllocInfo : AllocInfoIter->second) { @@ -1203,7 +1192,7 @@ static void readMemprof(Module &M, Function &F, InlinedCallStack)) { NumOfMemProfMatchedAllocContexts++; uint64_t FullStackId = 0; - if (ClPrintMemProfMatchInfo || recordContextSizeInfo()) + if (ClPrintMemProfMatchInfo || recordContextSizeInfoForAnalysis()) FullStackId = computeFullStackId(AllocInfo->CallStack); auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId); TotalSize += AllocInfo->Info.getTotalSize(); @@ -1340,6 +1329,10 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { // call stack. std::set> MatchedCallSites; + uint64_t MaxColdSize = 0; + if (auto *MemProfSum = MemProfReader->getMemProfSummary()) + MaxColdSize = MemProfSum->getMaxColdTotalSize(); + for (auto &F : M) { if (F.isDeclaration()) continue; @@ -1347,7 +1340,7 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { const TargetLibraryInfo &TLI = FAM.getResult(F); auto &ORE = FAM.getResult(F); readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo, - MatchedCallSites, UndriftMaps, ORE); + MatchedCallSites, UndriftMaps, ORE, MaxColdSize); } if (ClPrintMemProfMatchInfo) { diff --git a/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll b/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll new file mode 100644 index 0000000000000..d4a3f9bca2cab --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll @@ -0,0 +1,73 @@ +;; Test that we get hinted size reporting for just the subset of MIBs that +;; contain context size info in the metadata. + +;; Generate the bitcode including ThinLTO summary. Specify +;; -memprof-min-percent-max-cold-size (value doesn't matter) to indicate to +;; the bitcode writer that it should expect and optimize for partial context +;; size info. +; RUN: opt -thinlto-bc -memprof-min-percent-max-cold-size=50 %s >%t.o + +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-report-hinted-sizes \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=SIZES + +;; We should only get these two messages from -memprof-report-hinted-sizes +;; as they are the only MIBs with recorded context size info. +; SIZES-NOT: full allocation context +; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2) +; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2) +; SIZES-NOT: full allocation context + +source_filename = "memprof-report-hinted-partial.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Z3foov(), !callsite !0 + %call1 = call ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +define internal ptr @_Z3barv() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() #0 { +entry: + %call = call ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +define internal ptr @_Z3foov() #0 { +entry: + %call = call ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { noinline optnone } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5, !13} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold", !11, !12} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} +!11 = !{i64 456, i64 200} +!12 = !{i64 789, i64 300} +!13 = !{!14, !"cold"} +!14 = !{i64 9086428284934609951, i64 12345} diff --git a/llvm/test/Transforms/PGOProfile/memprof_max_cold_threshold.test b/llvm/test/Transforms/PGOProfile/memprof_max_cold_threshold.test new file mode 100644 index 0000000000000..8ccaa170b0ed9 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/memprof_max_cold_threshold.test @@ -0,0 +1,163 @@ +;; Test the -memprof-min-percent-max-cold-size flag for annotating only the +;; largest contexts with size info. + +; RUN: split-file %s %t + +;; Specify version 4 so that a summary is generated. +; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_max_cold_threshold.yaml -o %t/memprof_max_cold_threshold.memprofdata + +; RUN: llvm-profdata show --memory %t/memprof_max_cold_threshold.memprofdata | FileCheck %s --check-prefixes=SUMMARY +; SUMMARY: # MemProfSummary: +; SUMMARY: # Total contexts: 5 +; SUMMARY: # Total cold contexts: 4 +; SUMMARY: # Total hot contexts: 0 +; SUMMARY: # Maximum cold context total size: 400 +; SUMMARY: # Maximum warm context total size: 500 +; SUMMARY: # Maximum hot context total size: 0 + +;; Test with various thresholds, starting with the baseline of no context size +;; info. +; RUN: opt < %t/memprof_max_cold_threshold.ll -passes='memprof-use' -S | FileCheck %s --check-prefixes=ALL,NONE +; RUN: opt < %t/memprof_max_cold_threshold.ll -memprof-min-percent-max-cold-size=99 -passes='memprof-use' -S | FileCheck %s --check-prefixes=ALL,THRESH99 +; RUN: opt < %t/memprof_max_cold_threshold.ll -memprof-min-percent-max-cold-size=75 -passes='memprof-use' -S | FileCheck %s --check-prefixes=ALL,THRESH75 +; RUN: opt < %t/memprof_max_cold_threshold.ll -memprof-min-percent-max-cold-size=50 -passes='memprof-use' -S | FileCheck %s --check-prefixes=ALL,THRESH50 + +;; Make sure serializing / deserializing through bitcode works +; RUN: opt < %t/memprof_max_cold_threshold.ll -memprof-min-percent-max-cold-size=75 -passes='memprof-use' -o %t/out.bc +; RUN: llvm-dis %t/out.bc -o - | FileCheck %s --check-prefixes=ALL,THRESH75 + +;--- memprof_max_cold_threshold.yaml +--- +HeapProfileRecords: + - GUID: _Z3foov + AllocSites: + - Callstack: + - { Function: _Z3foov, LineOffset: 0, Column: 22, IsInlineFrame: false } + - { Function: main, LineOffset: 2, Column: 5, IsInlineFrame: false } + MemInfoBlock: + # Smallest cold, 25% of largest cold size + TotalSize: 100 + AllocCount: 1 + TotalLifetimeAccessDensity: 1 + TotalLifetime: 1000000 + - Callstack: + - { Function: _Z3foov, LineOffset: 0, Column: 22, IsInlineFrame: false } + - { Function: main, LineOffset: 3, Column: 5, IsInlineFrame: false } + MemInfoBlock: + # Largest cold + TotalSize: 400 + AllocCount: 1 + TotalLifetimeAccessDensity: 1 + TotalLifetime: 1000000 + - Callstack: + - { Function: _Z3foov, LineOffset: 0, Column: 22, IsInlineFrame: false } + - { Function: main, LineOffset: 4, Column: 5, IsInlineFrame: false } + MemInfoBlock: + # Second largest cold, 75% of largest cold size + TotalSize: 300 + AllocCount: 1 + TotalLifetimeAccessDensity: 1 + TotalLifetime: 1000000 + - Callstack: + - { Function: _Z3foov, LineOffset: 0, Column: 22, IsInlineFrame: false } + - { Function: main, LineOffset: 5, Column: 5, IsInlineFrame: false } + MemInfoBlock: + # Second smallest cold, 50% of largest cold size + TotalSize: 200 + AllocCount: 1 + TotalLifetimeAccessDensity: 1 + TotalLifetime: 1000000 + - Callstack: + - { Function: _Z3foov, LineOffset: 0, Column: 22, IsInlineFrame: false } + - { Function: main, LineOffset: 6, Column: 5, IsInlineFrame: false } + MemInfoBlock: + # Largest context, which is non-cold (due to short lifetime) + TotalSize: 500 + AllocCount: 1 + TotalLifetimeAccessDensity: 1 + TotalLifetime: 1 + CallSites: [] +... +;--- memprof_max_cold_threshold.ll +define dso_local ptr @_Z3foov() !dbg !4 { +entry: + %call = call ptr @_Znam(i64 4) #0, !dbg !5 +; ALL: call ptr @_Znam(i64 4) {{.*}} !memprof ![[M1:[0-9]+]] + ret ptr %call +} + +;; No MIBs get context size info without option. +; NONE: ![[M1]] = !{![[M2:[0-9]+]], ![[M3:[0-9]+]], ![[M4:[0-9]+]], ![[M5:[0-9]+]], ![[M6:[0-9]+]]} +; NONE: ![[M2]] = !{![[C2:[0-9]+]], !"cold"} +; NONE: ![[C2]] = !{i64 590523745590780990, i64 720385627691022109} +; NONE: ![[M3]] = !{![[C3:[0-9]+]], !"cold"} +; NONE: ![[C3]] = !{i64 590523745590780990, i64 8256520048276991898} +; NONE: ![[M4]] = !{![[C4:[0-9]+]], !"cold"} +; NONE: ![[C4]] = !{i64 590523745590780990, i64 -6953100768213558995} +; NONE: ![[M5]] = !{![[C5:[0-9]+]], !"cold"} +; NONE: ![[C5]] = !{i64 590523745590780990, i64 -6435117705372285425} +; NONE: ![[M6]] = !{![[C6:[0-9]+]], !"notcold"} +; NONE: ![[C6]] = !{i64 590523745590780990, i64 -2847840206325626610} + +; THRESH99: ![[M1]] = !{![[M2:[0-9]+]], ![[M3:[0-9]+]], ![[M4:[0-9]+]], ![[M5:[0-9]+]], ![[M6:[0-9]+]]} +; THRESH99: ![[M2]] = !{![[C2:[0-9]+]], !"cold"} +; THRESH99: ![[C2]] = !{i64 590523745590780990, i64 720385627691022109} +;; MIB with size 400 is now included with threshold 99% +; THRESH99: ![[M3]] = !{![[C3:[0-9]+]], !"cold", ![[S3:[0-9]+]]} +; THRESH99: ![[C3]] = !{i64 590523745590780990, i64 8256520048276991898} +; THRESH99: ![[S3]] = !{i64 6509573709067523871, i64 400} +; THRESH99: ![[M4]] = !{![[C4:[0-9]+]], !"cold"} +; THRESH99: ![[C4]] = !{i64 590523745590780990, i64 -6953100768213558995} +; THRESH99: ![[M5]] = !{![[C5:[0-9]+]], !"cold"} +; THRESH99: ![[C5]] = !{i64 590523745590780990, i64 -6435117705372285425} +; THRESH99: ![[M6]] = !{![[C6:[0-9]+]], !"notcold"} +; THRESH99: ![[C6]] = !{i64 590523745590780990, i64 -2847840206325626610} + +; THRESH75: ![[M1]] = !{![[M2:[0-9]+]], ![[M3:[0-9]+]], ![[M4:[0-9]+]], ![[M5:[0-9]+]], ![[M6:[0-9]+]]} +; THRESH75: ![[M2]] = !{![[C2:[0-9]+]], !"cold"} +; THRESH75: ![[C2]] = !{i64 590523745590780990, i64 720385627691022109} +; THRESH75: ![[M3]] = !{![[C3:[0-9]+]], !"cold", ![[S3:[0-9]+]]} +; THRESH75: ![[C3]] = !{i64 590523745590780990, i64 8256520048276991898} +; THRESH75: ![[S3]] = !{i64 6509573709067523871, i64 400} +; THRESH75: ![[M4]] = !{![[C4:[0-9]+]], !"cold"} +; THRESH75: ![[C4]] = !{i64 590523745590780990, i64 -6953100768213558995} +;; MIB with size 300 is now included with threshold 75% +; THRESH75: ![[M5]] = !{![[C5:[0-9]+]], !"cold", ![[S5:[0-9]+]]} +; THRESH75: ![[C5]] = !{i64 590523745590780990, i64 -6435117705372285425} +; THRESH75: ![[S5]] = !{i64 86248815258696712, i64 300} +; THRESH75: ![[M6]] = !{![[C6:[0-9]+]], !"notcold"} +; THRESH75: ![[C6]] = !{i64 590523745590780990, i64 -2847840206325626610} + +; THRESH50: ![[M1]] = !{![[M2:[0-9]+]], ![[M3:[0-9]+]], ![[M4:[0-9]+]], ![[M5:[0-9]+]], ![[M6:[0-9]+]]} +;; The smallest never gets context size info. +; THRESH50: ![[M2]] = !{![[C2:[0-9]+]], !"cold"} +; THRESH50: ![[C2]] = !{i64 590523745590780990, i64 720385627691022109} +; THRESH50: ![[M3]] = !{![[C3:[0-9]+]], !"cold", ![[S3:[0-9]+]]} +; THRESH50: ![[C3]] = !{i64 590523745590780990, i64 8256520048276991898} +; THRESH50: ![[S3]] = !{i64 6509573709067523871, i64 400} +;; MIB with size 200 is now included with threshold 50% +; THRESH50: ![[M4]] = !{![[C4:[0-9]+]], !"cold", ![[S4:[0-9]+]]} +; THRESH50: ![[C4]] = !{i64 590523745590780990, i64 -6953100768213558995} +; THRESH50: ![[S4]] = !{i64 -2629930016428010893, i64 200} +; THRESH50: ![[M5]] = !{![[C5:[0-9]+]], !"cold", ![[S5:[0-9]+]]} +; THRESH50: ![[C5]] = !{i64 590523745590780990, i64 -6435117705372285425} +; THRESH50: ![[S5]] = !{i64 86248815258696712, i64 300} +;; Non cold context never gets context size info +; THRESH50: ![[M6]] = !{![[C6:[0-9]+]], !"notcold"} +; THRESH50: ![[C6]] = !{i64 590523745590780990, i64 -2847840206325626610} + +declare ptr @_Znam(i64) + +attributes #0 = { builtin allocsize(0) } + +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) +!1 = !DIFile(filename: "t", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 1, unit: !0) +!5 = !DILocation(line: 1, column: 22, scope: !4) +!6 = !DILocation(line: 2, column: 22, scope: !4) +!7 = !DILocation(line: 3, column: 22, scope: !4) +!8 = !DILocation(line: 4, column: 22, scope: !4)