diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index edbce706953d1..b46124a4ed0d5 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -28,16 +28,17 @@ AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, /// the resulting metadata node. MDNode *buildCallstackMetadata(ArrayRef CallStack, LLVMContext &Ctx); +/// Build metadata from the provided list of full stack id and profiled size, to +/// use when reporting of hinted sizes is enabled. +MDNode *buildContextSizeMetadata(ArrayRef ContextSizeInfo, + LLVMContext &Ctx); + /// Returns the stack node from an MIB metadata node. MDNode *getMIBStackNode(const MDNode *MIB); /// Returns the allocation type from an MIB metadata node. AllocationType getMIBAllocType(const MDNode *MIB); -/// Returns the total size from an MIB metadata node, or 0 if it was not -/// recorded. -uint64_t getMIBTotalSize(const MDNode *MIB); - /// Returns the string to use in attributes with the given type. std::string getAllocTypeAttributeString(AllocationType Type); @@ -55,11 +56,15 @@ class CallStackTrie { // Allocation types for call context sharing the context prefix at this // node. uint8_t AllocTypes; - uint64_t TotalSize; + // If the user has requested reporting of hinted sizes, keep track of the + // associated full stack id and profiled sizes. Can have more than one + // after trimming (e.g. when building from metadata). This is only placed on + // the last (root-most) trie node for each allocation context. + std::vector ContextSizeInfo; // Map of caller stack id to the corresponding child Trie node. std::map Callers; - CallStackTrieNode(AllocationType Type, uint64_t TotalSize) - : AllocTypes(static_cast(Type)), TotalSize(TotalSize) {} + CallStackTrieNode(AllocationType Type) + : AllocTypes(static_cast(Type)) {} }; // The node for the allocation at the root. 
@@ -75,6 +80,11 @@ class CallStackTrie { delete Node; } + // Recursively build up a complete list of context size information from the + // trie nodes reached from the given Node, for hint size reporting. + void collectContextSizeInfo(CallStackTrieNode *Node, + std::vector &ContextSizeInfo); + // Recursive helper to trim contexts and create metadata nodes. bool buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, std::vector &MIBCallStack, @@ -93,7 +103,7 @@ class CallStackTrie { /// allocation call down to the bottom of the call stack (i.e. callee to /// caller order). void addCallStack(AllocationType AllocType, ArrayRef StackIds, - uint64_t TotalSize = 0); + std::vector ContextSizeInfo = {}); /// Add the call stack context along with its allocation type from the MIB /// metadata to the Trie. diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 41a6447356c23..a0fb32f67e385 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -308,7 +308,7 @@ enum GlobalValueSummarySymtabCodes { FS_PERMODULE_CALLSITE_INFO = 26, // Summary of per-module allocation memprof metadata. // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex), - // [nummib x total size]?] + // [nummib x (numcontext x total size)]?] FS_PERMODULE_ALLOC_INFO = 27, // Summary of combined index memprof callsite metadata. // [valueid, numstackindices, numver, @@ -317,9 +317,20 @@ enum GlobalValueSummarySymtabCodes { // Summary of combined index allocation memprof metadata. // [nummib, numver, // nummib x (alloc type, numstackids, numstackids x stackidindex), - // numver x version, [nummib x total size]?] + // numver x version] FS_COMBINED_ALLOC_INFO = 29, + // List of all stack ids referenced by index in the callsite and alloc infos. 
+ // [n x stack id] FS_STACK_IDS = 30, + // List of all full stack id pairs corresponding to the total sizes recorded + // at the end of the alloc info when reporting of hinted bytes is enabled. + // We use a fixed-width array, which is more efficient as these ids typically + // are close to 64 bits in size. The max fixed width value supported is 32 + // bits so each 64-bit context id hash is recorded as a pair (upper 32 bits + // first). This record must immediately precede the associated alloc info, and + // the entries must be in the exact same order as the corresponding sizes. + // [nummib x (numcontext x full stack id)] + FS_ALLOC_CONTEXT_IDS = 31, }; enum MetadataCodes { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 1cfe7c15f97db..62d8e07bd9acd 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -302,6 +302,14 @@ template <> struct DenseMapInfo { static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); } }; +// For optional hinted size reporting, holds a pair of the full stack id +// (pre-trimming, from the full context in the profile), and the associated +// total profiled size. +struct ContextTotalSize { + uint64_t FullStackId; + uint64_t TotalSize; +}; + /// Summary of memprof callsite metadata. struct CallsiteInfo { // Actual callee function. @@ -408,9 +416,13 @@ struct AllocInfo { // Vector of MIBs in this memprof metadata. std::vector MIBs; - // If requested, keep track of total profiled sizes for each MIB. This will be - // a vector of the same length and order as the MIBs vector, if non-empty. - std::vector TotalSizes; + // If requested, keep track of full stack contexts and total profiled sizes + // for each MIB. This will be a vector of the same length and order as the + // MIBs vector, if non-empty. Note that each MIB in the summary can have + // multiple of these as we trim the contexts when possible during matching. 
+ // For hinted size reporting we, however, want the original pre-trimmed full + // stack context id for better correlation with the profile. + std::vector> ContextSizeInfos; AllocInfo(std::vector MIBs) : MIBs(std::move(MIBs)) { Versions.push_back(0); @@ -432,14 +444,18 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) { for (auto &M : AE.MIBs) { OS << "\t\t" << M << "\n"; } - if (!AE.TotalSizes.empty()) { - OS << " TotalSizes per MIB:\n\t\t"; - First = true; - for (uint64_t TS : AE.TotalSizes) { - if (!First) - OS << ", "; - First = false; - OS << TS << "\n"; + if (!AE.ContextSizeInfos.empty()) { + OS << "\tContextSizeInfo per MIB:\n"; + for (auto Infos : AE.ContextSizeInfos) { + OS << "\t\t"; + bool FirstInfo = true; + for (auto [FullStackId, TotalSize] : Infos) { + if (!FirstInfo) + OS << ", "; + FirstInfo = false; + OS << "{ " << FullStackId << ", " << TotalSize << " }"; + } + OS << "\n"; } } return OS; diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp index 2b49dce17b793..85aadefb96e05 100644 --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -99,12 +99,6 @@ AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) { return AllocationType::NotCold; } -uint64_t llvm::memprof::getMIBTotalSize(const MDNode *MIB) { - if (MIB->getNumOperands() < 3) - return 0; - return mdconst::dyn_extract(MIB->getOperand(2))->getZExtValue(); -} - std::string llvm::memprof::getAllocTypeAttributeString(AllocationType Type) { switch (Type) { case AllocationType::NotCold: @@ -135,22 +129,21 @@ bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) { return NumAllocTypes == 1; } -void CallStackTrie::addCallStack(AllocationType AllocType, - ArrayRef StackIds, - uint64_t TotalSize) { +void CallStackTrie::addCallStack( + AllocationType AllocType, ArrayRef StackIds, + std::vector ContextSizeInfo) { bool First = true; CallStackTrieNode *Curr = nullptr; for (auto StackId 
: StackIds) { - // If this is the first stack frame, add or update alloc node. + // If this is the first stack frame, add or update alloc node. if (First) { First = false; if (Alloc) { assert(AllocStackId == StackId); Alloc->AllocTypes |= static_cast(AllocType); - Alloc->TotalSize += TotalSize; } else { AllocStackId = StackId; - Alloc = new CallStackTrieNode(AllocType, TotalSize); + Alloc = new CallStackTrieNode(AllocType); } Curr = Alloc; continue; @@ -160,15 +153,18 @@ void CallStackTrie::addCallStack(AllocationType AllocType, if (Next != Curr->Callers.end()) { Curr = Next->second; Curr->AllocTypes |= static_cast(AllocType); - Curr->TotalSize += TotalSize; continue; } // Otherwise add a new caller node. - auto *New = new CallStackTrieNode(AllocType, TotalSize); + auto *New = new CallStackTrieNode(AllocType); Curr->Callers[StackId] = New; Curr = New; } assert(Curr); + Curr->ContextSizeInfo.insert(Curr->ContextSizeInfo.end(), + ContextSizeInfo.begin(), ContextSizeInfo.end()); + std::vector AllContextSizeInfo; + collectContextSizeInfo(Curr, AllContextSizeInfo); } void CallStackTrie::addCallStack(MDNode *MIB) { @@ -181,21 +177,52 @@ void CallStackTrie::addCallStack(MDNode *MIB) { assert(StackId); CallStack.push_back(StackId->getZExtValue()); } - addCallStack(getMIBAllocType(MIB), CallStack, getMIBTotalSize(MIB)); + std::vector ContextSizeInfo; + // Collect the context size information if it exists. 
+ if (MIB->getNumOperands() > 2) { + for (unsigned I = 2; I < MIB->getNumOperands(); I++) { + MDNode *ContextSizePair = dyn_cast(MIB->getOperand(I)); + assert(ContextSizePair->getNumOperands() == 2); + uint64_t FullStackId = + mdconst::dyn_extract(ContextSizePair->getOperand(0)) + ->getZExtValue(); + uint64_t TotalSize = + mdconst::dyn_extract(ContextSizePair->getOperand(1)) + ->getZExtValue(); + ContextSizeInfo.push_back({FullStackId, TotalSize}); + } + } + addCallStack(getMIBAllocType(MIB), CallStack, std::move(ContextSizeInfo)); } static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef MIBCallStack, - AllocationType AllocType, uint64_t TotalSize) { + AllocationType AllocType, + ArrayRef ContextSizeInfo) { SmallVector MIBPayload( {buildCallstackMetadata(MIBCallStack, Ctx)}); MIBPayload.push_back( MDString::get(Ctx, getAllocTypeAttributeString(AllocType))); - if (TotalSize) - MIBPayload.push_back(ValueAsMetadata::get( - ConstantInt::get(Type::getInt64Ty(Ctx), TotalSize))); + if (!ContextSizeInfo.empty()) { + for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { + auto *FullStackIdMD = ValueAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Ctx), FullStackId)); + auto *TotalSizeMD = ValueAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Ctx), TotalSize)); + auto *ContextSizeMD = MDNode::get(Ctx, {FullStackIdMD, TotalSizeMD}); + MIBPayload.push_back(ContextSizeMD); + } + } return MDNode::get(Ctx, MIBPayload); } +void CallStackTrie::collectContextSizeInfo( + CallStackTrieNode *Node, std::vector &ContextSizeInfo) { + ContextSizeInfo.insert(ContextSizeInfo.end(), Node->ContextSizeInfo.begin(), + Node->ContextSizeInfo.end()); + for (auto &Caller : Node->Callers) + collectContextSizeInfo(Caller.second, ContextSizeInfo); +} + // Recursive helper to trim contexts and create metadata nodes. // Caller should have pushed Node's loc to MIBCallStack. Doing this in the // caller makes it simpler to handle the many early returns in this method. 
@@ -206,8 +233,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, // Trim context below the first node in a prefix with a single alloc type. // Add an MIB record for the current call stack prefix. if (hasSingleAllocType(Node->AllocTypes)) { + std::vector ContextSizeInfo; + collectContextSizeInfo(Node, ContextSizeInfo); MIBNodes.push_back(createMIBNode( - Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, Node->TotalSize)); + Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo)); return true; } @@ -243,8 +272,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, // non-cold allocation type. if (!CalleeHasAmbiguousCallerContext) return false; + std::vector ContextSizeInfo; + collectContextSizeInfo(Node, ContextSizeInfo); MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold, - Node->TotalSize)); + ContextSizeInfo)); return true; } @@ -256,11 +287,15 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) { if (hasSingleAllocType(Alloc->AllocTypes)) { addAllocTypeAttribute(Ctx, CI, (AllocationType)Alloc->AllocTypes); if (MemProfReportHintedSizes) { - assert(Alloc->TotalSize); - errs() << "Total size for allocation with location hash " << AllocStackId - << " and single alloc type " - << getAllocTypeAttributeString((AllocationType)Alloc->AllocTypes) - << ": " << Alloc->TotalSize << "\n"; + std::vector ContextSizeInfo; + collectContextSizeInfo(Alloc, ContextSizeInfo); + for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { + errs() + << "MemProf hinting: Total size for full allocation context hash " + << FullStackId << " and single alloc type " + << getAllocTypeAttributeString((AllocationType)Alloc->AllocTypes) + << ": " << TotalSize << "\n"; + } } return false; } diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 004e8b76a3c85..1593d4d901463 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp 
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -523,6 +523,7 @@ static void computeFunctionSummary( if (MemProfMD) { std::vector MIBs; std::vector TotalSizes; + std::vector> ContextSizeInfos; for (auto &MDOp : MemProfMD->operands()) { auto *MIBMD = cast(MDOp); MDNode *StackNode = getMIBStackNode(MIBMD); @@ -540,18 +541,32 @@ static void computeFunctionSummary( if (StackIdIndices.empty() || StackIdIndices.back() != StackIdIdx) StackIdIndices.push_back(StackIdIdx); } + // If we have context size information, collect it for inclusion in + // the summary. + assert(MIBMD->getNumOperands() > 2 || !MemProfReportHintedSizes); + if (MIBMD->getNumOperands() > 2) { + std::vector ContextSizes; + for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) { + MDNode *ContextSizePair = dyn_cast(MIBMD->getOperand(I)); + assert(ContextSizePair->getNumOperands() == 2); + uint64_t FullStackId = mdconst::dyn_extract( + ContextSizePair->getOperand(0)) + ->getZExtValue(); + uint64_t TS = mdconst::dyn_extract( + ContextSizePair->getOperand(1)) + ->getZExtValue(); + ContextSizes.push_back({FullStackId, TS}); + } + ContextSizeInfos.push_back(std::move(ContextSizes)); + } MIBs.push_back( MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices))); - if (MemProfReportHintedSizes) { - auto TotalSize = getMIBTotalSize(MIBMD); - assert(TotalSize); - TotalSizes.push_back(TotalSize); - } } Allocs.push_back(AllocInfo(std::move(MIBs))); - if (MemProfReportHintedSizes) { - assert(Allocs.back().MIBs.size() == TotalSizes.size()); - Allocs.back().TotalSizes = std::move(TotalSizes); + assert(!ContextSizeInfos.empty() || !MemProfReportHintedSizes); + if (!ContextSizeInfos.empty()) { + assert(Allocs.back().MIBs.size() == ContextSizeInfos.size()); + Allocs.back().ContextSizeInfos = std::move(ContextSizeInfos); } } else if (!InstCallsite.empty()) { SmallVector StackIdIndices; diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index 
b7ed9cdf63145..8f79ccdb9ff75 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -328,6 +328,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO) STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO) STRINGIFY_CODE(FS, STACK_IDS) + STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS) } case bitc::METADATA_ATTACHMENT_ID: switch (CodeID) { diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 446c98c8cecd8..05a0e87e53be6 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -7603,6 +7603,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::vector PendingCallsites; std::vector PendingAllocs; + std::vector PendingContextIds; while (true) { Expected MaybeEntry = Stream.advanceSkippingSubblocks(); @@ -8031,6 +8032,16 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { break; } + case bitc::FS_ALLOC_CONTEXT_IDS: { + // This is an array of 32-bit fixed-width values, holding each 64-bit + // context id as a pair of adjacent (most significant first) 32-bit words. + assert(Record.size() % 2 == 0); + PendingContextIds.reserve(Record.size() / 2); + for (auto R = Record.begin(); R != Record.end(); R += 2) + PendingContextIds.push_back(*R << 32 | *(R + 1)); + break; + } + case bitc::FS_PERMODULE_ALLOC_INFO: { unsigned I = 0; std::vector MIBs; @@ -8052,18 +8063,41 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { } MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList))); } - std::vector TotalSizes; - // We either have no sizes or NumMIBs of them. - assert(I == Record.size() || Record.size() - I == NumMIBs); + // We either have nothing left or at least NumMIBs context size info + // indices left (for the total sizes included when reporting of hinted + // bytes is enabled). 
+ assert(I == Record.size() || Record.size() - I >= NumMIBs); + std::vector> AllContextSizes; if (I < Record.size()) { + assert(!PendingContextIds.empty() && + "Missing context ids for alloc sizes"); + unsigned ContextIdIndex = 0; MIBsRead = 0; - while (MIBsRead++ < NumMIBs) - TotalSizes.push_back(Record[I++]); + // The sizes are a linearized array of sizes, where for each MIB there + // is 1 or more sizes (due to context trimming, each MIB in the metadata + // and summarized here can correspond to more than one original context + // from the profile). + while (MIBsRead++ < NumMIBs) { + // First read the number of contexts recorded for this MIB. + unsigned NumContextSizeInfoEntries = Record[I++]; + assert(Record.size() - I >= NumContextSizeInfoEntries); + std::vector ContextSizes; + ContextSizes.reserve(NumContextSizeInfoEntries); + for (unsigned J = 0; J < NumContextSizeInfoEntries; J++) { + assert(ContextIdIndex < PendingContextIds.size()); + // PendingContextIds read from the preceding FS_ALLOC_CONTEXT_IDS + // should be in the same order as the total sizes. + ContextSizes.push_back( + {PendingContextIds[ContextIdIndex++], Record[I++]}); + } + AllContextSizes.push_back(std::move(ContextSizes)); + } + PendingContextIds.clear(); } PendingAllocs.push_back(AllocInfo(std::move(MIBs))); - if (!TotalSizes.empty()) { - assert(PendingAllocs.back().MIBs.size() == TotalSizes.size()); - PendingAllocs.back().TotalSizes = std::move(TotalSizes); + if (!AllContextSizes.empty()) { + assert(PendingAllocs.back().MIBs.size() == AllContextSizes.size()); + PendingAllocs.back().ContextSizeInfos = std::move(AllContextSizes); } break; } @@ -8091,21 +8125,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { SmallVector Versions; for (unsigned J = 0; J < NumVersions; J++) Versions.push_back(Record[I++]); - std::vector TotalSizes; - // We either have no sizes or NumMIBs of them. 
- assert(I == Record.size() || Record.size() - I == NumMIBs); - if (I < Record.size()) { - MIBsRead = 0; - while (MIBsRead++ < NumMIBs) { - TotalSizes.push_back(Record[I++]); - } - } - PendingAllocs.push_back( - AllocInfo(std::move(Versions), std::move(MIBs))); - if (!TotalSizes.empty()) { - assert(PendingAllocs.back().MIBs.size() == TotalSizes.size()); - PendingAllocs.back().TotalSizes = std::move(TotalSizes); - } + assert(I == Record.size()); + PendingAllocs.push_back(AllocInfo(std::move(Versions), std::move(MIBs))); break; } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index ee9cc4b6e0c0e..a1bc573806235 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -230,7 +230,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase { void writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, - unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F); + unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId, + const Function &F); void writeModuleLevelReferences(const GlobalVariable &V, SmallVector &NameVals, unsigned FSModRefsAbbrev, @@ -4193,9 +4194,10 @@ static void writeTypeIdCompatibleVtableSummaryRecord( static void writeFunctionHeapProfileRecords( BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev, - unsigned AllocAbbrev, bool PerModule, + unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule, std::function GetValueID, - std::function GetStackIndex) { + std::function GetStackIndex, + bool WriteContextSizeInfoIndex) { SmallVector Record; for (auto &CI : FS->callsites()) { @@ -4237,10 +4239,34 @@ static void writeFunctionHeapProfileRecords( for (auto V : AI.Versions) Record.push_back(V); } - assert(AI.TotalSizes.empty() || AI.TotalSizes.size() == AI.MIBs.size()); - if (!AI.TotalSizes.empty()) { - 
for (auto Size : AI.TotalSizes) - Record.push_back(Size); + assert(AI.ContextSizeInfos.empty() || + AI.ContextSizeInfos.size() == AI.MIBs.size()); + // Optionally emit the context size information if it exists. + if (WriteContextSizeInfoIndex && !AI.ContextSizeInfos.empty()) { + // The abbreviation id for the context ids record should have been created + // if we are emitting the per-module index, which is where we write this + // info. + assert(ContextIdAbbvId); + SmallVector ContextIds; + // At least one context id per ContextSizeInfos entry (MIB), broken into 2 + // halves. + ContextIds.reserve(AI.ContextSizeInfos.size() * 2); + for (auto &Infos : AI.ContextSizeInfos) { + Record.push_back(Infos.size()); + for (auto [FullStackId, TotalSize] : Infos) { + // The context ids are emitted separately as a fixed width array, + // which is more efficient than a VBR given that these hashes are + // typically close to 64-bits. The max fixed width entry is 32 bits so + // it is split into 2. + ContextIds.push_back(static_cast(FullStackId >> 32)); + ContextIds.push_back(static_cast(FullStackId)); + Record.push_back(TotalSize); + } + } + // The context ids are expected by the reader to immediately precede the + // associated alloc info record. + Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds, + ContextIdAbbvId); } Stream.EmitRecord(PerModule ? 
bitc::FS_PERMODULE_ALLOC_INFO : bitc::FS_COMBINED_ALLOC_INFO, @@ -4253,7 +4279,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsRelBFAbbrev, unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev, - unsigned AllocAbbrev, const Function &F) { + unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) { NameVals.push_back(ValueID); FunctionSummary *FS = cast(Summary); @@ -4264,10 +4290,11 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( }); writeFunctionHeapProfileRecords( - Stream, FS, CallsiteAbbrev, AllocAbbrev, + Stream, FS, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, /*PerModule*/ true, /*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); }, - /*GetStackIndex*/ [&](unsigned I) { return I; }); + /*GetStackIndex*/ [&](unsigned I) { return I; }, + /*WriteContextSizeInfoIndex*/ true); auto SpecialRefCnts = FS->specialRefCounts(); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); @@ -4399,11 +4426,23 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS)); // numids x stackid StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + // FIXME: The stack ids are hashes that are close to 64 bits in size, so + // emitting as a pair of 32-bit fixed-width values, as we do for context + // ids, would be more efficient. 
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv)); Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId); } + // n x context id + auto ContextIdAbbv = std::make_shared(); + ContextIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_ALLOC_CONTEXT_IDS)); + ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + // The context ids are hashes that are close to 64 bits in size, so emitting + // as a pair of 32-bit fixed-width values is more efficient than a VBR. + ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + unsigned ContextIdAbbvId = Stream.EmitAbbrev(std::move(ContextIdAbbv)); + // Abbrev for FS_PERMODULE_PROFILE. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); @@ -4484,7 +4523,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib // n x (alloc type, numstackids, numstackids x stackidindex) - // optional: nummib x total size + // optional: nummib x (numcontext x total size) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv)); @@ -4508,7 +4547,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { auto *Summary = VI.getSummaryList()[0].get(); writePerModuleFunctionSummaryRecord( NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev, - FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, F); + FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F); } // Capture references from GlobalVariable initializers, which are outside @@ -4737,7 +4776,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { getReferencedTypeIds(FS, ReferencedTypeIds); writeFunctionHeapProfileRecords( - Stream, FS, CallsiteAbbrev, AllocAbbrev, + Stream, FS, 
CallsiteAbbrev, AllocAbbrev, /*ContextIdAbbvId*/ 0, /*PerModule*/ false, /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned { @@ -4757,7 +4796,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { // the case of distributed indexes). assert(StackIdIndicesToIndex.contains(I)); return StackIdIndicesToIndex[I]; - }); + }, + /*WriteContextSizeInfoIndex*/ false); NameVals.push_back(*ValueId); assert(ModuleIdMap.count(FS->modulePath())); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ee807ca13787d..61f9c0cfe69f2 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4998,14 +4998,35 @@ void Verifier::visitMemProfMetadata(Instruction &I, MDNode *MD) { MDNode *StackMD = dyn_cast(MIB->getOperand(0)); visitCallStackMetadata(StackMD); - // Check that remaining operands, except possibly the last, are MDString. - Check(llvm::all_of(MIB->operands().drop_front().drop_back(), - [](const MDOperand &Op) { return isa(Op); }), - "Not all !memprof MemInfoBlock operands 1 to N-1 are MDString", MIB); - // The last operand might be the total profiled size so can be an integer. - auto &LastOperand = MIB->operands().back(); - Check(isa(LastOperand) || mdconst::hasa(LastOperand), - "Last !memprof MemInfoBlock operand not MDString or int", MIB); + // The next set of 1 or more operands should be MDString. + unsigned I = 1; + for (; I < MIB->getNumOperands(); ++I) { + if (!isa(MIB->getOperand(I))) { + Check(I > 1, + "!memprof MemInfoBlock second operand should be an MDString", + MIB); + break; + } + } + + // Any remaining should be MDNode that are pairs of integers + for (; I < MIB->getNumOperands(); ++I) { + MDNode *OpNode = dyn_cast(MIB->getOperand(I)); + Check(OpNode, "Not all !memprof MemInfoBlock operands 2 to N are MDNode", + MIB); + Check(OpNode->getNumOperands() == 2, + "Not all !memprof MemInfoBlock operands 2 to N are MDNode with 2 " + "operands", + MIB); + // Check that all of Op's operands are ConstantInt. 
+ Check(llvm::all_of(OpNode->operands(), + [](const MDOperand &Op) { + return mdconst::hasa(Op); + }), + "Not all !memprof MemInfoBlock operands 2 to N are MDNode with " + "ConstantInt operands", + MIB); + } } } diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index da5ded23ecc04..99b23c250c538 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -475,7 +475,8 @@ class CallsiteContextGraph { void addStackNodesForMIB(ContextNode *AllocNode, CallStack &StackContext, CallStack &CallsiteContext, - AllocationType AllocType, uint64_t TotalSize); + AllocationType AllocType, + ArrayRef ContextSizeInfo); /// Matches all callsite metadata (or summary) to the nodes created for /// allocation memprof MIB metadata, synthesizing new nodes to reflect any @@ -705,9 +706,10 @@ class CallsiteContextGraph { /// Map from each context ID to the AllocationType assigned to that context. DenseMap ContextIdToAllocationType; - /// Map from each contextID to the profiled aggregate allocation size, + /// Map from each contextID to the profiled full contexts and their total + /// sizes (there may be more than one due to context trimming), /// optionally populated when requested (via MemProfReportHintedSizes). - DenseMap ContextIdToTotalSize; + DenseMap> ContextIdToContextSizeInfos; /// Identifies the context node created for a stack id when adding the MIB /// contexts to the graph. This is used to locate the context nodes when @@ -1203,8 +1205,7 @@ template void CallsiteContextGraph::addStackNodesForMIB( ContextNode *AllocNode, CallStack &StackContext, CallStack &CallsiteContext, AllocationType AllocType, - uint64_t TotalSize) { - assert(!MemProfReportHintedSizes || TotalSize > 0); + ArrayRef ContextSizeInfo) { // Treating the hot alloc type as NotCold before the disambiguation for "hot" // is done. 
if (AllocType == AllocationType::Hot) @@ -1213,8 +1214,9 @@ void CallsiteContextGraph::addStackNodesForMIB( ContextIdToAllocationType[++LastContextId] = AllocType; if (MemProfReportHintedSizes) { - assert(TotalSize); - ContextIdToTotalSize[LastContextId] = TotalSize; + assert(!ContextSizeInfo.empty()); + auto &Entry = ContextIdToContextSizeInfos[LastContextId]; + Entry.insert(Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end()); } // Update alloc type and context ids for this MIB. @@ -1259,10 +1261,6 @@ CallsiteContextGraph::duplicateContextIds( assert(ContextIdToAllocationType.count(OldId)); // The new context has the same allocation type as original. ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; - // For now set this to 0 so we don't duplicate sizes. Not clear how to divvy - // up the size. Assume that if we are able to duplicate context ids that we - // will be able to disambiguate all copies. - ContextIdToTotalSize[LastContextId] = 0; } return NewContextIds; } @@ -1961,12 +1959,28 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( // Add all of the MIBs and their stack nodes. for (auto &MDOp : MemProfMD->operands()) { auto *MIBMD = cast(MDOp); + std::vector ContextSizeInfo; + // Collect the context size information if it exists. 
+ if (MIBMD->getNumOperands() > 2) { + for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) { + MDNode *ContextSizePair = + dyn_cast(MIBMD->getOperand(I)); + assert(ContextSizePair->getNumOperands() == 2); + uint64_t FullStackId = mdconst::dyn_extract( + ContextSizePair->getOperand(0)) + ->getZExtValue(); + uint64_t TotalSize = mdconst::dyn_extract( + ContextSizePair->getOperand(1)) + ->getZExtValue(); + ContextSizeInfo.push_back({FullStackId, TotalSize}); + } + } MDNode *StackNode = getMIBStackNode(MIBMD); assert(StackNode); CallStack StackContext(StackNode); addStackNodesForMIB( AllocNode, StackContext, CallsiteContext, - getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD)); + getMIBAllocType(MIBMD), ContextSizeInfo); } assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); // Memprof and callsite metadata on memory allocations no longer @@ -2042,17 +2056,19 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( EmptyContext; unsigned I = 0; assert(!MemProfReportHintedSizes || - AN.TotalSizes.size() == AN.MIBs.size()); + AN.ContextSizeInfos.size() == AN.MIBs.size()); // Now add all of the MIBs and their stack nodes. 
for (auto &MIB : AN.MIBs) { CallStack::const_iterator> StackContext(&MIB); - uint64_t TotalSize = 0; - if (MemProfReportHintedSizes) - TotalSize = AN.TotalSizes[I]; + std::vector ContextSizeInfo; + if (MemProfReportHintedSizes) { + for (auto [FullStackId, TotalSize] : AN.ContextSizeInfos[I]) + ContextSizeInfo.push_back({FullStackId, TotalSize}); + } addStackNodesForMIB::const_iterator>( AllocNode, StackContext, EmptyContext, MIB.AllocType, - TotalSize); + ContextSizeInfo); I++; } assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); @@ -2824,13 +2840,18 @@ void CallsiteContextGraph::printTotalSizes( std::vector SortedIds(ContextIds.begin(), ContextIds.end()); std::sort(SortedIds.begin(), SortedIds.end()); for (auto Id : SortedIds) { - auto SizeI = ContextIdToTotalSize.find(Id); - assert(SizeI != ContextIdToTotalSize.end()); auto TypeI = ContextIdToAllocationType.find(Id); assert(TypeI != ContextIdToAllocationType.end()); - OS << getAllocTypeString((uint8_t)TypeI->second) << " context " << Id - << " with total size " << SizeI->second << " is " - << getAllocTypeString(Node->AllocTypes) << " after cloning\n"; + auto CSI = ContextIdToContextSizeInfos.find(Id); + if (CSI != ContextIdToContextSizeInfos.end()) { + for (auto &Info : CSI->second) { + OS << "MemProf hinting: " + << getAllocTypeString((uint8_t)TypeI->second) + << " full allocation context " << Info.FullStackId + << " with total size " << Info.TotalSize << " is " + << getAllocTypeString(Node->AllocTypes) << " after cloning\n"; + } + } } } } diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 4a43120c9a9e7..42c01fe832572 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -716,19 +716,22 @@ computeFullStackId(const std::vector &CallStack) { } static AllocationType addCallStack(CallStackTrie &AllocTrie, - const AllocationInfo *AllocInfo) { + const 
AllocationInfo *AllocInfo, + uint64_t FullStackId) { SmallVector StackIds; for (const auto &StackFrame : AllocInfo->CallStack) StackIds.push_back(computeStackId(StackFrame)); auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), AllocInfo->Info.getAllocCount(), AllocInfo->Info.getTotalLifetime()); - uint64_t TotalSize = 0; + std::vector ContextSizeInfo; if (MemProfReportHintedSizes) { - TotalSize = AllocInfo->Info.getTotalSize(); + auto TotalSize = AllocInfo->Info.getTotalSize(); assert(TotalSize); + assert(FullStackId != 0); + ContextSizeInfo.push_back({FullStackId, TotalSize}); } - AllocTrie.addCallStack(AllocType, StackIds, TotalSize); + AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo)); return AllocType; } @@ -964,11 +967,14 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, InlinedCallStack)) { NumOfMemProfMatchedAllocContexts++; - auto AllocType = addCallStack(AllocTrie, AllocInfo); + uint64_t FullStackId = 0; + if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes) + FullStackId = computeFullStackId(AllocInfo->CallStack); + auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId); // Record information about the allocation if match info printing // was requested. 
if (ClPrintMemProfMatchInfo) { - auto FullStackId = computeFullStackId(AllocInfo->CallStack); + assert(FullStackId != 0); FullStackIdToAllocMatchInfo[FullStackId] = { AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true}; } diff --git a/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll b/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll index 42819d5421ca0..3e8aa9766d6c5 100644 --- a/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll +++ b/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll @@ -62,9 +62,9 @@ attributes #0 = { noinline optnone } !0 = !{i64 8632435727821051414} !1 = !{i64 -3421689549917153178} !2 = !{!3, !5} -!3 = !{!4, !"notcold", i64 100} +!3 = !{!4, !"notcold"} !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold", i64 400} +!5 = !{!6, !"cold"} !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} !7 = !{i64 9086428284934609951} !8 = !{i64 -5964873800580613432} diff --git a/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll b/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll index 663f8525043c2..9169cc03d08d6 100644 --- a/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll +++ b/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll @@ -62,9 +62,9 @@ attributes #0 = { noinline optnone } !0 = !{i64 8632435727821051414} !1 = !{i64 -3421689549917153178} !2 = !{!3, !5} -!3 = !{!4, !"notcold", i64 100} +!3 = !{!4, !"notcold"} !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold", i64 400} +!5 = !{!6, !"cold"} !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} !7 = !{i64 9086428284934609951} !8 = !{i64 -5964873800580613432} diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll index 6922dbfd36846..6de301340acf3 100644 --- 
a/llvm/test/ThinLTO/X86/memprof-basic.ll +++ b/llvm/test/ThinLTO/X86/memprof-basic.ll @@ -45,7 +45,7 @@ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: -memprof-report-hinted-sizes \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \ -; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=DUMP-SIZES \ ; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS \ ; RUN: --check-prefix=SIZES @@ -128,13 +128,16 @@ attributes #0 = { noinline optnone } !0 = !{i64 8632435727821051414} !1 = !{i64 -3421689549917153178} !2 = !{!3, !5} -!3 = !{!4, !"notcold", i64 100} +!3 = !{!4, !"notcold", !10} !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold", i64 400} +!5 = !{!6, !"cold", !11, !12} !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} !7 = !{i64 9086428284934609951} !8 = !{i64 -5964873800580613432} !9 = !{i64 2732490490862098848} +!10 = !{i64 123, i64 100} +!11 = !{i64 456, i64 200} +!12 = !{i64 789, i64 300} ; DUMP: CCG before cloning: @@ -143,6 +146,9 @@ attributes #0 = { noinline optnone } ; DUMP: Versions: 1 MIB: ; DUMP: AllocType 1 StackIds: 2, 3, 0 ; DUMP: AllocType 2 StackIds: 2, 3, 1 +; DUMP-SIZES: ContextSizeInfo per MIB: +; DUMP-SIZES: { 123, 100 } +; DUMP-SIZES: { 456, 200 }, { 789, 300 } ; DUMP: (clone 0) ; DUMP: AllocTypes: NotColdCold ; DUMP: ContextIds: 1 2 @@ -267,8 +273,9 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 ; DUMP: Clone of [[BAR]] -; SIZES: NotCold context 1 with total size 100 is NotCold after cloning -; SIZES: Cold context 2 with total size 400 is Cold after cloning +; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning +; SIZES: Cold full allocation context 456 
with total size 200 is Cold after cloning +; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning ; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1 ; REMARKS: created clone _Z3barv.memprof.1 diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll index 8f9df20471e41..c2810dfabffbd 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll @@ -66,9 +66,9 @@ attributes #6 = { builtin } !0 = !{i64 8632435727821051414} !1 = !{i64 -3421689549917153178} !2 = !{!3, !5} -!3 = !{!4, !"notcold", i64 100} +!3 = !{!4, !"notcold"} !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold", i64 400} +!5 = !{!6, !"cold"} !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} !7 = !{i64 9086428284934609951} !8 = !{i64 -5964873800580613432} diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll index c3c164d492863..068e1f116519e 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll @@ -66,9 +66,9 @@ attributes #6 = { builtin } !0 = !{i64 8632435727821051414} !1 = !{i64 -3421689549917153178} !2 = !{!3, !5} -!3 = !{!4, !"notcold", i64 100} +!3 = !{!4, !"notcold"} !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold", i64 400} +!5 = !{!6, !"cold"} !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} !7 = !{i64 9086428284934609951} !8 = !{i64 -5964873800580613432} diff --git 
a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll index a82f872d51c7d..952e2519bbf0b 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll @@ -106,13 +106,16 @@ attributes #6 = { builtin } !0 = !{i64 8632435727821051414} !1 = !{i64 -3421689549917153178} !2 = !{!3, !5} -!3 = !{!4, !"notcold", i64 100} +!3 = !{!4, !"notcold", !10} !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold", i64 400} +!5 = !{!6, !"cold", !11, !12} !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} !7 = !{i64 9086428284934609951} !8 = !{i64 -5964873800580613432} !9 = !{i64 2732490490862098848} +!10 = !{i64 123, i64 100} +!11 = !{i64 456, i64 200} +!12 = !{i64 789, i64 300} ; DUMP: CCG before cloning: @@ -249,8 +252,9 @@ attributes #6 = { builtin } ; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv ; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold -; SIZES: NotCold context 1 with total size 100 is NotCold after cloning -; SIZES: Cold context 2 with total size 400 is Cold after cloning +; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning +; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning +; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning ; IR: define {{.*}} @main ;; The first call to foo does not allocate cold memory. 
It should call the diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index e1457ca7251ed..d6c86bb7ad5a8 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -335,17 +335,24 @@ for.end: ; preds = %for.cond ; MEMPROF: ![[C11]] = !{i64 1544787832369987002} ;; For non-context sensitive allocations that get attributes we emit a message -;; with the allocation hash, type, and size in bytes. -; TOTALSIZES: Total size for allocation with location hash 6792096022461663180 and single alloc type notcold: 10 -; TOTALSIZES: Total size for allocation with location hash 15737101490731057601 and single alloc type cold: 10 -;; For context sensitive allocations the size in bytes is included on the MIB -;; metadata. -; TOTALSIZES: !"cold", i64 10} -; TOTALSIZES: !"cold", i64 10} -; TOTALSIZES: !"notcold", i64 10} -; TOTALSIZES: !"cold", i64 20} -; TOTALSIZES: !"notcold", i64 10} - +;; with the full allocation context hash, type, and size in bytes. +; TOTALSIZES: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10 +; TOTALSIZES: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10 +;; For context sensitive allocations the full context hash and size in bytes +;; are in separate metadata nodes included on the MIB metadata. +; TOTALSIZES: !"cold", ![[CONTEXT1:[0-9]+]]} +; TOTALSIZES: ![[CONTEXT1]] = !{i64 8525406123785421946, i64 10} +; TOTALSIZES: !"cold", ![[CONTEXT2:[0-9]+]]} +; TOTALSIZES: ![[CONTEXT2]] = !{i64 -6732513409544482918, i64 10} +; TOTALSIZES: !"notcold", ![[CONTEXT3:[0-9]+]]} +; TOTALSIZES: ![[CONTEXT3]] = !{i64 5725971306423925017, i64 10} +;; There can be more than one context id / size pair due to context trimming +;; when we match. 
+; TOTALSIZES: !"cold", ![[CONTEXT4:[0-9]+]], ![[CONTEXT5:[0-9]+]]} +; TOTALSIZES: ![[CONTEXT4]] = !{i64 -2103941543456458045, i64 10} +; TOTALSIZES: ![[CONTEXT5]] = !{i64 -191931298737547222, i64 10} +; TOTALSIZES: !"notcold", ![[CONTEXT6:[0-9]+]]} +; TOTALSIZES: ![[CONTEXT6]] = !{i64 1093248920606587996, i64 10} ; MEMPROFNOCOLINFO: #[[A1]] = { builtin allocsize(0) "memprof"="notcold" } ; MEMPROFNOCOLINFO: #[[A2]] = { builtin allocsize(0) "memprof"="cold" } diff --git a/llvm/test/Verifier/memprof-metadata-bad.ll b/llvm/test/Verifier/memprof-metadata-bad.ll index f4f1f6bb0a463..b8c2c2d8a2c99 100644 --- a/llvm/test/Verifier/memprof-metadata-bad.ll +++ b/llvm/test/Verifier/memprof-metadata-bad.ll @@ -43,7 +43,7 @@ declare dso_local noalias noundef ptr @malloc(i64 noundef) !6 = !{i64 0} !7 = !{!8} ; CHECK: call stack metadata should have at least 1 operand -; CHECK: Not all !memprof MemInfoBlock operands 1 to N-1 are MDString +; CHECK: Not all !memprof MemInfoBlock operands 2 to N are MDNode !8 = !{!0, !"default", i64 0, i64 5} !9 = !{i64 123} ; CHECK: call stack metadata operand should be constant integer