diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 73e74f3836ca4..b362a88963f6c 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -335,6 +335,11 @@ enum GlobalValueSummarySymtabCodes {
   // CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
   // [n x entry]
   FS_CONTEXT_RADIX_TREE_ARRAY = 32,
+  // Summary of combined index allocation memprof metadata, without context.
+  // [nummib, numver,
+  //  nummib x alloc type,
+  //  numver x version]
+  FS_COMBINED_ALLOC_INFO_NO_CONTEXT = 33,
 };
 
 enum MetadataCodes {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 96f86eb52f15c..2dfc8254d5885 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -10781,12 +10781,15 @@ bool LLParser::parseMemProfs(std::vector<MIBInfo> &MIBs) {
     return true;
 
   SmallVector<unsigned> StackIdIndices;
-  do {
-    uint64_t StackId = 0;
-    if (parseUInt64(StackId))
-      return true;
-    StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
-  } while (EatIfPresent(lltok::comma));
+  // Combined index alloc records may not have a stack id list.
+  if (Lex.getKind() != lltok::rparen) {
+    do {
+      uint64_t StackId = 0;
+      if (parseUInt64(StackId))
+        return true;
+      StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
+    } while (EatIfPresent(lltok::comma));
+  }
 
   if (parseToken(lltok::rparen, "expected ')' in stackIds"))
     return true;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 032c0de3c7a00..fe9e0ddca7091 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -330,6 +330,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
       STRINGIFY_CODE(FS, STACK_IDS)
       STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
       STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
+      STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO_NO_CONTEXT)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index de7e9bbe69bd4..a7fbb0c74cb1e 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -8178,7 +8178,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       break;
    }

-    case bitc::FS_COMBINED_ALLOC_INFO: {
+    case bitc::FS_COMBINED_ALLOC_INFO:
+    case bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT: {
      unsigned I = 0;
      std::vector<MIBInfo> MIBs;
      unsigned NumMIBs = Record[I++];
@@ -8187,7 +8188,9 @@
      while (MIBsRead++ < NumMIBs) {
        assert(Record.size() - I >= 2);
        AllocationType AllocType = (AllocationType)Record[I++];
-        auto StackIdList = parseAllocInfoContext(Record, I);
+        SmallVector<unsigned> StackIdList;
+        if (BitCode == bitc::FS_COMBINED_ALLOC_INFO)
+          StackIdList = parseAllocInfoContext(Record, I);
        MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
      }
      assert(Record.size() - I >= NumVersions);
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 27ada0ddcd831..ef397879a132c 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -99,6 +99,22 @@ static cl::opt<bool> WriteRelBFToSummary(
     "write-relbf-to-summary", cl::Hidden, cl::init(false),
     cl::desc("Write relative block frequency to function summary "));
 
+// Since we only use the context information in the memprof summary records in
+// the LTO backends to do assertion checking, save time and space by only
+// serializing the context for non-NDEBUG builds.
+// TODO: Currently this controls writing the context of the allocation info
+// records, which are larger and more expensive, but we should do this for the
+// callsite records as well.
+// FIXME: Convert to a const once this has undergone more significant testing.
+static cl::opt<bool>
+    CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden,
+#ifdef NDEBUG
+                                cl::init(false),
+#else
+                                cl::init(true),
+#endif
+                                cl::desc(""));
+
 namespace llvm {
 extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
 }
@@ -528,10 +544,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
        for (auto Idx : CI.StackIdIndices)
          RecordStackIdReference(Idx);
      }
-      for (auto &AI : FS->allocs())
-        for (auto &MIB : AI.MIBs)
-          for (auto Idx : MIB.StackIdIndices)
-            RecordStackIdReference(Idx);
+      if (CombinedIndexMemProfContext) {
+        for (auto &AI : FS->allocs())
+          for (auto &MIB : AI.MIBs)
+            for (auto Idx : MIB.StackIdIndices)
+              RecordStackIdReference(Idx);
+      }
    });
  }
 
@@ -4349,9 +4367,14 @@ static void writeFunctionHeapProfileRecords(
      Record.push_back(AI.Versions.size());
    for (auto &MIB : AI.MIBs) {
      Record.push_back((uint8_t)MIB.AllocType);
-      // Record the index into the radix tree array for this context.
-      assert(CallStackCount <= CallStackPos.size());
-      Record.push_back(CallStackPos[CallStackCount++]);
+      // The per-module summary always needs to include the alloc context, as we
+      // use it during the thin link. For the combined index it is optional (see
+      // comments where CombinedIndexMemProfContext is defined).
+      if (PerModule || CombinedIndexMemProfContext) {
+        // Record the index into the radix tree array for this context.
+        assert(CallStackCount <= CallStackPos.size());
+        Record.push_back(CallStackPos[CallStackCount++]);
+      }
    }
    if (!PerModule)
      llvm::append_range(Record, AI.Versions);
@@ -4384,8 +4407,11 @@
      Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds,
                        ContextIdAbbvId);
    }
-    Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
-                                : bitc::FS_COMBINED_ALLOC_INFO,
+    Stream.EmitRecord(PerModule
+                          ? bitc::FS_PERMODULE_ALLOC_INFO
+                          : (CombinedIndexMemProfContext
+                                 ? bitc::FS_COMBINED_ALLOC_INFO
+                                 : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT),
                      Record, AllocAbbrev);
  }
 }
@@ -4847,7 +4873,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
   Abbv = std::make_shared<BitCodeAbbrev>();
-  Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
+  Abbv->Add(BitCodeAbbrevOp(CombinedIndexMemProfContext
+                                ? bitc::FS_COMBINED_ALLOC_INFO
+                                : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
   // nummib x (alloc type, context radix tree index),
@@ -4857,13 +4885,6 @@
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
-  Abbv = std::make_shared<BitCodeAbbrev>();
-  Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
-  // n x entry
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
-  unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
   auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
     if (DecSummaries == nullptr)
       return false;
@@ -4900,44 +4921,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     NameVals.clear();
   };
 
-  // First walk through all the functions and collect the allocation contexts in
-  // their associated summaries, for use in constructing a radix tree of
-  // contexts. Note that we need to do this in the same order as the functions
-  // are processed further below since the call stack positions in the resulting
-  // radix tree array are identified based on this order.
-  MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
-  forEachSummary([&](GVInfo I, bool IsAliasee) {
-    // Don't collect this when invoked for an aliasee, as it is not needed for
-    // the alias summary. If the aliasee is to be imported, we will invoke this
-    // separately with IsAliasee=false.
-    if (IsAliasee)
-      return;
-    GlobalValueSummary *S = I.second;
-    assert(S);
-    auto *FS = dyn_cast<FunctionSummary>(S);
-    if (!FS)
-      return;
-    collectMemProfCallStacks(
-        FS,
-        /*GetStackIndex*/
-        [&](unsigned I) {
-          // Get the corresponding index into the list of StackIds actually
-          // being written for this combined index (which may be a subset in
-          // the case of distributed indexes).
-          assert(StackIdIndicesToIndex.contains(I));
-          return StackIdIndicesToIndex[I];
-        },
-        CallStacks);
-  });
-  // Finalize the radix tree, write it out, and get the map of positions in the
-  // linearized tree array.
   DenseMap<CallStackId, LinearCallStackId> CallStackPos;
-  if (!CallStacks.empty()) {
-    CallStackPos =
-        writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
+  if (CombinedIndexMemProfContext) {
+    Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+    // n x entry
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+    // First walk through all the functions and collect the allocation contexts
+    // in their associated summaries, for use in constructing a radix tree of
+    // contexts. Note that we need to do this in the same order as the functions
+    // are processed further below since the call stack positions in the
+    // resulting radix tree array are identified based on this order.
+    MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+    forEachSummary([&](GVInfo I, bool IsAliasee) {
+      // Don't collect this when invoked for an aliasee, as it is not needed for
+      // the alias summary. If the aliasee is to be imported, we will invoke
+      // this separately with IsAliasee=false.
+      if (IsAliasee)
+        return;
+      GlobalValueSummary *S = I.second;
+      assert(S);
+      auto *FS = dyn_cast<FunctionSummary>(S);
+      if (!FS)
+        return;
+      collectMemProfCallStacks(
+          FS,
+          /*GetStackIndex*/
+          [&](unsigned I) {
+            // Get the corresponding index into the list of StackIds actually
+            // being written for this combined index (which may be a subset in
+            // the case of distributed indexes).
+            assert(StackIdIndicesToIndex.contains(I));
+            return StackIdIndicesToIndex[I];
+          },
+          CallStacks);
+    });
+    // Finalize the radix tree, write it out, and get the map of positions in
+    // the linearized tree array.
+    if (!CallStacks.empty()) {
+      CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream,
+                                                 RadixAbbrev);
+    }
   }
-  // Keep track of the current index into the CallStackPos map.
+  // Keep track of the current index into the CallStackPos map. Not used if
+  // CombinedIndexMemProfContext is false.
   CallStackId CallStackCount = 0;
 
   DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
diff --git a/llvm/test/Assembler/thinlto-memprof-summary.ll b/llvm/test/Assembler/thinlto-memprof-summary.ll
index 69eafc967c2a3..4e4a928c1f024 100644
--- a/llvm/test/Assembler/thinlto-memprof-summary.ll
+++ b/llvm/test/Assembler/thinlto-memprof-summary.ll
@@ -1,5 +1,12 @@
 ;; Test memprof summary parsing (tests all types/fields in various combinations).
-; RUN: llvm-as %s -o - | llvm-dis -o - | FileCheck %s
+
+;; Force enable -combined-index-memprof-context to get the allocation context
+;; stack ids even in release builds.
+; RUN: llvm-as %s -o - -combined-index-memprof-context | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,CONTEXT
+
+;; Force disable -combined-index-memprof-context to block the allocation context
+;; stack ids even in non-release builds.
+; RUN: llvm-as %s -o - -combined-index-memprof-context=false | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,NOCONTEXT
 
 ; ModuleID = 'thinlto-memprof-summary.thinlto.bc'
 
@@ -17,8 +24,10 @@
 
 ; Make sure we get back from llvm-dis what we put in via llvm-as.
 ; CHECK: ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
-; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
+; CONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
+; NOCONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()), (type: hot, stackIds: ()))))))))
 ; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789))))))
-; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
+; CONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
+; NOCONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: ()), (type: notcold, stackIds: ()))))))))
 ; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789))))))
 ; CHECK: ^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414))))))
diff --git a/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll b/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
index 63139cacd8fba..fc7d04fcf4d58 100644
--- a/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
+++ b/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
@@ -33,6 +33,7 @@
 ; RUN: llvm-lto2 run %t/b.o %t/a.o -enable-memprof-context-disambiguation \
 ; RUN:   -supports-hot-cold-new \
 ; RUN:   -thinlto-distributed-indexes \
+; RUN:   -combined-index-memprof-context \
 ; RUN:   -r=%t/b.o,_Z3fooi,plx \
 ; RUN:   -r=%t/b.o,aliasee,plx \
 ; RUN:   -r=%t/b.o,a \