@@ -99,6 +99,22 @@ static cl::opt<bool> WriteRelBFToSummary(
9999 " write-relbf-to-summary" , cl::Hidden, cl::init(false ),
100100 cl::desc(" Write relative block frequency to function summary " ));
101101
102+ // Since we only use the context information in the memprof summary records in
103+ // the LTO backends to do assertion checking, save time and space by only
104+ // serializing the context for non-NDEBUG builds.
105+ // TODO: Currently this controls writing context of the allocation info records,
106+ // which are larger and more expensive, but we should do this for the callsite
107+ // records as well.
108+ // FIXME: Convert to a const once this has undergone more sigificant testing.
109+ static cl::opt<bool >
110+ CombinedIndexMemProfContext (" combined-index-memprof-context" , cl::Hidden,
111+ #ifdef NDEBUG
112+ cl::init (false ),
113+ #else
114+ cl::init (true ),
115+ #endif
116+ cl::desc (" " ));
117+
102118namespace llvm {
103119extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
104120}
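Editor's note on the hunk above: `NDEBUG` is defined in release (no-asserts) builds, so the new option defaults to false there and the allocation context is omitted from the combined index, while asserts builds keep it for the LTO-backend assertion checking mentioned in the comment. A minimal, self-contained sketch of that default selection follows; the names in it are illustrative only and not part of the patch.

```cpp
// Illustrative only: shows how an NDEBUG-dependent default behaves.
#include <cstdio>

static constexpr bool DefaultSerializeAllocContext =
#ifdef NDEBUG
    false; // release build: skip serializing the alloc context
#else
    true;  // asserts build: keep it so the LTO backends can assert on it
#endif

int main() {
  std::printf("serialize alloc context by default: %s\n",
              DefaultSerializeAllocContext ? "yes" : "no");
  return 0;
}
```

Because the patch keeps this as a `cl::opt` rather than a constant (per the FIXME), the default can still be overridden on the command line while the change is being qualified.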
@@ -528,10 +544,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
         for (auto Idx : CI.StackIdIndices)
           RecordStackIdReference(Idx);
       }
-      for (auto &AI : FS->allocs())
-        for (auto &MIB : AI.MIBs)
-          for (auto Idx : MIB.StackIdIndices)
-            RecordStackIdReference(Idx);
+      if (CombinedIndexMemProfContext) {
+        for (auto &AI : FS->allocs())
+          for (auto &MIB : AI.MIBs)
+            for (auto Idx : MIB.StackIdIndices)
+              RecordStackIdReference(Idx);
+      }
     });
   }
 
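The hunk above only gates which stack-id indices get marked as referenced: when the allocation context will not be serialized, its stack ids need not be kept alive in the combined index. A stand-alone sketch of that idea, using hypothetical types in place of the summary data structures (this is not the LLVM API):

```cpp
// Hypothetical stand-ins for the summary data structures; not the LLVM API.
#include <cstdio>
#include <set>
#include <vector>

struct MIBInfoSketch {
  std::vector<unsigned> StackIdIndices; // indices into the index's stack id list
};

// Only mark alloc-context stack ids as referenced when the context will
// actually be serialized; otherwise the writer can drop unreferenced ids.
static void collectReferencedStackIds(const std::vector<MIBInfoSketch> &MIBs,
                                      bool SerializeAllocContext,
                                      std::set<unsigned> &Referenced) {
  if (!SerializeAllocContext)
    return;
  for (const auto &MIB : MIBs)
    for (unsigned Idx : MIB.StackIdIndices)
      Referenced.insert(Idx);
}

int main() {
  std::vector<MIBInfoSketch> MIBs = {{{0, 1}}, {{1, 2}}};
  std::set<unsigned> Referenced;
  collectReferencedStackIds(MIBs, /*SerializeAllocContext=*/false, Referenced);
  std::printf("referenced stack ids: %zu\n", Referenced.size()); // prints 0
  return 0;
}
```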
@@ -4349,9 +4367,14 @@ static void writeFunctionHeapProfileRecords(
       Record.push_back(AI.Versions.size());
     for (auto &MIB : AI.MIBs) {
       Record.push_back((uint8_t)MIB.AllocType);
-      // Record the index into the radix tree array for this context.
-      assert(CallStackCount <= CallStackPos.size());
-      Record.push_back(CallStackPos[CallStackCount++]);
+      // The per-module summary always needs to include the alloc context, as we
+      // use it during the thin link. For the combined index it is optional (see
+      // comments where CombinedIndexMemProfContext is defined).
+      if (PerModule || CombinedIndexMemProfContext) {
+        // Record the index into the radix tree array for this context.
+        assert(CallStackCount <= CallStackPos.size());
+        Record.push_back(CallStackPos[CallStackCount++]);
+      }
     }
     if (!PerModule)
       llvm::append_range(Record, AI.Versions);
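The effect of the `PerModule || CombinedIndexMemProfContext` guard is that combined-index allocation records may or may not carry a per-MIB context index. Below is a rough sketch of the two combined record shapes, assuming the field order implied by the surrounding hunks (nummib/numver counts, then per-MIB fields, then versions); the names are informal and not the actual LLVM data structures.

```cpp
// Informal sketch; field layout is assumed from the surrounding hunks.
#include <cstdint>
#include <cstdio>
#include <vector>

struct MIBSketch {
  uint8_t AllocType;
  uint32_t RadixIndex; // position of this context in the radix tree array
};

static std::vector<uint64_t>
buildCombinedAllocRecord(const std::vector<MIBSketch> &MIBs,
                         const std::vector<uint32_t> &Versions,
                         bool WithContext) {
  std::vector<uint64_t> Record;
  Record.push_back(MIBs.size());     // nummib
  Record.push_back(Versions.size()); // numver
  for (const auto &MIB : MIBs) {
    Record.push_back(MIB.AllocType);
    if (WithContext) // omitted for the NO_CONTEXT variant
      Record.push_back(MIB.RadixIndex);
  }
  for (uint32_t V : Versions)
    Record.push_back(V);
  return Record;
}

int main() {
  std::vector<MIBSketch> MIBs = {{/*AllocType=*/1, /*RadixIndex=*/42}};
  std::vector<uint32_t> Versions = {0};
  auto With = buildCombinedAllocRecord(MIBs, Versions, /*WithContext=*/true);
  auto Without = buildCombinedAllocRecord(MIBs, Versions, /*WithContext=*/false);
  std::printf("with context: %zu fields, without: %zu fields\n", With.size(),
              Without.size());
  return 0;
}
```

The shorter shape corresponds to the new FS_COMBINED_ALLOC_INFO_NO_CONTEXT record emitted in the following hunks when the flag is off.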
@@ -4384,8 +4407,11 @@ static void writeFunctionHeapProfileRecords(
       Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds,
                         ContextIdAbbvId);
     }
-    Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
-                                : bitc::FS_COMBINED_ALLOC_INFO,
+    Stream.EmitRecord(PerModule
+                          ? bitc::FS_PERMODULE_ALLOC_INFO
+                          : (CombinedIndexMemProfContext
+                                 ? bitc::FS_COMBINED_ALLOC_INFO
+                                 : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT),
                       Record, AllocAbbrev);
   }
 }
@@ -4847,7 +4873,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
   Abbv = std::make_shared<BitCodeAbbrev>();
-  Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
+  Abbv->Add(BitCodeAbbrevOp(CombinedIndexMemProfContext
+                                ? bitc::FS_COMBINED_ALLOC_INFO
+                                : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
   // nummib x (alloc type, context radix tree index),
@@ -4857,13 +4885,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
-  Abbv = std::make_shared<BitCodeAbbrev>();
-  Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
-  // n x entry
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
-  unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
   auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
     if (DecSummaries == nullptr)
       return false;
@@ -4900,44 +4921,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     NameVals.clear();
   };
 
-  // First walk through all the functions and collect the allocation contexts in
-  // their associated summaries, for use in constructing a radix tree of
-  // contexts. Note that we need to do this in the same order as the functions
-  // are processed further below since the call stack positions in the resulting
-  // radix tree array are identified based on this order.
-  MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
-  forEachSummary([&](GVInfo I, bool IsAliasee) {
-    // Don't collect this when invoked for an aliasee, as it is not needed for
-    // the alias summary. If the aliasee is to be imported, we will invoke this
-    // separately with IsAliasee=false.
-    if (IsAliasee)
-      return;
-    GlobalValueSummary *S = I.second;
-    assert(S);
-    auto *FS = dyn_cast<FunctionSummary>(S);
-    if (!FS)
-      return;
-    collectMemProfCallStacks(
-        FS,
-        /*GetStackIndex*/
-        [&](unsigned I) {
-          // Get the corresponding index into the list of StackIds actually
-          // being written for this combined index (which may be a subset in
-          // the case of distributed indexes).
-          assert(StackIdIndicesToIndex.contains(I));
-          return StackIdIndicesToIndex[I];
-        },
-        CallStacks);
-  });
-  // Finalize the radix tree, write it out, and get the map of positions in the
-  // linearized tree array.
   DenseMap<CallStackId, LinearCallStackId> CallStackPos;
-  if (!CallStacks.empty()) {
-    CallStackPos =
-        writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
+  if (CombinedIndexMemProfContext) {
+    Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+    // n x entry
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+    // First walk through all the functions and collect the allocation contexts
+    // in their associated summaries, for use in constructing a radix tree of
+    // contexts. Note that we need to do this in the same order as the functions
+    // are processed further below since the call stack positions in the
+    // resulting radix tree array are identified based on this order.
+    MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+    forEachSummary([&](GVInfo I, bool IsAliasee) {
+      // Don't collect this when invoked for an aliasee, as it is not needed for
+      // the alias summary. If the aliasee is to be imported, we will invoke
+      // this separately with IsAliasee=false.
+      if (IsAliasee)
+        return;
+      GlobalValueSummary *S = I.second;
+      assert(S);
+      auto *FS = dyn_cast<FunctionSummary>(S);
+      if (!FS)
+        return;
+      collectMemProfCallStacks(
+          FS,
+          /*GetStackIndex*/
+          [&](unsigned I) {
+            // Get the corresponding index into the list of StackIds actually
+            // being written for this combined index (which may be a subset in
+            // the case of distributed indexes).
+            assert(StackIdIndicesToIndex.contains(I));
+            return StackIdIndicesToIndex[I];
+          },
+          CallStacks);
+    });
+    // Finalize the radix tree, write it out, and get the map of positions in
+    // the linearized tree array.
+    if (!CallStacks.empty()) {
+      CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream,
+                                                 RadixAbbrev);
+    }
   }
 
-  // Keep track of the current index into the CallStackPos map.
+  // Keep track of the current index into the CallStackPos map. Not used if
+  // CombinedIndexMemProfContext is false.
   CallStackId CallStackCount = 0;
 
   DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
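For context on the CallStackPos map produced by the guarded block above: each allocation context is identified by its position in a single linearized array written via writeMemoryProfileRadixTree, and those positions are then consumed in order via CallStackCount. The stand-in below only concatenates the collected call stacks (keyed by id) into one array and records where each starts; the real radix-tree encoding additionally shares common tails between stacks, so treat this purely as a sketch of the position-mapping idea, with hypothetical type names.

```cpp
// Simplified stand-in; not the real writeMemoryProfileRadixTree.
#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

using CallStackIdSketch = uint64_t;
using LinearFrameIdSketch = uint32_t;

// Append each collected call stack to one linear array (length-prefixed here
// for simplicity) and remember the position where it starts. Downstream
// records then refer to a context by that single position.
static std::map<CallStackIdSketch, uint32_t>
linearizeCallStacks(const std::map<CallStackIdSketch,
                                   std::vector<LinearFrameIdSketch>> &CallStacks,
                    std::vector<uint64_t> &Array) {
  std::map<CallStackIdSketch, uint32_t> Pos;
  for (const auto &[Id, Frames] : CallStacks) {
    Pos[Id] = static_cast<uint32_t>(Array.size());
    Array.push_back(Frames.size());
    Array.insert(Array.end(), Frames.begin(), Frames.end());
  }
  return Pos;
}

int main() {
  std::map<CallStackIdSketch, std::vector<LinearFrameIdSketch>> CallStacks = {
      {1, {10, 20, 30}}, {2, {20, 30}}};
  std::vector<uint64_t> Array;
  auto Pos = linearizeCallStacks(CallStacks, Array);
  std::printf("context 2 starts at position %u of %zu entries\n", Pos[2],
              Array.size());
  return 0;
}
```

When CombinedIndexMemProfContext is false, none of this runs: CallStackPos stays empty, CallStackCount is never advanced, and the combined index emits the NO_CONTEXT allocation records instead.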