@@ -99,6 +99,21 @@ static cl::opt<bool> WriteRelBFToSummary(
9999 " write-relbf-to-summary" , cl::Hidden, cl::init(false ),
100100 cl::desc(" Write relative block frequency to function summary " ));
101101
102+ // The LTO backends only use the context information in the memprof summary
103+ // records for assertion checking, so save time and space by serializing the
104+ // context only in assertion-enabled (non-NDEBUG) builds by default.
105+ // TODO: This currently controls only the context of the allocation info
106+ // records, which are larger and more expensive; the callsite records should
107+ // be handled the same way.
108+ static cl::opt<bool >
109+ CombinedIndexMemProfContext (" combined-index-memprof-context" , cl::Hidden,
110+ #ifndef NDEBUG
111+ cl::init (true ),
112+ #else
113+ cl::init (false ),
114+ #endif
115+ cl::desc (" " ));
116+
102117namespace llvm {
103118extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
104119}
@@ -528,10 +543,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
528543 for (auto Idx : CI.StackIdIndices )
529544 RecordStackIdReference (Idx);
530545 }
531- for (auto &AI : FS->allocs ())
532- for (auto &MIB : AI.MIBs )
533- for (auto Idx : MIB.StackIdIndices )
534- RecordStackIdReference (Idx);
546+ if (CombinedIndexMemProfContext) {
547+ for (auto &AI : FS->allocs ())
548+ for (auto &MIB : AI.MIBs )
549+ for (auto Idx : MIB.StackIdIndices )
550+ RecordStackIdReference (Idx);
551+ }
535552 });
536553 }
537554
@@ -4349,9 +4366,11 @@ static void writeFunctionHeapProfileRecords(
43494366 Record.push_back (AI.Versions .size ());
43504367 for (auto &MIB : AI.MIBs ) {
43514368 Record.push_back ((uint8_t )MIB.AllocType );
4352- // Record the index into the radix tree array for this context.
4353- assert (CallStackCount <= CallStackPos.size ());
4354- Record.push_back (CallStackPos[CallStackCount++]);
4369+ if (PerModule || CombinedIndexMemProfContext) {
4370+ // Record the index into the radix tree array for this context.
4371+ assert (CallStackCount <= CallStackPos.size ());
4372+ Record.push_back (CallStackPos[CallStackCount++]);
4373+ }
43554374 }
43564375 if (!PerModule)
43574376 llvm::append_range (Record, AI.Versions );
@@ -4384,8 +4403,11 @@ static void writeFunctionHeapProfileRecords(
43844403 Stream.EmitRecord (bitc::FS_ALLOC_CONTEXT_IDS, ContextIds,
43854404 ContextIdAbbvId);
43864405 }
4387- Stream.EmitRecord (PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
4388- : bitc::FS_COMBINED_ALLOC_INFO,
4406+ Stream.EmitRecord (PerModule
4407+ ? bitc::FS_PERMODULE_ALLOC_INFO
4408+ : (CombinedIndexMemProfContext
4409+ ? bitc::FS_COMBINED_ALLOC_INFO
4410+ : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT),
43894411 Record, AllocAbbrev);
43904412 }
43914413}
@@ -4847,7 +4869,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
48474869 unsigned CallsiteAbbrev = Stream.EmitAbbrev (std::move (Abbv));
48484870
48494871 Abbv = std::make_shared<BitCodeAbbrev>();
4850- Abbv->Add (BitCodeAbbrevOp (bitc::FS_COMBINED_ALLOC_INFO));
4872+ Abbv->Add (BitCodeAbbrevOp (CombinedIndexMemProfContext
4873+ ? bitc::FS_COMBINED_ALLOC_INFO
4874+ : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT));
48514875 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 4 )); // nummib
48524876 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 4 )); // numver
48534877 // nummib x (alloc type, context radix tree index),
@@ -4857,13 +4881,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
48574881 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
48584882 unsigned AllocAbbrev = Stream.EmitAbbrev (std::move (Abbv));
48594883
4860- Abbv = std::make_shared<BitCodeAbbrev>();
4861- Abbv->Add (BitCodeAbbrevOp (bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4862- // n x entry
4863- Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
4864- Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
4865- unsigned RadixAbbrev = Stream.EmitAbbrev (std::move (Abbv));
4866-
48674884 auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
48684885 if (DecSummaries == nullptr )
48694886 return false ;
@@ -4900,44 +4917,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
49004917 NameVals.clear ();
49014918 };
49024919
4903- // First walk through all the functions and collect the allocation contexts in
4904- // their associated summaries, for use in constructing a radix tree of
4905- // contexts. Note that we need to do this in the same order as the functions
4906- // are processed further below since the call stack positions in the resulting
4907- // radix tree array are identified based on this order.
4908- MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4909- forEachSummary ([&](GVInfo I, bool IsAliasee) {
4910- // Don't collect this when invoked for an aliasee, as it is not needed for
4911- // the alias summary. If the aliasee is to be imported, we will invoke this
4912- // separately with IsAliasee=false.
4913- if (IsAliasee)
4914- return ;
4915- GlobalValueSummary *S = I.second ;
4916- assert (S);
4917- auto *FS = dyn_cast<FunctionSummary>(S);
4918- if (!FS)
4919- return ;
4920- collectMemProfCallStacks (
4921- FS,
4922- /* GetStackIndex*/
4923- [&](unsigned I) {
4924- // Get the corresponding index into the list of StackIds actually
4925- // being written for this combined index (which may be a subset in
4926- // the case of distributed indexes).
4927- assert (StackIdIndicesToIndex.contains (I));
4928- return StackIdIndicesToIndex[I];
4929- },
4930- CallStacks);
4931- });
4932- // Finalize the radix tree, write it out, and get the map of positions in the
4933- // linearized tree array.
49344920 DenseMap<CallStackId, LinearCallStackId> CallStackPos;
4935- if (!CallStacks.empty ()) {
4936- CallStackPos =
4937- writeMemoryProfileRadixTree (std::move (CallStacks), Stream, RadixAbbrev);
4921+ if (CombinedIndexMemProfContext) {
4922+ Abbv = std::make_shared<BitCodeAbbrev>();
4923+ Abbv->Add (BitCodeAbbrevOp (bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4924+ // n x entry
4925+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
4926+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
4927+ unsigned RadixAbbrev = Stream.EmitAbbrev (std::move (Abbv));
4928+
4929+ // First walk through all the functions and collect the allocation contexts
4930+ // in their associated summaries, for use in constructing a radix tree of
4931+ // contexts. Note that we need to do this in the same order as the functions
4932+ // are processed further below since the call stack positions in the
4933+ // resulting radix tree array are identified based on this order.
4934+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4935+ forEachSummary ([&](GVInfo I, bool IsAliasee) {
4936+ // Don't collect this when invoked for an aliasee, as it is not needed for
4937+ // the alias summary. If the aliasee is to be imported, we will invoke
4938+ // this separately with IsAliasee=false.
4939+ if (IsAliasee)
4940+ return ;
4941+ GlobalValueSummary *S = I.second ;
4942+ assert (S);
4943+ auto *FS = dyn_cast<FunctionSummary>(S);
4944+ if (!FS)
4945+ return ;
4946+ collectMemProfCallStacks (
4947+ FS,
4948+ /* GetStackIndex*/
4949+ [&](unsigned I) {
4950+ // Get the corresponding index into the list of StackIds actually
4951+ // being written for this combined index (which may be a subset in
4952+ // the case of distributed indexes).
4953+ assert (StackIdIndicesToIndex.contains (I));
4954+ return StackIdIndicesToIndex[I];
4955+ },
4956+ CallStacks);
4957+ });
4958+ // Finalize the radix tree, write it out, and get the map of positions in
4959+ // the linearized tree array.
4960+ if (!CallStacks.empty ()) {
4961+ CallStackPos = writeMemoryProfileRadixTree (std::move (CallStacks), Stream,
4962+ RadixAbbrev);
4963+ }
49384964 }
49394965
4940- // Keep track of the current index into the CallStackPos map.
4966+ // Keep track of the current index into the CallStackPos map. Not used if
4967+ // CombinedIndexMemProfContext is false.
49414968 CallStackId CallStackCount = 0 ;
49424969
49434970 DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
0 commit comments