[MemProf] Disable alloc context in combined summary for ndebug builds #139161
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -99,6 +99,21 @@ static cl::opt<bool> WriteRelBFToSummary( | |
| "write-relbf-to-summary", cl::Hidden, cl::init(false), | ||
| cl::desc("Write relative block frequency to function summary ")); | ||
|
|
||
| // Since we only use the context information in the memprof summary records in | ||
| // the LTO backends to do assertion checking, save time and space by only | ||
| // serializing the context for non-NDEBUG builds. | ||
| // TODO: Currently this controls writing context of the allocation info records, | ||
| // which are larger and more expensive, but we should do this for the callsite | ||
| // records as well. | ||
| static cl::opt<bool> | ||
| CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden, | ||
| #ifndef NDEBUG | ||
| cl::init(true), | ||
| #else | ||
| cl::init(false), | ||
| #endif | ||
| cl::desc("")); | ||
|
|
||
| namespace llvm { | ||
| extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold; | ||
| } | ||
|
|
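For reference, here is a minimal standalone sketch of the NDEBUG-gated default pattern the new option relies on. This is not the LLVM code itself; the names are illustrative, and the real patch feeds the default into the hidden `cl::opt` `-combined-index-memprof-context` shown above.

```cpp
// Standalone sketch: a default that flips based on whether assertions are
// compiled in. NDEBUG is defined by release builds, so the context is only
// serialized by default when asserts (the sole consumer) are enabled.
#include <cstdio>

#ifndef NDEBUG
static constexpr bool DefaultMemProfContext = true;  // asserts enabled
#else
static constexpr bool DefaultMemProfContext = false; // release (NDEBUG) build
#endif

int main() {
  std::printf("serialize alloc context by default: %s\n",
              DefaultMemProfContext ? "true" : "false");
}
```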
@@ -528,10 +543,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase { | |
| for (auto Idx : CI.StackIdIndices) | ||
| RecordStackIdReference(Idx); | ||
| } | ||
| for (auto &AI : FS->allocs()) | ||
| for (auto &MIB : AI.MIBs) | ||
| for (auto Idx : MIB.StackIdIndices) | ||
| RecordStackIdReference(Idx); | ||
| if (CombinedIndexMemProfContext) { | ||
| for (auto &AI : FS->allocs()) | ||
| for (auto &MIB : AI.MIBs) | ||
| for (auto Idx : MIB.StackIdIndices) | ||
| RecordStackIdReference(Idx); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
|
|
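The hunk above gates the stack-id collection for allocation records on the new flag. A hedged sketch of that shape, with simplified stand-in types (not the real summary classes), showing why the guard keeps the combined index's StackIds record minimal when context is not serialized:

```cpp
// Illustrative only: stack id references from allocation MIBs are recorded
// solely when the context will actually be written, so a release-mode
// combined index does not pull in stack ids that no reader will consult.
#include <cstdint>
#include <functional>
#include <vector>

struct MIBInfo { std::vector<uint32_t> StackIdIndices; };
struct AllocInfo { std::vector<MIBInfo> MIBs; };

void collectAllocStackIds(const std::vector<AllocInfo> &Allocs,
                          bool SerializeContext,
                          const std::function<void(uint32_t)> &RecordStackId) {
  if (!SerializeContext)
    return; // skip: these references are only needed for the context records
  for (const auto &AI : Allocs)
    for (const auto &MIB : AI.MIBs)
      for (uint32_t Idx : MIB.StackIdIndices)
        RecordStackId(Idx);
}
```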
@@ -4349,9 +4366,11 @@ static void writeFunctionHeapProfileRecords( | |
| Record.push_back(AI.Versions.size()); | ||
| for (auto &MIB : AI.MIBs) { | ||
| Record.push_back((uint8_t)MIB.AllocType); | ||
| // Record the index into the radix tree array for this context. | ||
| assert(CallStackCount <= CallStackPos.size()); | ||
| Record.push_back(CallStackPos[CallStackCount++]); | ||
| if (PerModule || CombinedIndexMemProfContext) { | ||
|
Review comment: Can you explain a bit about the PerModule usage here and below?

Author reply (Contributor): Added a comment here. The below usage was pre-existing so I wasn't sure another comment was needed. |
||
| // Record the index into the radix tree array for this context. | ||
| assert(CallStackCount <= CallStackPos.size()); | ||
| Record.push_back(CallStackPos[CallStackCount++]); | ||
| } | ||
| } | ||
| if (!PerModule) | ||
| llvm::append_range(Record, AI.Versions); | ||
|
|
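The per-MIB record layout implied by the hunk above, as a hedged sketch with simplified types (this is not the bitcode writer API): with context enabled each MIB contributes an (alloc type, radix-tree position) pair; without it, only the alloc type is written.

```cpp
// Sketch of the payload per MIB. CallStackPos holds the precomputed indices
// into the linearized radix tree array; it is consulted only when context
// is being serialized (per-module writes, or combined writes with the flag).
#include <cstdint>
#include <vector>

std::vector<uint64_t> buildAllocRecord(const std::vector<uint8_t> &AllocTypes,
                                       const std::vector<uint64_t> &CallStackPos,
                                       bool WithContext) {
  std::vector<uint64_t> Record;
  for (size_t I = 0; I < AllocTypes.size(); ++I) {
    Record.push_back(AllocTypes[I]);
    if (WithContext)
      Record.push_back(CallStackPos[I]); // index into radix tree array
  }
  return Record;
}
```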
@@ -4384,8 +4403,11 @@ static void writeFunctionHeapProfileRecords( | |
| Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds, | ||
| ContextIdAbbvId); | ||
| } | ||
| Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO | ||
| : bitc::FS_COMBINED_ALLOC_INFO, | ||
| Stream.EmitRecord(PerModule | ||
| ? bitc::FS_PERMODULE_ALLOC_INFO | ||
| : (CombinedIndexMemProfContext | ||
| ? bitc::FS_COMBINED_ALLOC_INFO | ||
| : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT), | ||
| Record, AllocAbbrev); | ||
| } | ||
| } | ||
|
|
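The ternary above (and the matching abbrev change in the next hunk) selects between three record codes. A condensed sketch of that three-way choice; the enumerators mirror the `bitc::FS_*` codes by name only, and their values here are placeholders, not the real LLVMBitCodes.h constants:

```cpp
// Per-module summaries always carry context; combined summaries pick a
// record code based on whether the allocation context was serialized, so a
// reader can tell the two combined layouts apart.
enum class AllocRecordCode {
  PerModuleAllocInfo,         // stands in for FS_PERMODULE_ALLOC_INFO
  CombinedAllocInfo,          // stands in for FS_COMBINED_ALLOC_INFO
  CombinedAllocInfoNoContext, // stands in for FS_COMBINED_ALLOC_INFO_NO_CONTEXT
};

AllocRecordCode pickAllocRecordCode(bool PerModule, bool WithContext) {
  if (PerModule)
    return AllocRecordCode::PerModuleAllocInfo;
  return WithContext ? AllocRecordCode::CombinedAllocInfo
                     : AllocRecordCode::CombinedAllocInfoNoContext;
}
```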
@@ -4847,7 +4869,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { | |
| unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv)); | ||
|
|
||
| Abbv = std::make_shared<BitCodeAbbrev>(); | ||
| Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO)); | ||
| Abbv->Add(BitCodeAbbrevOp(CombinedIndexMemProfContext | ||
| ? bitc::FS_COMBINED_ALLOC_INFO | ||
| : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT)); | ||
| Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib | ||
| Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver | ||
| // nummib x (alloc type, context radix tree index), | ||
|
|
@@ -4857,13 +4881,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { | |
| Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); | ||
| unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv)); | ||
|
|
||
| Abbv = std::make_shared<BitCodeAbbrev>(); | ||
| Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY)); | ||
| // n x entry | ||
| Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); | ||
| Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); | ||
| unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv)); | ||
|
|
||
| auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool { | ||
| if (DecSummaries == nullptr) | ||
| return false; | ||
|
|
@@ -4900,44 +4917,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { | |
| NameVals.clear(); | ||
| }; | ||
|
|
||
| // First walk through all the functions and collect the allocation contexts in | ||
| // their associated summaries, for use in constructing a radix tree of | ||
| // contexts. Note that we need to do this in the same order as the functions | ||
| // are processed further below since the call stack positions in the resulting | ||
| // radix tree array are identified based on this order. | ||
| MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks; | ||
| forEachSummary([&](GVInfo I, bool IsAliasee) { | ||
| // Don't collect this when invoked for an aliasee, as it is not needed for | ||
| // the alias summary. If the aliasee is to be imported, we will invoke this | ||
| // separately with IsAliasee=false. | ||
| if (IsAliasee) | ||
| return; | ||
| GlobalValueSummary *S = I.second; | ||
| assert(S); | ||
| auto *FS = dyn_cast<FunctionSummary>(S); | ||
| if (!FS) | ||
| return; | ||
| collectMemProfCallStacks( | ||
| FS, | ||
| /*GetStackIndex*/ | ||
| [&](unsigned I) { | ||
| // Get the corresponding index into the list of StackIds actually | ||
| // being written for this combined index (which may be a subset in | ||
| // the case of distributed indexes). | ||
| assert(StackIdIndicesToIndex.contains(I)); | ||
| return StackIdIndicesToIndex[I]; | ||
| }, | ||
| CallStacks); | ||
| }); | ||
| // Finalize the radix tree, write it out, and get the map of positions in the | ||
| // linearized tree array. | ||
| DenseMap<CallStackId, LinearCallStackId> CallStackPos; | ||
| if (!CallStacks.empty()) { | ||
| CallStackPos = | ||
| writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev); | ||
| if (CombinedIndexMemProfContext) { | ||
| Abbv = std::make_shared<BitCodeAbbrev>(); | ||
| Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY)); | ||
| // n x entry | ||
| Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); | ||
| Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); | ||
| unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv)); | ||
|
|
||
| // First walk through all the functions and collect the allocation contexts | ||
| // in their associated summaries, for use in constructing a radix tree of | ||
| // contexts. Note that we need to do this in the same order as the functions | ||
| // are processed further below since the call stack positions in the | ||
| // resulting radix tree array are identified based on this order. | ||
| MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks; | ||
| forEachSummary([&](GVInfo I, bool IsAliasee) { | ||
| // Don't collect this when invoked for an aliasee, as it is not needed for | ||
| // the alias summary. If the aliasee is to be imported, we will invoke | ||
| // this separately with IsAliasee=false. | ||
| if (IsAliasee) | ||
| return; | ||
| GlobalValueSummary *S = I.second; | ||
| assert(S); | ||
| auto *FS = dyn_cast<FunctionSummary>(S); | ||
| if (!FS) | ||
| return; | ||
| collectMemProfCallStacks( | ||
| FS, | ||
| /*GetStackIndex*/ | ||
| [&](unsigned I) { | ||
| // Get the corresponding index into the list of StackIds actually | ||
| // being written for this combined index (which may be a subset in | ||
| // the case of distributed indexes). | ||
| assert(StackIdIndicesToIndex.contains(I)); | ||
| return StackIdIndicesToIndex[I]; | ||
| }, | ||
| CallStacks); | ||
| }); | ||
| // Finalize the radix tree, write it out, and get the map of positions in | ||
| // the linearized tree array. | ||
| if (!CallStacks.empty()) { | ||
| CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream, | ||
| RadixAbbrev); | ||
| } | ||
| } | ||
|
|
||
| // Keep track of the current index into the CallStackPos map. | ||
| // Keep track of the current index into the CallStackPos map. Not used if | ||
| // CombinedIndexMemProfContext is false. | ||
| CallStackId CallStackCount = 0; | ||
|
|
||
| DenseSet<GlobalValue::GUID> DefOrUseGUIDs; | ||
|
|
||
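Putting the last three hunks together, a hedged end-to-end sketch of the gated control flow (hypothetical helper names, simplified container types): the radix-tree abbrev, the call-stack collection walk, and the tree emission all move inside one guard, while `CallStackPos` stays declared outside so the later per-function emission loop can still consult it when the flag is set.

```cpp
// Condensed flow sketch, not the real writer. With SerializeContext false,
// no FS_CONTEXT_RADIX_TREE_ARRAY record is emitted and CallStackPos stays
// empty (CallStackCount is then never advanced).
#include <cstdint>
#include <map>

using CallStackId = uint64_t;
using LinearCallStackId = uint32_t;

void writeCombinedSummaries(bool SerializeContext) {
  std::map<CallStackId, LinearCallStackId> CallStackPos; // stays in scope
  if (SerializeContext) {
    // 1. Emit the radix tree abbrev.
    // 2. Walk summaries in emission order, collecting MIB call stacks.
    // 3. Build and write the radix tree, filling CallStackPos.
  }
  (void)CallStackPos; // consumed by the per-function loop that follows
}
```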