6060#include " llvm/MC/StringTableBuilder.h"
6161#include " llvm/MC/TargetRegistry.h"
6262#include " llvm/Object/IRSymtab.h"
63+ #include " llvm/ProfileData/MemProf.h"
6364#include " llvm/Support/AtomicOrdering.h"
6465#include " llvm/Support/Casting.h"
6566#include " llvm/Support/CommandLine.h"
8384#include < vector>
8485
8586using namespace llvm ;
87+ using namespace llvm ::memprof;
8688
8789static cl::opt<unsigned >
8890 IndexThreshold (" bitcode-mdindex-threshold" , cl::Hidden, cl::init(25 ),
@@ -231,7 +233,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
231233 SmallVector<uint64_t , 64 > &NameVals, GlobalValueSummary *Summary,
232234 unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
233235 unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId,
234- const Function &F);
236+ const Function &F, DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
237+ CallStackId &CallStackCount);
235238 void writeModuleLevelReferences (const GlobalVariable &V,
236239 SmallVector<uint64_t , 64 > &NameVals,
237240 unsigned FSModRefsAbbrev,
@@ -4195,12 +4198,58 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
41954198 }
41964199}
41974200
4201+ // Adds the allocation contexts to the CallStacks map. We simply use the
4202+ // size at the time the context was added as the CallStackId. This works because
4203+ // when we look up the call stacks later on we process the function summaries
4204+ // and their allocation records in the same exact order.
4205+ static void collectMemProfCallStacks (
4206+ FunctionSummary *FS, std::function<LinearFrameId(unsigned )> GetStackIndex,
4207+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks) {
4208+ // The interfaces in ProfileData/MemProf.h use a type alias for a stack frame
4209+ // id offset into the index of the full stack frames. The ModuleSummaryIndex
4210+ // currently uses unsigned. Make sure these stay in sync.
4211+ static_assert (std::is_same_v<LinearFrameId, unsigned >);
4212+ for (auto &AI : FS->allocs ()) {
4213+ for (auto &MIB : AI.MIBs ) {
4214+ SmallVector<unsigned > StackIdIndices;
4215+ StackIdIndices.reserve (MIB.StackIdIndices .size ());
4216+ for (auto Id : MIB.StackIdIndices )
4217+ StackIdIndices.push_back (GetStackIndex (Id));
4218+ // The CallStackId is the size at the time this context was inserted.
4219+ CallStacks.insert ({CallStacks.size (), StackIdIndices});
4220+ }
4221+ }
4222+ }
4223+
4224+ // Build the radix tree from the accumulated CallStacks, write out the resulting
4225+ // linearized radix tree array, and return the map of call stack positions into
4226+ // this array for use when writing the allocation records. The returned map is
4227+ // indexed by a CallStackId which in this case is implicitly determined by the
4228+ // order of function summaries and their allocation infos being written.
4229+ static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree (
4230+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks,
4231+ BitstreamWriter &Stream, unsigned RadixAbbrev) {
4232+ assert (!CallStacks.empty ());
4233+ DenseMap<unsigned , FrameStat> FrameHistogram =
4234+ computeFrameHistogram<LinearFrameId>(CallStacks);
4235+ CallStackRadixTreeBuilder<LinearFrameId> Builder;
4236+ // We don't need a MemProfFrameIndexes map as we have already converted the
4237+ // full stack id hash to a linear offset into the StackIds array.
4238+ Builder.build (std::move (CallStacks), /* MemProfFrameIndexes=*/ std::nullopt ,
4239+ FrameHistogram);
4240+ Stream.EmitRecord (bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray (),
4241+ RadixAbbrev);
4242+ return Builder.takeCallStackPos ();
4243+ }
4244+
41984245static void writeFunctionHeapProfileRecords (
41994246 BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
42004247 unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule,
42014248 std::function<unsigned (const ValueInfo &VI)> GetValueID,
42024249 std::function<unsigned(unsigned )> GetStackIndex,
4203- bool WriteContextSizeInfoIndex) {
4250+ bool WriteContextSizeInfoIndex,
4251+ DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
4252+ CallStackId &CallStackCount) {
42044253 SmallVector<uint64_t > Record;
42054254
42064255 for (auto &CI : FS->callsites ()) {
@@ -4234,9 +4283,9 @@ static void writeFunctionHeapProfileRecords(
42344283 Record.push_back (AI.Versions .size ());
42354284 for (auto &MIB : AI.MIBs ) {
42364285 Record.push_back ((uint8_t )MIB.AllocType );
4237- Record. push_back (MIB. StackIdIndices . size ());
4238- for ( auto Id : MIB. StackIdIndices )
4239- Record.push_back (GetStackIndex (Id) );
4286+ // Record the index into the radix tree array for this context.
4287+ assert (CallStackCount <= CallStackPos. size ());
4288+ Record.push_back (CallStackPos[CallStackCount++] );
42404289 }
42414290 if (!PerModule) {
42424291 for (auto V : AI.Versions )
@@ -4282,7 +4331,9 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
42824331 SmallVector<uint64_t , 64 > &NameVals, GlobalValueSummary *Summary,
42834332 unsigned ValueID, unsigned FSCallsRelBFAbbrev,
42844333 unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
4285- unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) {
4334+ unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F,
4335+ DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
4336+ CallStackId &CallStackCount) {
42864337 NameVals.push_back (ValueID);
42874338
42884339 FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -4297,7 +4348,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
42974348 /* PerModule*/ true ,
42984349 /* GetValueId*/ [&](const ValueInfo &VI) { return getValueId (VI); },
42994350 /* GetStackIndex*/ [&](unsigned I) { return I; },
4300- /* WriteContextSizeInfoIndex*/ true );
4351+ /* WriteContextSizeInfoIndex*/ true , CallStackPos, CallStackCount );
43014352
43024353 auto SpecialRefCnts = FS->specialRefCounts ();
43034354 NameVals.push_back (getEncodedGVSummaryFlags (FS->flags ()));
@@ -4530,12 +4581,52 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
45304581 Abbv = std::make_shared<BitCodeAbbrev>();
45314582 Abbv->Add (BitCodeAbbrevOp (bitc::FS_PERMODULE_ALLOC_INFO));
45324583 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 4 )); // nummib
4533- // n x (alloc type, numstackids, numstackids x stackidindex )
4584+ // n x (alloc type, context radix tree index )
45344585 // optional: nummib x (numcontext x total size)
45354586 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
45364587 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
45374588 unsigned AllocAbbrev = Stream.EmitAbbrev (std::move (Abbv));
45384589
4590+ Abbv = std::make_shared<BitCodeAbbrev>();
4591+ Abbv->Add (BitCodeAbbrevOp (bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4592+ // n x entry
4593+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
4594+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
4595+ unsigned RadixAbbrev = Stream.EmitAbbrev (std::move (Abbv));
4596+
4597+ // First walk through all the functions and collect the allocation contexts in
4598+ // their associated summaries, for use in constructing a radix tree of
4599+ // contexts. Note that we need to do this in the same order as the functions
4600+ // are processed further below since the call stack positions in the resulting
4601+ // radix tree array are identified based on this order.
4602+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4603+ for (const Function &F : M) {
4604+ // Summary emission does not support anonymous functions; they have to be
4605+ // renamed using the anonymous function renaming pass.
4606+ if (!F.hasName ())
4607+ report_fatal_error (" Unexpected anonymous function when writing summary" );
4608+
4609+ ValueInfo VI = Index->getValueInfo (F.getGUID ());
4610+ if (!VI || VI.getSummaryList ().empty ()) {
4611+ // Only declarations should not have a summary (a declaration might
4612+ // however have a summary if the def was in module level asm).
4613+ assert (F.isDeclaration ());
4614+ continue ;
4615+ }
4616+ auto *Summary = VI.getSummaryList ()[0 ].get ();
4617+ FunctionSummary *FS = cast<FunctionSummary>(Summary);
4618+ collectMemProfCallStacks (
4619+ FS, /* GetStackIndex*/ [&](unsigned I) { return I; }, CallStacks);
4620+ }
4621+ // Finalize the radix tree, write it out, and get the map of positions in the
4622+ // linearized tree array.
4623+ DenseMap<CallStackId, LinearCallStackId> CallStackPos;
4624+ if (!CallStacks.empty ())
4625+ CallStackPos = writeMemoryProfileRadixTree (CallStacks, Stream, RadixAbbrev);
4626+
4627+ // Keep track of the current index into the CallStackPos map.
4628+ CallStackId CallStackCount = 0 ;
4629+
45394630 SmallVector<uint64_t , 64 > NameVals;
45404631 // Iterate over the list of functions instead of the Index to
45414632 // ensure the ordering is stable.
@@ -4555,7 +4646,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
45554646 auto *Summary = VI.getSummaryList ()[0 ].get ();
45564647 writePerModuleFunctionSummaryRecord (
45574648 NameVals, Summary, VE.getValueID (&F), FSCallsRelBFAbbrev,
4558- FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F);
4649+ FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F,
4650+ CallStackPos, CallStackCount);
45594651 }
45604652
45614653 // Capture references from GlobalVariable initializers, which are outside
@@ -4692,13 +4784,20 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
46924784 Abbv->Add (BitCodeAbbrevOp (bitc::FS_COMBINED_ALLOC_INFO));
46934785 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 4 )); // nummib
46944786 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 4 )); // numver
4695- // nummib x (alloc type, numstackids, numstackids x stackidindex ),
4787+ // nummib x (alloc type, context radix tree index ),
46964788 // numver x version
46974789 // optional: nummib x total size
46984790 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
46994791 Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
47004792 unsigned AllocAbbrev = Stream.EmitAbbrev (std::move (Abbv));
47014793
4794+ Abbv = std::make_shared<BitCodeAbbrev>();
4795+ Abbv->Add (BitCodeAbbrevOp (bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4796+ // n x entry
4797+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
4798+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
4799+ unsigned RadixAbbrev = Stream.EmitAbbrev (std::move (Abbv));
4800+
47024801 auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
47034802 if (DecSummaries == nullptr )
47044803 return false ;
@@ -4735,6 +4834,39 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
47354834 NameVals.clear ();
47364835 };
47374836
4837+ // First walk through all the functions and collect the allocation contexts in
4838+ // their associated summaries, for use in constructing a radix tree of
4839+ // contexts. Note that we need to do this in the same order as the functions
4840+ // are processed further below since the call stack positions in the resulting
4841+ // radix tree array are identified based on this order.
4842+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4843+ forEachSummary ([&](GVInfo I, bool IsAliasee) {
4844+ GlobalValueSummary *S = I.second ;
4845+ assert (S);
4846+ auto *FS = dyn_cast<FunctionSummary>(S);
4847+ if (!FS)
4848+ return ;
4849+ collectMemProfCallStacks (
4850+ FS,
4851+ /* GetStackIndex*/
4852+ [&](unsigned I) {
4853+ // Get the corresponding index into the list of StackIds actually
4854+ // being written for this combined index (which may be a subset in
4855+ // the case of distributed indexes).
4856+ assert (StackIdIndicesToIndex.contains (I));
4857+ return StackIdIndicesToIndex[I];
4858+ },
4859+ CallStacks);
4860+ });
4861+ // Finalize the radix tree, write it out, and get the map of positions in the
4862+ // linearized tree array.
4863+ DenseMap<CallStackId, LinearCallStackId> CallStackPos;
4864+ if (!CallStacks.empty ())
4865+ CallStackPos = writeMemoryProfileRadixTree (CallStacks, Stream, RadixAbbrev);
4866+
4867+ // Keep track of the current index into the CallStackPos map.
4868+ CallStackId CallStackCount = 0 ;
4869+
47384870 DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
47394871 forEachSummary ([&](GVInfo I, bool IsAliasee) {
47404872 GlobalValueSummary *S = I.second ;
@@ -4813,7 +4945,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
48134945 assert (StackIdIndicesToIndex.contains (I));
48144946 return StackIdIndicesToIndex[I];
48154947 },
4816- /* WriteContextSizeInfoIndex*/ false );
4948+ /* WriteContextSizeInfoIndex*/ false , CallStackPos, CallStackCount );
48174949
48184950 NameVals.push_back (*ValueId);
48194951 assert (ModuleIdMap.count (FS->modulePath ()));
0 commit comments