llvm · teresajohnson · Nov 15, 2024 · Oct 31, 2024 · Oct 31, 2024 · Nov 7, 2024
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -308,7 +308,7 @@ enum GlobalValueSummarySymtabCodes {
   FS_PERMODULE_CALLSITE_INFO = 26,
   // Summary of per-module allocation memprof metadata.
   // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
-  // [nummib x (numcontext x contextsizeindex)]?]
+  // [nummib x (numcontext x total size)]?]
   FS_PERMODULE_ALLOC_INFO = 27,
   // Summary of combined index memprof callsite metadata.
   // [valueid, numstackindices, numver,
@@ -322,10 +322,15 @@ enum GlobalValueSummarySymtabCodes {
   // List of all stack ids referenced by index in the callsite and alloc infos.
   // [n x stack id]
   FS_STACK_IDS = 30,
-  // List of all (full stack id, total size) pairs optionally referenced by
-  // index from the alloc info records.
-  // [n x (full stack id, total size)]
-  FS_CONTEXT_SIZE_INFO = 31,
+  // List of all full stack ids corresponding to the total sizes recorded
+  // at the end of the alloc info when reporting of hinted bytes is enabled.
+  // We use a fixed-width array, which is more efficient as these ids typically
+  // are close to 64 bits in size. The max fixed width value supported is 32
+  // bits so each 64-bit context id hash is recorded as a pair (upper 32 bits
+  // first). This record must immediately precede the associated alloc info, and
+  // the entries must be in the exact same order as the corresponding sizes.
+  // [nummib x (numcontext x full stack id)]
+  FS_ALLOC_CONTEXT_IDS = 31,
 };
 
 enum MetadataCodes {

diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -421,10 +421,8 @@ struct AllocInfo {
   // MIBs vector, if non-empty. Note that each MIB in the summary can have
   // multiple of these as we trim the contexts when possible during matching.
   // For hinted size reporting we, however, want the original pre-trimmed full
-  // stack context id for better correlation with the profile. Note that these
-  // are indexes into the ContextSizeInfos list in the index, to enable
-  // deduplication.
-  std::vector<std::vector<unsigned>> ContextSizeInfoIndices;
+  // stack context id for better correlation with the profile.
+  std::vector<std::vector<ContextTotalSize>> ContextSizeInfos;
 
   AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
     Versions.push_back(0);
@@ -446,19 +444,16 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) {
   for (auto &M : AE.MIBs) {
     OS << "\t\t" << M << "\n";
   }
-  if (!AE.ContextSizeInfoIndices.empty()) {
-    OS << " ContextSizeInfo index per MIB:\n\t\t";
-    First = true;
-    for (auto Indices : AE.ContextSizeInfoIndices) {
-      if (!First)
-        OS << ", ";
-      First = false;
-      bool FirstIndex = true;
-      for (uint64_t Index : Indices) {
-        if (!FirstIndex)
+  if (!AE.ContextSizeInfos.empty()) {
+    OS << "\tContextSizeInfo per MIB:\n";
+    for (auto Infos : AE.ContextSizeInfos) {
+      OS << "\t\t";
+      bool FirstInfo = true;
+      for (auto [FullStackId, TotalSize] : Infos) {
+        if (!FirstInfo)
           OS << ", ";
-        FirstIndex = false;
-        OS << Index;
+        FirstInfo = false;
+        OS << "{ " << FullStackId << ", " << TotalSize << " }";
       }
       OS << "\n";
     }
@@ -1447,19 +1442,6 @@ class ModuleSummaryIndex {
   // built via releaseTemporaryMemory.
   DenseMap<uint64_t, unsigned> StackIdToIndex;
 
-  // List of unique ContextTotalSize structs (pair of the full stack id hash and
-  // its associated total profiled size). We use an index into this vector when
-  // referencing from the alloc summary to reduce the overall memory and size
-  // requirements, since often allocations may be duplicated due to inlining.
-  std::vector<ContextTotalSize> ContextSizeInfos;
-
-  // Temporary map while building the ContextSizeInfos list. Clear when index is
-  // completely built via releaseTemporaryMemory.
-  // Maps from full stack id to a map of total size to the assigned index.
-  // We need size in here too because due to stack truncation in the profile we
-  // can have the same full stack id and different sizes.
-  DenseMap<uint64_t, DenseMap<uint64_t, unsigned>> ContextToTotalSizeAndIndex;
-
   // YAML I/O support.
   friend yaml::MappingTraits<ModuleSummaryIndex>;
 
@@ -1504,9 +1486,6 @@ class ModuleSummaryIndex {
   size_t size() const { return GlobalValueMap.size(); }
 
   const std::vector<uint64_t> &stackIds() const { return StackIds; }
-  const std::vector<ContextTotalSize> &contextSizeInfos() const {
-    return ContextSizeInfos;
-  }
 
   unsigned addOrGetStackIdIndex(uint64_t StackId) {
     auto Inserted = StackIdToIndex.insert({StackId, StackIds.size()});
@@ -1520,36 +1499,15 @@ class ModuleSummaryIndex {
     return StackIds[Index];
   }
 
-  unsigned addOrGetContextSizeIndex(ContextTotalSize ContextSizeInfo) {
-    auto &Entry = ContextToTotalSizeAndIndex[ContextSizeInfo.FullStackId];
-    auto Inserted =
-        Entry.insert({ContextSizeInfo.TotalSize, ContextSizeInfos.size()});
-    if (Inserted.second)
-      ContextSizeInfos.push_back(
-          {ContextSizeInfo.FullStackId, ContextSizeInfo.TotalSize});
-    else
-      assert(Inserted.first->first == ContextSizeInfo.TotalSize);
-    return Inserted.first->second;
-  }
-
-  ContextTotalSize getContextSizeInfoAtIndex(unsigned Index) const {
-    assert(ContextSizeInfos.size() > Index);
-    return ContextSizeInfos[Index];
-  }
-
   // Facility to release memory from data structures only needed during index
-  // construction (including while building combined index). Currently this
+  // construction (including while building combined index). Currently this only
   // releases the temporary map used while constructing a correspondence between
-  // stack ids and their index in the StackIds vector, and a similar map used
-  // while constructing the ContextSizeInfos vector. Mostly impactful when
+  // stack ids and their index in the StackIds vector. Mostly impactful when
   // building a large combined index.
   void releaseTemporaryMemory() {
     assert(StackIdToIndex.size() == StackIds.size());
     StackIdToIndex.clear();
     StackIds.shrink_to_fit();
-    assert(ContextToTotalSizeAndIndex.size() == ContextSizeInfos.size());
-    ContextToTotalSizeAndIndex.clear();
-    ContextSizeInfos.shrink_to_fit();
   }
 
   /// Convenience function for doing a DFS on a ValueInfo. Marks the function in

diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -523,7 +523,7 @@ static void computeFunctionSummary(
       if (MemProfMD) {
         std::vector<MIBInfo> MIBs;
         std::vector<uint64_t> TotalSizes;
-        std::vector<std::vector<unsigned>> ContextSizeInfoIndices;
+        std::vector<std::vector<ContextTotalSize>> ContextSizeInfos;
         for (auto &MDOp : MemProfMD->operands()) {
           auto *MIBMD = cast<const MDNode>(MDOp);
           MDNode *StackNode = getMIBStackNode(MIBMD);
@@ -545,7 +545,7 @@ static void computeFunctionSummary(
           // the summary.
           assert(MIBMD->getNumOperands() > 2 || !MemProfReportHintedSizes);
           if (MIBMD->getNumOperands() > 2) {
-            std::vector<unsigned> ContextSizeIndices;
+            std::vector<ContextTotalSize> ContextSizes;
             for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
               MDNode *ContextSizePair = dyn_cast<MDNode>(MIBMD->getOperand(I));
               assert(ContextSizePair->getNumOperands() == 2);
@@ -555,20 +555,18 @@ static void computeFunctionSummary(
               uint64_t TS = mdconst::dyn_extract<ConstantInt>(
                                 ContextSizePair->getOperand(1))
                                 ->getZExtValue();
-              ContextSizeIndices.push_back(
-                  Index.addOrGetContextSizeIndex({FullStackId, TS}));
+              ContextSizes.push_back({FullStackId, TS});
             }
-            ContextSizeInfoIndices.push_back(std::move(ContextSizeIndices));
+            ContextSizeInfos.push_back(std::move(ContextSizes));
           }
           MIBs.push_back(
               MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
         }
         Allocs.push_back(AllocInfo(std::move(MIBs)));
-        assert(!ContextSizeInfoIndices.empty() || !MemProfReportHintedSizes);
-        if (!ContextSizeInfoIndices.empty()) {
-          assert(Allocs.back().MIBs.size() == ContextSizeInfoIndices.size());
-          Allocs.back().ContextSizeInfoIndices =
-              std::move(ContextSizeInfoIndices);
+        assert(!ContextSizeInfos.empty() || !MemProfReportHintedSizes);
+        if (!ContextSizeInfos.empty()) {
+          assert(Allocs.back().MIBs.size() == ContextSizeInfos.size());
+          Allocs.back().ContextSizeInfos = std::move(ContextSizeInfos);
         }
       } else if (!InstCallsite.empty()) {
         SmallVector<unsigned> StackIdIndices;

diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -328,7 +328,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
       STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO)
       STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
       STRINGIFY_CODE(FS, STACK_IDS)
-      STRINGIFY_CODE(FS, CONTEXT_SIZE_INFO)
+      STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch (CodeID) {

diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -987,11 +987,6 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
   /// ids from the lists in the callsite and alloc entries to the index.
   std::vector<uint64_t> StackIds;
 
-  // Saves the context total size information from the CONTEXT_SIZE_INFO record
-  // to consult when adding this from the lists in the alloc entries to the
-  // index.
-  std::vector<ContextTotalSize> ContextSizeInfos;
-
 public:
   ModuleSummaryIndexBitcodeReader(
       BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
@@ -7608,6 +7603,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
 
   std::vector<CallsiteInfo> PendingCallsites;
   std::vector<AllocInfo> PendingAllocs;
+  std::vector<uint64_t> PendingContextIds;
 
   while (true) {
     Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
@@ -8002,14 +7998,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       break;
     }
 
-    case bitc::FS_CONTEXT_SIZE_INFO: { // [n x (fullstackid, totalsize)]
-      // Save context size infos in the reader to consult when adding them from
-      // the lists in the alloc node entries.
-      for (auto R = Record.begin(); R != Record.end(); R += 2)
-        ContextSizeInfos.push_back({*R, *(R + 1)});
-      break;
-    }
-
     case bitc::FS_PERMODULE_CALLSITE_INFO: {
       unsigned ValueID = Record[0];
       SmallVector<unsigned> StackIdList;
@@ -8044,6 +8032,16 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       break;
     }
 
+    case bitc::FS_ALLOC_CONTEXT_IDS: {
+      // Array of 32-bit fixed-width values; each 64-bit context id is a pair of
+      // adjacent words (most significant first). Stop before any unpaired word.
+      assert(!(Record.size() % 2));
+      PendingContextIds.reserve(Record.size() / 2);
+      for (size_t Idx = 0; Idx + 1 < Record.size(); Idx += 2)
+        PendingContextIds.push_back(Record[Idx] << 32 | Record[Idx + 1]);
+      break;
+    }
+
     case bitc::FS_PERMODULE_ALLOC_INFO: {
       unsigned I = 0;
       std::vector<MIBInfo> MIBs;
@@ -8066,30 +8064,40 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
         MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
       }
       // We either have nothing left or at least NumMIBs context size info
-      // indices left.
+      // entries left (for the total sizes included when reporting of hinted
+      // bytes is enabled).
       assert(I == Record.size() || Record.size() - I >= NumMIBs);
-      std::vector<std::vector<unsigned>> AllContextSizeIndices;
+      std::vector<std::vector<ContextTotalSize>> AllContextSizes;
       if (I < Record.size()) {
+        assert(!PendingContextIds.empty() &&
+               "Missing context ids for alloc sizes");
+        unsigned ContextIdIndex = 0;
         MIBsRead = 0;
+        // The sizes are stored as a linearized array, where each MIB has one
+        // or more sizes (due to context trimming, each MIB in the metadata
+        // and summarized here can correspond to more than one original context
+        // from the profile).
         while (MIBsRead++ < NumMIBs) {
+          // First read the number of contexts recorded for this MIB.
           unsigned NumContextSizeInfoEntries = Record[I++];
           assert(Record.size() - I >= NumContextSizeInfoEntries);
-          std::vector<unsigned> ContextSizeIndices;
-          ContextSizeIndices.reserve(NumContextSizeInfoEntries);
+          std::vector<ContextTotalSize> ContextSizes;
+          ContextSizes.reserve(NumContextSizeInfoEntries);
           for (unsigned J = 0; J < NumContextSizeInfoEntries; J++) {
-            assert(Record[I] < ContextSizeInfos.size());
-            ContextSizeIndices.push_back(TheIndex.addOrGetContextSizeIndex(
-                ContextSizeInfos[Record[I++]]));
+            assert(ContextIdIndex < PendingContextIds.size());
+            // PendingContextIds read from the preceding FS_ALLOC_CONTEXT_IDS
+            // should be in the same order as the total sizes.
+            ContextSizes.push_back(
+                {PendingContextIds[ContextIdIndex++], Record[I++]});
           }
-          AllContextSizeIndices.push_back(std::move(ContextSizeIndices));
+          AllContextSizes.push_back(std::move(ContextSizes));
         }
+        PendingContextIds.clear();
       }
       PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
-      if (!AllContextSizeIndices.empty()) {
-        assert(PendingAllocs.back().MIBs.size() ==
-               AllContextSizeIndices.size());
-        PendingAllocs.back().ContextSizeInfoIndices =
-            std::move(AllContextSizeIndices);
+      if (!AllContextSizes.empty()) {
+        assert(PendingAllocs.back().MIBs.size() == AllContextSizes.size());
+        PendingAllocs.back().ContextSizeInfos = std::move(AllContextSizes);
       }
       break;
     }