[lld] Add flag to order any section for compression

ellishg · ellishg · commit e23ea2413578 · 2025-11-14T15:24:05.000-08:00
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
@@ -29,8 +29,9 @@ struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
   DenseMap<const InputSectionBase *, Defined *> secToSym;
 
   static uint64_t getSize(const Section &sec) { return sec.getSize(); }
-  static bool isCodeSection(const Section &sec) {
-    return sec.flags & ELF::SHF_EXECINSTR;
+  static std::string getSectionName(const Section &sec) {
+    // ELF has no segment names, so globs match the input section name.
+    return sec.name.str();
   }
   ArrayRef<Defined *> getSymbols(const Section &sec) {
     auto it = secToSym.find(&sec);
@@ -94,8 +95,7 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
   for (ELFFileBase *file : ctx.objectFiles)
     for (Symbol *sym : file->getLocalSymbols())
       addSection(*sym);
-  return orderer.computeOrder(profilePath, forFunctionCompression,
-                              forDataCompression,
+  return orderer.computeOrder(profilePath, {/*TODO: build globs from flags*/},
                               compressionSortStartupFunctions, verbose,
                               sections, rootSymbolToSectionIdxs);
 }
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
@@ -31,8 +31,8 @@ template <> struct lld::BPOrdererTraits<struct BPOrdererMachO> {
 namespace {
 struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
   static uint64_t getSize(const Section &sec) { return sec.getSize(); }
-  static bool isCodeSection(const Section &sec) {
-    return macho::isCodeSection(&sec);
+  static std::string getSectionName(const Section &sec) {
+    return (sec.getSegName() + sec.getName()).str();
   }
   static ArrayRef<Defined *> getSymbols(const Section &sec) {
     return sec.symbols;
@@ -107,7 +107,7 @@ struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
 } // namespace
 
 DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
-    StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
+    StringRef profilePath, ArrayRef<GlobPattern> compressionSortSectionGlobs,
     bool compressionSortStartupFunctions, bool verbose) {
   // Collect candidate sections and associated symbols.
   SmallVector<InputSection *> sections;
@@ -140,8 +140,7 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
     }
   }
 
-  return BPOrdererMachO().computeOrder(profilePath, forFunctionCompression,
-                                       forDataCompression,
+  return BPOrdererMachO().computeOrder(profilePath, compressionSortSectionGlobs,
                                        compressionSortStartupFunctions, verbose,
                                        sections, rootSymbolToSectionIdxs);
 }
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
@@ -14,8 +14,10 @@
 #ifndef LLD_MACHO_BPSECTION_ORDERER_H
 #define LLD_MACHO_BPSECTION_ORDERER_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/GlobPattern.h"
 
 namespace lld::macho {
 class InputSection;
@@ -25,10 +27,10 @@ class InputSection;
 ///
 /// It is important that .subsections_via_symbols is used to ensure functions
 /// and data are in their own sections and thus can be reordered.
-llvm::DenseMap<const InputSection *, int>
-runBalancedPartitioning(llvm::StringRef profilePath,
-                        bool forFunctionCompression, bool forDataCompression,
-                        bool compressionSortStartupFunctions, bool verbose);
+llvm::DenseMap<const InputSection *, int> runBalancedPartitioning(
+    llvm::StringRef profilePath,
+    llvm::ArrayRef<llvm::GlobPattern> compressionSortSectionGlobs,
+    bool compressionSortStartupFunctions, bool verbose);
 
 } // namespace lld::macho
 
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
@@ -231,8 +231,8 @@ struct Configuration {
   llvm::StringRef irpgoProfilePath;
   bool bpStartupFunctionSort = false;
   bool bpCompressionSortStartupFunctions = false;
-  bool bpFunctionOrderForCompression = false;
-  bool bpDataOrderForCompression = false;
+  std::vector<std::string> bpCompressionSortSections;
+  llvm::SmallVector<llvm::GlobPattern> bpCompressionSortSectionGlobs;
   bool bpVerboseSectionOrderer = false;
 
   SectionRenameMap sectionRenameMap;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
@@ -2034,20 +2034,38 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
   if (config->irpgoProfilePath.empty() && config->bpStartupFunctionSort)
     error("--bp-startup-sort=function must be used with "
           "--irpgo-profile");
+  config->bpCompressionSortSections =
+      args.getAllArgValues(OPT_bp_compression_sort_section);
+  // Compile a section-name glob and record it in the config; a malformed
+  // pattern is reported as a link error rather than silently dropped.
+  auto addCompressionSectionGlob = [&](StringRef s) {
+    auto patOrErr = GlobPattern::create(s);
+    if (patOrErr) {
+      config->bpCompressionSortSectionGlobs.push_back(std::move(*patOrErr));
+    } else {
+      error("--bp-compression-sort-section: " +
+            toString(patOrErr.takeError()));
+    }
+  };
+  for (StringRef s : config->bpCompressionSortSections)
+    addCompressionSectionGlob(s);
+  // Compression ordering requested through the new flag must get the same
+  // call-graph-sort compatibility check as the deprecated flag below.
+  if (const Arg *arg = args.getLastArg(OPT_bp_compression_sort_section))
+    IncompatWithCGSort(arg->getSpelling());
   if (const Arg *arg = args.getLastArg(OPT_bp_compression_sort)) {
     StringRef compressionSortStr = arg->getValue();
     if (compressionSortStr == "function") {
-      config->bpFunctionOrderForCompression = true;
+      addCompressionSectionGlob("__TEXT*");
     } else if (compressionSortStr == "data") {
-      config->bpDataOrderForCompression = true;
+      addCompressionSectionGlob("__DATA*");
     } else if (compressionSortStr == "both") {
-      config->bpFunctionOrderForCompression = true;
-      config->bpDataOrderForCompression = true;
+      addCompressionSectionGlob("*");
     } else if (compressionSortStr != "none") {
       error("unknown value `" + compressionSortStr + "` for " +
             arg->getSpelling());
     }
     if (compressionSortStr != "none")
       IncompatWithCGSort(arg->getSpelling());
   }
   config->bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
@@ -147,9 +147,17 @@ def bp_compression_sort_startup_functions: Flag<["--"], "bp-compression-sort-sta
     Group<grp_lld>;
 def no_bp_compression_sort_startup_functions: Flag<["--"], "no-bp-compression-sort-startup-functions">,
     HelpText<"Do not order startup function for compression">, Group<grp_lld>;
-def bp_compression_sort: Joined<["--"], "bp-compression-sort=">,
-    MetaVarName<"[none,function,data,both]">,
-    HelpText<"Order sections to improve compressed size">, Group<grp_lld>;
+def bp_compression_sort
+    : Joined<["--"], "bp-compression-sort=">,
+      MetaVarName<"[none,function,data,both]">,
+      HelpText<"Order sections to improve compressed size. Deprecated. Please "
+               "use --bp-compression-sort-section">,
+      Group<grp_lld>;
+def bp_compression_sort_section
+    : Joined<["--"], "bp-compression-sort-section=">,
+      MetaVarName<"<section-glob>">,
+      HelpText<"Order <section-glob> for optimal compressed size">,
+      Group<grp_lld>;
 def irpgo_profile_sort: Separate<["--"], "irpgo-profile-sort">, Group<grp_lld>;
 def irpgo_profile_sort_eq: Joined<["--"], "irpgo-profile-sort=">,
     Alias<!cast<Separate>(irpgo_profile_sort)>, MetaVarName<"<profile>">,
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
@@ -363,13 +363,12 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
 DenseMap<const InputSection *, int>
 macho::PriorityBuilder::buildInputSectionPriorities() {
   DenseMap<const InputSection *, int> sectionPriorities;
-  if (config->bpStartupFunctionSort || config->bpFunctionOrderForCompression ||
-      config->bpDataOrderForCompression) {
+  if (config->bpStartupFunctionSort ||
+      !config->bpCompressionSortSectionGlobs.empty()) {
     TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
     sectionPriorities = runBalancedPartitioning(
         config->bpStartupFunctionSort ? config->irpgoProfilePath : "",
-        config->bpFunctionOrderForCompression,
-        config->bpDataOrderForCompression,
+        config->bpCompressionSortSectionGlobs,
         config->bpCompressionSortStartupFunctions,
         config->bpVerboseSectionOrderer);
   } else if (config->callGraphProfileSort) {
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -58,12 +58,13 @@ template <class D> struct BPOrderer {
   //   program startup.
   // * compressionSortStartupFunctions: if profilePath is specified, allocate
   //   extra utility vertices to prioritize nearby function similarity.
-  auto computeOrder(llvm::StringRef profilePath, bool forFunctionCompression,
-                    bool forDataCompression,
-                    bool compressionSortStartupFunctions, bool verbose,
-                    llvm::ArrayRef<Section *> sections,
-                    const DenseMap<CachedHashStringRef, std::set<unsigned>>
-                        &rootSymbolToSectionIdxs)
+  auto
+  computeOrder(llvm::StringRef profilePath,
+               llvm::ArrayRef<llvm::GlobPattern> compressionSortSectionGlobs,
+               bool compressionSortStartupFunctions, bool verbose,
+               llvm::ArrayRef<Section *> sections,
+               const DenseMap<CachedHashStringRef, std::set<unsigned>>
+                   &rootSymbolToSectionIdxs)
       -> llvm::DenseMap<const Section *, int>;
 
   std::optional<StringRef> static getResolvedLinkageName(StringRef name) {
@@ -150,7 +151,7 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
 
 template <class D>
 auto BPOrderer<D>::computeOrder(
-    StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
+    StringRef profilePath, ArrayRef<GlobPattern> compressionSortSectionGlobs,
     bool compressionSortStartupFunctions, bool verbose,
     ArrayRef<Section *> sections,
     const DenseMap<CachedHashStringRef, std::set<unsigned>>
@@ -221,19 +222,20 @@ auto BPOrderer<D>::computeOrder(
     }
   }
 
-  SmallVector<unsigned> sectionIdxsForFunctionCompression,
-      sectionIdxsForDataCompression;
+  // Using map<> to guarantee that iteration order is deterministic
+  std::map<std::string, SmallVector<unsigned>> sectionNameToIdxsForCompression;
   for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
     if (startupSectionIdxUNs.count(sectionIdx))
       continue;
     const auto *isec = sections[sectionIdx];
-    if (D::isCodeSection(*isec)) {
-      if (forFunctionCompression)
-        sectionIdxsForFunctionCompression.push_back(sectionIdx);
-    } else {
-      if (forDataCompression)
-        sectionIdxsForDataCompression.push_back(sectionIdx);
-    }
+    std::string sectionName = D::getSectionName(*isec);
+    bool isMatch =
+        llvm::any_of(compressionSortSectionGlobs, [&](const GlobPattern &glob) {
+          return glob.match(sectionName);
+        });
+    if (!isMatch)
+      continue;
+    sectionNameToIdxsForCompression[sectionName].push_back(sectionIdx);
   }
 
   if (compressionSortStartupFunctions) {
@@ -252,51 +254,43 @@ auto BPOrderer<D>::computeOrder(
     }
   }
 
-  // Map a section index (order directly) to a list of duplicate section indices
-  // (not ordered directly).
+  // Map a section index (ordered directly) to a list of duplicate section
+  // indices (not ordered directly).
   DenseMap<unsigned, SmallVector<unsigned, 0>> duplicateSectionIdxs;
-  auto unsForFunctionCompression = getUnsForCompression<D>(
-      sections, sectionToIdx, sectionIdxsForFunctionCompression,
-      &duplicateSectionIdxs, maxUN);
-  auto unsForDataCompression = getUnsForCompression<D>(
-      sections, sectionToIdx, sectionIdxsForDataCompression,
-      &duplicateSectionIdxs, maxUN);
-
-  std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
-      nodesForDataCompression;
+  SmallVector<std::vector<BPFunctionNode>> nodesForCompression;
+  for (auto &[name, idxs] : sectionNameToIdxsForCompression) {
+    auto &nodes = nodesForCompression.emplace_back();
+    auto sectionUns = getUnsForCompression<D>(sections, sectionToIdx, idxs,
+                                              &duplicateSectionIdxs, maxUN);
+    for (auto &[sectionIdx, uns] : sectionUns)
+      nodes.emplace_back(sectionIdx, uns);
+    // Seed each group in section-index order (node Id is the section index);
+    // the input link order is usually a reasonable starting point.
+    llvm::sort(nodes, [](auto &L, auto &R) { return L.Id < R.Id; });
+  }
+
+  std::vector<BPFunctionNode> nodesForStartup;
   for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
     nodesForStartup.emplace_back(sectionIdx, uns);
-  for (auto &[sectionIdx, uns] : unsForFunctionCompression)
-    nodesForFunctionCompression.emplace_back(sectionIdx, uns);
-  for (auto &[sectionIdx, uns] : unsForDataCompression)
-    nodesForDataCompression.emplace_back(sectionIdx, uns);
 
   // Use the first timestamp to define the initial order for startup nodes.
   llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
     return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
            std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
   });
-  // Sort compression nodes by their Id (which is the section index) because the
-  // input linker order tends to be not bad.
-  llvm::sort(nodesForFunctionCompression,
-             [](auto &L, auto &R) { return L.Id < R.Id; });
-  llvm::sort(nodesForDataCompression,
-             [](auto &L, auto &R) { return L.Id < R.Id; });
 
   {
     TimeTraceScope timeScope("Balanced Partitioning");
     BalancedPartitioningConfig config;
     BalancedPartitioning bp(config);
     bp.run(nodesForStartup);
-    bp.run(nodesForFunctionCompression);
-    bp.run(nodesForDataCompression);
+    for (auto &nodes : nodesForCompression)
+      bp.run(nodes);
   }
 
   unsigned numStartupSections = 0, startupSize = 0;
-  unsigned numCodeCompressionSections = 0, codeCompressionSize = 0;
-  unsigned numDuplicateCodeSections = 0, duplicateCodeSize = 0;
-  unsigned numDataCompressionSections = 0, dataCompressionSize = 0;
-  unsigned numDuplicateDataSections = 0, duplicateDataSize = 0;
+  unsigned numCompressionSections = 0, compressionSize = 0;
+  unsigned numDuplicateCompressionSections = 0, duplicateCompressionSize = 0;
   SetVector<const Section *> orderedSections;
   // Order startup functions,
   for (auto &node : nodesForStartup) {
@@ -306,63 +300,49 @@ auto BPOrderer<D>::computeOrder(
       ++numStartupSections;
     }
   }
-  // then functions for compression,
-  for (auto &node : nodesForFunctionCompression) {
-    const auto *isec = sections[node.Id];
-    if (orderedSections.insert(isec)) {
-      codeCompressionSize += D::getSize(*isec);
-      ++numCodeCompressionSections;
-    }
-    auto It = duplicateSectionIdxs.find(node.Id);
-    if (It == duplicateSectionIdxs.end())
-      continue;
-    for (auto dupSecIdx : It->getSecond()) {
-      const auto *dupIsec = sections[dupSecIdx];
-      if (orderedSections.insert(dupIsec)) {
-        duplicateCodeSize += D::getSize(*dupIsec);
-        ++numDuplicateCodeSections;
+  // then sections for compression.
+  for (const auto &nodes : nodesForCompression) {
+    for (auto &node : nodes) {
+      const auto *isec = sections[node.Id];
+      if (orderedSections.insert(isec)) {
+        compressionSize += D::getSize(*isec);
+        ++numCompressionSections;
       }
-    }
-  }
-  // then data for compression.
-  for (auto &node : nodesForDataCompression) {
-    const auto *isec = sections[node.Id];
-    if (orderedSections.insert(isec)) {
-      dataCompressionSize += D::getSize(*isec);
-      ++numDataCompressionSections;
-    }
-    auto It = duplicateSectionIdxs.find(node.Id);
-    if (It == duplicateSectionIdxs.end())
-      continue;
-    for (auto dupSecIdx : It->getSecond()) {
-      const auto *dupIsec = sections[dupSecIdx];
-      if (orderedSections.insert(dupIsec)) {
-        duplicateDataSize += D::getSize(*dupIsec);
-        ++numDuplicateDataSections;
+      auto It = duplicateSectionIdxs.find(node.Id);
+      if (It == duplicateSectionIdxs.end())
+        continue;
+      for (auto dupSecIdx : It->getSecond()) {
+        const auto *dupIsec = sections[dupSecIdx];
+        if (orderedSections.insert(dupIsec)) {
+          duplicateCompressionSize += D::getSize(*dupIsec);
+          ++numDuplicateCompressionSections;
+        }
       }
     }
   }
 
   if (verbose) {
-    unsigned numTotalOrderedSections =
-        numStartupSections + numCodeCompressionSections +
-        numDuplicateCodeSections + numDataCompressionSections +
-        numDuplicateDataSections;
-    unsigned totalOrderedSize = startupSize + codeCompressionSize +
-                                duplicateCodeSize + dataCompressionSize +
-                                duplicateDataSize;
+    unsigned numTotalOrderedSections = numStartupSections +
+                                       numCompressionSections +
+                                       numDuplicateCompressionSections;
+    unsigned totalOrderedSize =
+        startupSize + compressionSize + duplicateCompressionSize;
+    SmallVector<StringRef> sectionNames;
+    for (auto &[name, idxs] : sectionNameToIdxsForCompression)
+      sectionNames.push_back(name);
+    llvm::sort(sectionNames);
     dbgs() << "Ordered " << numTotalOrderedSections << " sections ("
            << totalOrderedSize << " bytes) using balanced partitioning:\n";
     dbgs() << "  Functions for startup: " << numStartupSections << " ("
            << startupSize << " bytes)\n";
-    dbgs() << "  Functions for compression: " << numCodeCompressionSections
-           << " (" << codeCompressionSize << " bytes)\n";
-    dbgs() << "  Duplicate functions: " << numDuplicateCodeSections << " ("
-           << duplicateCodeSize << " bytes)\n";
-    dbgs() << "  Data for compression: " << numDataCompressionSections << " ("
-           << dataCompressionSize << " bytes)\n";
-    dbgs() << "  Duplicate data: " << numDuplicateDataSections << " ("
-           << duplicateDataSize << " bytes)\n";
+    dbgs() << "  Sections for compression: " << numCompressionSections << " ("
+           << compressionSize << " bytes)\n    ";
+    for (StringRef name : sectionNames)
+      dbgs() << name << " ";
+    dbgs() << "\n";
+    dbgs() << "  Duplicate compression sections: "
+           << numDuplicateCompressionSections << " ("
+           << duplicateCompressionSize << " bytes)\n";
 
     if (!profilePath.empty()) {
       // Evaluate this function order for startup
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
diff --git a/lld/test/MachO/compression-order-sections.s b/lld/test/MachO/compression-order-sections.s