llvm · DataCorrupted · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024 · Aug 22, 2024
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
@@ -8,6 +8,7 @@
 
 #include "BPSectionOrderer.h"
 #include "InputSection.h"
+#include "UnwindInfoSection.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
@@ -60,6 +61,30 @@ getRelocHash(const Reloc &reloc,
   return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
 }
 
+// Hash the (relocated) unwind info of the first symbol in `isec` that has an
+// unwind entry; returns 0 when no symbol has one. Entries with an LSDA all
+// share one hash; others hash the personality name plus the encoding.
+// The hash is approximate, which is good enough for compression: unwind info
+// is eliminated when it is the same as its neighbors', so functions with
+// similar unwind info should be ordered next to each other. See
+// https://faultlore.com/blah/compact-unwinding/#page-tables
+static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
+  for (Symbol *sym : isec->symbols) {
+    auto *d = dyn_cast_or_null<Defined>(sym);
+    if (!d || !d->unwindEntry())
+      continue;
+    // Zero-init: for compact-unwind entries the relocation helper assigns
+    // `personality`/`lsda` only if a matching reloc exists; both are read below.
+    CompactUnwindEntry cu{};
+    cu.relocateOneCompactUnwindEntry(d);
+    if (cu.lsda)
+      return xxHash64("HAS LSDA");
+    StringRef name = cu.personality ? cu.personality->getName() : "<null>";
+    return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
+  }
+  return 0;
+}
+
 static void constructNodesForCompression(
     const SmallVector<const InputSection *> &sections,
     const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
@@ -76,6 +101,8 @@ static void constructNodesForCompression(
     const auto *isec = sections[sectionIdx];
     constexpr unsigned windowSize = 4;
 
+    hashes.push_back(getUnwindInfoEncodingHash(isec));
+
     for (size_t i = 0; i < isec->data.size(); i++) {
       auto window = isec->data.drop_front(i).take_front(windowSize);
       hashes.push_back(xxHash64(window));

diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
@@ -109,14 +109,54 @@ CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);
 
 #undef FOR_EACH_CU_FIELD
 
-// LLD's internal representation of a compact unwind entry.
-struct CompactUnwindEntry {
-  uint64_t functionAddress;
-  uint32_t functionLength;
-  compact_unwind_encoding_t encoding;
-  Symbol *personality;
-  InputSection *lsda;
-};
+// Fills this entry from `d`'s unwind info. functionAddress is always written;
+// the rest only if `d` has an unwind entry, and for compact-unwind input
+// personality/lsda are written only when a relocation sits at their offsets.
+void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
+    const Defined *d) {
+  functionAddress = d->getVA();
+  ConcatInputSection *entrySec = d->unwindEntry();
+  if (entrySec == nullptr)
+    return;
+
+  if (entrySec->getName() == section_names::ehFrame) {
+    // DWARF unwind info: emit a slimmed-down CU entry that points to it.
+    // The unwinder starts looking for the DWARF entry at the hint, assuming
+    // it is the start of a valid CFI record; on failure it searches linearly
+    // through the contiguous CFI records from the hint to the end of the
+    // section. When the offset is too large to encode we would ideally use
+    // the largest encodable offset of a valid CFI record, but that is not
+    // tracked, so encode zero -- the section start always begins a record.
+    uint64_t hint = 0;
+    if (entrySec->outSecOff <= DWARF_SECTION_OFFSET)
+      hint = entrySec->outSecOff;
+    encoding = target->modeDwarfEncoding | hint;
+    const FDE &fde = cast<ObjFile>(d->getFile())->fdes[entrySec];
+    functionLength = fde.funcLength;
+    lsda = fde.lsda;
+    // The DWARF personality is left out of the compact-unwind entry so that
+    // it does not have to be encoded.
+    personality = nullptr;
+    return;
+  }
+
+  assert(entrySec->getName() == section_names::compactUnwind);
+
+  // Layout offsets are applied to a base one word before the data start.
+  CompactUnwindLayout layout(target->wordSize);
+  const uint8_t *base =
+      reinterpret_cast<const uint8_t *>(entrySec->data.data()) -
+      target->wordSize;
+  encoding = support::endian::read32le(base + layout.encodingOffset);
+  functionLength =
+      support::endian::read32le(base + layout.functionLengthOffset);
+  for (const Reloc &reloc : entrySec->relocs) {
+    if (reloc.offset == layout.personalityOffset)
+      personality = reloc.referent.get<Symbol *>();
+    else if (reloc.offset == layout.lsdaOffset)
+      lsda = reloc.getReferentInputSection();
+  }
+}
 
 using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
 
@@ -349,51 +389,7 @@ Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) {
 void UnwindInfoSectionImpl::relocateCompactUnwind(
     std::vector<CompactUnwindEntry> &cuEntries) {
   parallelFor(0, symbolsVec.size(), [&](size_t i) {
-    CompactUnwindEntry &cu = cuEntries[i];
-    const Defined *d = symbolsVec[i].second;
-    cu.functionAddress = d->getVA();
-    if (!d->unwindEntry())
-      return;
-
-    // If we have DWARF unwind info, create a slimmed-down CU entry that points
-    // to it.
-    if (d->unwindEntry()->getName() == section_names::ehFrame) {
-      // The unwinder will look for the DWARF entry starting at the hint,
-      // assuming the hint points to a valid CFI record start. If it
-      // fails to find the record, it proceeds in a linear search through the
-      // contiguous CFI records from the hint until the end of the section.
-      // Ideally, in the case where the offset is too large to be encoded, we
-      // would instead encode the largest possible offset to a valid CFI record,
-      // but since we don't keep track of that, just encode zero -- the start of
-      // the section is always the start of a CFI record.
-      uint64_t dwarfOffsetHint =
-          d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
-              ? d->unwindEntry()->outSecOff
-              : 0;
-      cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint;
-      const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
-      cu.functionLength = fde.funcLength;
-      // Omit the DWARF personality from compact-unwind entry so that we
-      // don't need to encode it.
-      cu.personality = nullptr;
-      cu.lsda = fde.lsda;
-      return;
-    }
-
-    assert(d->unwindEntry()->getName() == section_names::compactUnwind);
-
-    auto buf =
-        reinterpret_cast<const uint8_t *>(d->unwindEntry()->data.data()) -
-        target->wordSize;
-    cu.functionLength =
-        support::endian::read32le(buf + cuLayout.functionLengthOffset);
-    cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
-    for (const Reloc &r : d->unwindEntry()->relocs) {
-      if (r.offset == cuLayout.personalityOffset)
-        cu.personality = r.referent.get<Symbol *>();
-      else if (r.offset == cuLayout.lsdaOffset)
-        cu.lsda = r.getReferentInputSection();
-    }
+    cuEntries[i].relocateOneCompactUnwindEntry(symbolsVec[i].second);
   });
 }
 

diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
@@ -34,6 +34,18 @@ class UnwindInfoSection : public SyntheticSection {
 
 UnwindInfoSection *makeUnwindInfoSection();
 
+// LLD's internal representation of a compact unwind entry. Fields are
+// default-initialized so a freshly declared entry is always safe to read.
+struct CompactUnwindEntry {
+  uint64_t functionAddress = 0;
+  uint32_t functionLength = 0;
+  compact_unwind_encoding_t encoding = 0;
+  Symbol *personality = nullptr;
+  InputSection *lsda = nullptr;
+  // Relocate the entry to the given Symbol.
+  void relocateOneCompactUnwindEntry(const Defined *d);
+};
+
 } // namespace lld::macho
 
 #endif
diff --git a/lld/test/MachO/bp-section-orderer-stress.s b/lld/test/MachO/bp-section-orderer-stress.s
@@ -29,11 +29,15 @@ profiled_functions = function_names[: int(num_functions / 2)]
 function_contents = [
     f"""
 {name}:
+  .cfi_startproc
+  .cfi_personality 155, _personality_{i % 5}
+  .cfi_lsda 16, _exception{i % 3}
   add w0, w0, #{i % 4096}
   add w1, w1, #{i % 10}
   add w2, w0, #{i % 20}
   adrp x3, {name}@PAGE
   ret
+  .cfi_endproc
 """
     for i, name in enumerate(function_names)
 ]
@@ -78,6 +82,26 @@ with open(assembly_filepath, "w") as f:
 _main:
   ret
 
+_personality_0:
+  ret
+_personality_1:
+  ret
+_personality_2:
+  ret
+_personality_3:
+  ret
+_personality_4:
+  ret
+
+_exception0:
+  .quad 0x4200
+
+_exception1:
+  .quad 0x4210
+
+_exception2:
+  .quad 0x4220
+
 {"".join(function_contents)}
 
 .data