[LLD, MachO] Add `getUnwindInfoEncodingHash` to `BPSectionOrderer.cpp`. #105587

DataCorrupted · 2024-08-21T21:13:39Z

We have seen that the order of UnwindInfo can reduce the size of the final binary [1]. To harness that, we add the hash of unwind info (after relocation) to the BPSectionOrderer

@ellishg Note: lsda and personality could be set before relocation, we can't capture that during hashing.

llvmbot · 2024-08-21T21:13:59Z

@llvm/pr-subscribers-lld

@llvm/pr-subscribers-lld-macho

Author: Peter Rong (DataCorrupted)

Changes

We have seen that the order of UnwindInfo can change the size of the final binary [1]. To harness that, we add the hash of unwind info (after relocation) to the BPSectionOrderer

@ellishg Note: lsda and personality could be set already during actual relocation, we can't capture that during hashing.

Full diff: https://github.com/llvm/llvm-project/pull/105587.diff

3 Files Affected:

(modified) lld/MachO/BPSectionOrderer.cpp (+22)
(modified) lld/MachO/UnwindInfoSection.cpp (+48-47)
(modified) lld/MachO/UnwindInfoSection.h (+11)

diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 568843d72bbb50..350b7498a03ce1 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -8,6 +8,7 @@
 
 #include "BPSectionOrderer.h"
 #include "InputSection.h"
+#include "UnwindInfoSection.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
@@ -60,6 +61,25 @@ getRelocHash(const Reloc &reloc,
   return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
 }
 
+static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
+  for (Symbol *sym : isec->symbols) {
+    if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+      if (auto *ue = d->unwindEntry()) {
+        CompactUnwindEntry cu;
+        cu.relocateOneCompactUnwindEntry(d);
+        if (cu.lsda)
+          return xxHash64("HAS LSDA");
+        StringRef name = cu.personality ? cu.personality->getName().empty()
+                                              ? "<unnamed>"
+                                              : cu.personality->getName()
+                                        : "<none>";
+        return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
+      }
+    }
+  }
+  return 0;
+}
+
 static void constructNodesForCompression(
     const SmallVector<const InputSection *> &sections,
     const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
@@ -76,6 +96,8 @@ static void constructNodesForCompression(
     const auto *isec = sections[sectionIdx];
     constexpr unsigned windowSize = 4;
 
+    hashes.push_back(getUnwindInfoEncodingHash(isec));
+
     for (size_t i = 0; i < isec->data.size(); i++) {
       auto window = isec->data.drop_front(i).take_front(windowSize);
       hashes.push_back(xxHash64(window));
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 7033481d6014b5..4f55af29b33140 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -109,14 +109,53 @@ CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);
 
 #undef FOR_EACH_CU_FIELD
 
-// LLD's internal representation of a compact unwind entry.
-struct CompactUnwindEntry {
-  uint64_t functionAddress;
-  uint32_t functionLength;
-  compact_unwind_encoding_t encoding;
-  Symbol *personality;
-  InputSection *lsda;
-};
+void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
+    const Defined *d) {
+  ConcatInputSection *unwindEntry = d->unwindEntry();
+  assert(unwindEntry);
+
+  functionAddress = d->getVA();
+  // If we have DWARF unwind info, create a slimmed-down CU entry that points
+  // to it.
+  if (unwindEntry->getName() == section_names::ehFrame) {
+    // The unwinder will look for the DWARF entry starting at the hint,
+    // assuming the hint points to a valid CFI record start. If it
+    // fails to find the record, it proceeds in a linear search through the
+    // contiguous CFI records from the hint until the end of the section.
+    // Ideally, in the case where the offset is too large to be encoded, we
+    // would instead encode the largest possible offset to a valid CFI record,
+    // but since we don't keep track of that, just encode zero -- the start of
+    // the section is always the start of a CFI record.
+    uint64_t dwarfOffsetHint =
+        d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
+            ? d->unwindEntry()->outSecOff
+            : 0;
+    encoding = target->modeDwarfEncoding | dwarfOffsetHint;
+    const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
+    functionLength = fde.funcLength;
+    // Omit the DWARF personality from compact-unwind entry so that we
+    // don't need to encode it.
+    personality = nullptr;
+    lsda = fde.lsda;
+    return;
+  }
+
+  assert(unwindEntry->getName() == section_names::compactUnwind);
+
+  CompactUnwindLayout cuLayout(target->wordSize);
+  auto buf = reinterpret_cast<const uint8_t *>(unwindEntry->data.data()) -
+             target->wordSize;
+  functionLength =
+      support::endian::read32le(buf + cuLayout.functionLengthOffset);
+  encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
+  for (const Reloc &r : unwindEntry->relocs) {
+    if (r.offset == cuLayout.personalityOffset)
+      personality = r.referent.get<Symbol *>();
+    else if (r.offset == cuLayout.lsdaOffset)
+      lsda = r.getReferentInputSection();
+  }
+  return;
+}
 
 using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
 
@@ -355,45 +394,7 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
     if (!d->unwindEntry())
       return;
 
-    // If we have DWARF unwind info, create a slimmed-down CU entry that points
-    // to it.
-    if (d->unwindEntry()->getName() == section_names::ehFrame) {
-      // The unwinder will look for the DWARF entry starting at the hint,
-      // assuming the hint points to a valid CFI record start. If it
-      // fails to find the record, it proceeds in a linear search through the
-      // contiguous CFI records from the hint until the end of the section.
-      // Ideally, in the case where the offset is too large to be encoded, we
-      // would instead encode the largest possible offset to a valid CFI record,
-      // but since we don't keep track of that, just encode zero -- the start of
-      // the section is always the start of a CFI record.
-      uint64_t dwarfOffsetHint =
-          d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
-              ? d->unwindEntry()->outSecOff
-              : 0;
-      cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint;
-      const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
-      cu.functionLength = fde.funcLength;
-      // Omit the DWARF personality from compact-unwind entry so that we
-      // don't need to encode it.
-      cu.personality = nullptr;
-      cu.lsda = fde.lsda;
-      return;
-    }
-
-    assert(d->unwindEntry()->getName() == section_names::compactUnwind);
-
-    auto buf =
-        reinterpret_cast<const uint8_t *>(d->unwindEntry()->data.data()) -
-        target->wordSize;
-    cu.functionLength =
-        support::endian::read32le(buf + cuLayout.functionLengthOffset);
-    cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
-    for (const Reloc &r : d->unwindEntry()->relocs) {
-      if (r.offset == cuLayout.personalityOffset)
-        cu.personality = r.referent.get<Symbol *>();
-      else if (r.offset == cuLayout.lsdaOffset)
-        cu.lsda = r.getReferentInputSection();
-    }
+    cu.relocateOneCompactUnwindEntry(d);
   });
 }
 
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 826573b0c44a00..1cdfa3b3d02753 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -34,6 +34,17 @@ class UnwindInfoSection : public SyntheticSection {
 
 UnwindInfoSection *makeUnwindInfoSection();
 
+// LLD's internal representation of a compact unwind entry.
+struct CompactUnwindEntry {
+  uint64_t functionAddress;
+  uint32_t functionLength;
+  compact_unwind_encoding_t encoding;
+  Symbol *personality;
+  InputSection *lsda;
+
+  void relocateOneCompactUnwindEntry(const Defined *d);
+};
+
 } // namespace lld::macho
 
 #endif

lld/MachO/UnwindInfoSection.h

lld/MachO/BPSectionOrderer.cpp

We have seen that the order of UnwindInfo can change the size of the final binary. To harness that, we add the hash of unwind info (after relocation) to the BPSectionOrdere Signed-off-by: Peter Rong <[email protected]>

Signed-off-by: Peter Rong <[email protected]>

lld/MachO/BPSectionOrderer.cpp

Signed-off-by: Peter Rong <[email protected]>

lld/MachO/BPSectionOrderer.cpp

Co-authored-by: Ellis Hoag <[email protected]>

oontvoo · 2024-08-23T13:02:04Z

lld/MachO/BPSectionOrderer.cpp

 }

+// Get a hash of the unwind info (after relocation).
+// This hash is not 100% accurate, but it's good enough for compression.


nit: s/accurate/unique? (Or representative?)
(I'm not sure saying a hash string being "accurate" makes sense )

On the similar topic, it's possible to have more than one personality symbol with the same name, with one being from the dynamic library (eg., libc++) and the other being a user-defined one. (We've seen this in the past use cases).
Should their symbol type be included in the hash?

One key point is we want these hashes to be deterministic. Otherwise builds could end up with non-determinstic function order, which is what the lld/test/MachO/bp-section-orderer-stress.s aims to prevent. So this is why we cannot simply hash the personality pointer, even though that is what we'd like to do. I think it makes sense to include the symbol type in the hash if that's possible.

We are hashing personality->getName() already so it should be deterministic.

In the event that both lsda and personality exist, I'm proposing we use Hash(lsda->getName() + personality->getName() + utohexstr(encoding)) (name is "" if it is a nullpointer)

@ellishg

nit: s/accurate/unique? (Or representative?)

I think I'll just remove that line of comment to avoid confusion.

…ight be invalid. Signed-off-by: Peter Rong <[email protected]>

DataCorrupted added lld lld:MachO labels Aug 21, 2024

DataCorrupted requested review from alx32 and ellishg August 21, 2024 21:13

DataCorrupted self-assigned this Aug 21, 2024

ellishg reviewed Aug 21, 2024

View reviewed changes

lld/MachO/UnwindInfoSection.h Show resolved Hide resolved

lld/MachO/BPSectionOrderer.cpp Show resolved Hide resolved

DataCorrupted added 3 commits August 21, 2024 16:06

[LLD, MachO] Add getUnwindInfoEncodingHash to BPSectionOrderer.cpp.

2c43e32

We have seen that the order of UnwindInfo can change the size of the final binary. To harness that, we add the hash of unwind info (after relocation) to the BPSectionOrdere Signed-off-by: Peter Rong <[email protected]>

refactor

67e7f61

Signed-off-by: Peter Rong <[email protected]>

update tests and comments

9e9d15a

Signed-off-by: Peter Rong <[email protected]>

DataCorrupted force-pushed the HashUnwindInfo branch from 67ceff0 to 9e9d15a Compare August 21, 2024 23:06

DataCorrupted marked this pull request as ready for review August 21, 2024 23:07

oontvoo reviewed Aug 22, 2024

View reviewed changes

lld/MachO/BPSectionOrderer.cpp Outdated Show resolved Hide resolved

lld/MachO/BPSectionOrderer.cpp Show resolved Hide resolved

personality can't be empty

b4f7ddf

Signed-off-by: Peter Rong <[email protected]>

ellishg reviewed Aug 22, 2024

View reviewed changes

lld/MachO/BPSectionOrderer.cpp Outdated Show resolved Hide resolved

lld/MachO/BPSectionOrderer.cpp Outdated Show resolved Hide resolved

DataCorrupted and others added 2 commits August 22, 2024 16:17

Update lld/MachO/BPSectionOrderer.cpp

2170708

Co-authored-by: Ellis Hoag <[email protected]>

Update lld/MachO/BPSectionOrderer.cpp

90fd2fc

Co-authored-by: Ellis Hoag <[email protected]>

oontvoo reviewed Aug 23, 2024

View reviewed changes

Update relocateOneCompactUnwindEntry, don't get address in it, it m…

93b717b

…ight be invalid. Signed-off-by: Peter Rong <[email protected]>

DataCorrupted closed this Feb 6, 2025

DataCorrupted deleted the HashUnwindInfo branch February 6, 2025 17:43

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[LLD, MachO] Add `getUnwindInfoEncodingHash` to `BPSectionOrderer.cpp`. #105587

[LLD, MachO] Add `getUnwindInfoEncodingHash` to `BPSectionOrderer.cpp`. #105587

Uh oh!

DataCorrupted commented Aug 21, 2024 •

edited

Loading

Uh oh!

llvmbot commented Aug 21, 2024 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

oontvoo Aug 23, 2024

Uh oh!

ellishg Aug 23, 2024

Uh oh!

DataCorrupted Aug 23, 2024 •

edited

Loading

Uh oh!

DataCorrupted Aug 23, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[LLD, MachO] Add getUnwindInfoEncodingHash to BPSectionOrderer.cpp. #105587

[LLD, MachO] Add getUnwindInfoEncodingHash to BPSectionOrderer.cpp. #105587

Uh oh!

Conversation

DataCorrupted commented Aug 21, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Aug 21, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

oontvoo Aug 23, 2024

Choose a reason for hiding this comment

Uh oh!

ellishg Aug 23, 2024

Choose a reason for hiding this comment

Uh oh!

DataCorrupted Aug 23, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

DataCorrupted Aug 23, 2024

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[LLD, MachO] Add `getUnwindInfoEncodingHash` to `BPSectionOrderer.cpp`. #105587

[LLD, MachO] Add `getUnwindInfoEncodingHash` to `BPSectionOrderer.cpp`. #105587

DataCorrupted commented Aug 21, 2024 •

edited

Loading

llvmbot commented Aug 21, 2024 •

edited

Loading

DataCorrupted Aug 23, 2024 •

edited

Loading