-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[LLD, MachO] Add getUnwindInfoEncodingHash to BPSectionOrderer.cpp.
#105587
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-macho Author: Peter Rong (DataCorrupted) Changes: We have seen that the order of UnwindInfo can change the size of the final binary [1]. To harness that, we add the hash of unwind info (after relocation) to the BPSectionOrderer. @ellishg Note: Full diff: https://github.com/llvm/llvm-project/pull/105587.diff 3 Files Affected:
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 568843d72bbb50..350b7498a03ce1 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -8,6 +8,7 @@
#include "BPSectionOrderer.h"
#include "InputSection.h"
+#include "UnwindInfoSection.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
@@ -60,6 +61,25 @@ getRelocHash(const Reloc &reloc,
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
}
+static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
+ for (Symbol *sym : isec->symbols) {
+ if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+ if (auto *ue = d->unwindEntry()) {
+ CompactUnwindEntry cu;
+ cu.relocateOneCompactUnwindEntry(d);
+ if (cu.lsda)
+ return xxHash64("HAS LSDA");
+ StringRef name = cu.personality ? cu.personality->getName().empty()
+ ? "<unnamed>"
+ : cu.personality->getName()
+ : "<none>";
+ return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
+ }
+ }
+ }
+ return 0;
+}
+
static void constructNodesForCompression(
const SmallVector<const InputSection *> §ions,
const DenseMap<const InputSection *, uint64_t> §ionToIdx,
@@ -76,6 +96,8 @@ static void constructNodesForCompression(
const auto *isec = sections[sectionIdx];
constexpr unsigned windowSize = 4;
+ hashes.push_back(getUnwindInfoEncodingHash(isec));
+
for (size_t i = 0; i < isec->data.size(); i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
hashes.push_back(xxHash64(window));
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 7033481d6014b5..4f55af29b33140 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -109,14 +109,53 @@ CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);
#undef FOR_EACH_CU_FIELD
-// LLD's internal representation of a compact unwind entry.
-struct CompactUnwindEntry {
- uint64_t functionAddress;
- uint32_t functionLength;
- compact_unwind_encoding_t encoding;
- Symbol *personality;
- InputSection *lsda;
-};
+void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
+ const Defined *d) {
+ ConcatInputSection *unwindEntry = d->unwindEntry();
+ assert(unwindEntry);
+
+ functionAddress = d->getVA();
+ // If we have DWARF unwind info, create a slimmed-down CU entry that points
+ // to it.
+ if (unwindEntry->getName() == section_names::ehFrame) {
+ // The unwinder will look for the DWARF entry starting at the hint,
+ // assuming the hint points to a valid CFI record start. If it
+ // fails to find the record, it proceeds in a linear search through the
+ // contiguous CFI records from the hint until the end of the section.
+ // Ideally, in the case where the offset is too large to be encoded, we
+ // would instead encode the largest possible offset to a valid CFI record,
+ // but since we don't keep track of that, just encode zero -- the start of
+ // the section is always the start of a CFI record.
+ uint64_t dwarfOffsetHint =
+ d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
+ ? d->unwindEntry()->outSecOff
+ : 0;
+ encoding = target->modeDwarfEncoding | dwarfOffsetHint;
+ const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
+ functionLength = fde.funcLength;
+ // Omit the DWARF personality from compact-unwind entry so that we
+ // don't need to encode it.
+ personality = nullptr;
+ lsda = fde.lsda;
+ return;
+ }
+
+ assert(unwindEntry->getName() == section_names::compactUnwind);
+
+ CompactUnwindLayout cuLayout(target->wordSize);
+ auto buf = reinterpret_cast<const uint8_t *>(unwindEntry->data.data()) -
+ target->wordSize;
+ functionLength =
+ support::endian::read32le(buf + cuLayout.functionLengthOffset);
+ encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
+ for (const Reloc &r : unwindEntry->relocs) {
+ if (r.offset == cuLayout.personalityOffset)
+ personality = r.referent.get<Symbol *>();
+ else if (r.offset == cuLayout.lsdaOffset)
+ lsda = r.getReferentInputSection();
+ }
+ return;
+}
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
@@ -355,45 +394,7 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
if (!d->unwindEntry())
return;
- // If we have DWARF unwind info, create a slimmed-down CU entry that points
- // to it.
- if (d->unwindEntry()->getName() == section_names::ehFrame) {
- // The unwinder will look for the DWARF entry starting at the hint,
- // assuming the hint points to a valid CFI record start. If it
- // fails to find the record, it proceeds in a linear search through the
- // contiguous CFI records from the hint until the end of the section.
- // Ideally, in the case where the offset is too large to be encoded, we
- // would instead encode the largest possible offset to a valid CFI record,
- // but since we don't keep track of that, just encode zero -- the start of
- // the section is always the start of a CFI record.
- uint64_t dwarfOffsetHint =
- d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
- ? d->unwindEntry()->outSecOff
- : 0;
- cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint;
- const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
- cu.functionLength = fde.funcLength;
- // Omit the DWARF personality from compact-unwind entry so that we
- // don't need to encode it.
- cu.personality = nullptr;
- cu.lsda = fde.lsda;
- return;
- }
-
- assert(d->unwindEntry()->getName() == section_names::compactUnwind);
-
- auto buf =
- reinterpret_cast<const uint8_t *>(d->unwindEntry()->data.data()) -
- target->wordSize;
- cu.functionLength =
- support::endian::read32le(buf + cuLayout.functionLengthOffset);
- cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
- for (const Reloc &r : d->unwindEntry()->relocs) {
- if (r.offset == cuLayout.personalityOffset)
- cu.personality = r.referent.get<Symbol *>();
- else if (r.offset == cuLayout.lsdaOffset)
- cu.lsda = r.getReferentInputSection();
- }
+ cu.relocateOneCompactUnwindEntry(d);
});
}
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 826573b0c44a00..1cdfa3b3d02753 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -34,6 +34,17 @@ class UnwindInfoSection : public SyntheticSection {
UnwindInfoSection *makeUnwindInfoSection();
+// LLD's internal representation of a compact unwind entry.
+struct CompactUnwindEntry {
+ uint64_t functionAddress;
+ uint32_t functionLength;
+ compact_unwind_encoding_t encoding;
+ Symbol *personality;
+ InputSection *lsda;
+
+ void relocateOneCompactUnwindEntry(const Defined *d);
+};
+
} // namespace lld::macho
#endif
|
We have seen that the order of UnwindInfo can change the size of the final binary. To harness that, we add the hash of unwind info (after relocation) to the BPSectionOrderer. Signed-off-by: Peter Rong <[email protected]>
Signed-off-by: Peter Rong <[email protected]>
Signed-off-by: Peter Rong <[email protected]>
67ceff0 to
9e9d15a
Compare
Signed-off-by: Peter Rong <[email protected]>
Co-authored-by: Ellis Hoag <[email protected]>
Co-authored-by: Ellis Hoag <[email protected]>
lld/MachO/BPSectionOrderer.cpp
Outdated
| } | ||
|
|
||
| // Get a hash of the unwind info (after relocation). | ||
| // This hash is not 100% accurate, but it's good enough for compression. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: s/accurate/unique? (Or representative?)
(I'm not sure saying a hash string being "accurate" makes sense )
On the similar topic, it's possible to have more than one personality symbol with the same name, with one being from the dynamic library (eg., libc++) and the other being a user-defined one. (We've seen this in the past use cases).
Should their symbol type be included in the hash?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One key point is we want these hashes to be deterministic. Otherwise builds could end up with a non-deterministic function order, which is what the lld/test/MachO/bp-section-orderer-stress.s test aims to prevent. So this is why we cannot simply hash the personality pointer, even though that is what we'd like to do. I think it makes sense to include the symbol type in the hash if that's possible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are hashing personality->getName() already so it should be deterministic.
In the event that both lsda and personality exist, I'm proposing we use Hash(lsda->getName() + personality->getName() + utohexstr(encoding)) (name is "" if it is a nullpointer)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: s/accurate/unique? (Or representative?)
I think I'll just remove that line of comment to avoid confusion.
…ight be invalid. Signed-off-by: Peter Rong <[email protected]>
We have seen that the order of UnwindInfo can reduce the size of the final binary [1]. To harness that, we add the hash of unwind info (after relocation) to the
BPSectionOrderer. @ellishg Note:
lsda and personality could be set before relocation; we can't capture that during hashing.