Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions lld/MachO/BPSectionOrderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "BPSectionOrderer.h"
#include "InputSection.h"
#include "UnwindInfoSection.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
Expand Down Expand Up @@ -60,6 +61,30 @@ getRelocHash(const Reloc &reloc,
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
}

// Get a hash of the unwind info (after relocation).
// This hash is not 100% accurate, but it's good enough for compression.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: s/accurate/unique? (Or representative?)
(I'm not sure saying a hash string being "accurate" makes sense )

On the similar topic, it's possible to have more than one personality symbol with the same name, with one being from the dynamic library (eg., libc++) and the other being a user-defined one. (We've seen this in the past use cases).
Should their symbol type be included in the hash?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One key point is we want these hashes to be deterministic. Otherwise builds could end up with non-determinstic function order, which is what the lld/test/MachO/bp-section-orderer-stress.s aims to prevent. So this is why we cannot simply hash the personality pointer, even though that is what we'd like to do. I think it makes sense to include the symbol type in the hash if that's possible.

Copy link
Member Author

@DataCorrupted DataCorrupted Aug 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are hashing personality->getName() already so it should be deterministic.

In the event that both lsda and personality exist, I'm proposing we use Hash(lsda->getName() + personality->getName() + utohexstr(encoding)) (name is "" if it is a nullpointer)

@ellishg

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: s/accurate/unique? (Or representative?)

I think I'll just remove that line of comment to avoid confusion.

//
// Unwind info will be eliminated if it is the same with its neighboors.
// We want to order functions such that the ones with similar unwind info
// can stay together.
// See more here:
// https://faultlore.com/blah/compact-unwinding/#page-tables
static uint64_t getUnwindInfoEncodingHash(const InputSection *isec) {
for (Symbol *sym : isec->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym)) {
if (!d->unwindEntry())
continue;
CompactUnwindEntry cu;
cu.relocateOneCompactUnwindEntry(d);
if (cu.lsda)
return xxHash64("HAS LSDA");
StringRef name = (cu.personality) ? cu.personality->getName() : "<null>";
return xxHash64((name + ";" + Twine::utohexstr(cu.encoding)).str());
}
}
return 0;
}

static void constructNodesForCompression(
const SmallVector<const InputSection *> &sections,
const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
Expand All @@ -76,6 +101,8 @@ static void constructNodesForCompression(
const auto *isec = sections[sectionIdx];
constexpr unsigned windowSize = 4;

hashes.push_back(getUnwindInfoEncodingHash(isec));

for (size_t i = 0; i < isec->data.size(); i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
hashes.push_back(xxHash64(window));
Expand Down
102 changes: 49 additions & 53 deletions lld/MachO/UnwindInfoSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,54 @@ CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);

#undef FOR_EACH_CU_FIELD

// LLD's internal representation of a compact unwind entry.
struct CompactUnwindEntry {
uint64_t functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Symbol *personality;
InputSection *lsda;
};
void lld::macho::CompactUnwindEntry::relocateOneCompactUnwindEntry(
const Defined *d) {
functionAddress = d->getVA();

ConcatInputSection *unwindEntry = d->unwindEntry();
if (!unwindEntry)
return;

// If we have DWARF unwind info, create a slimmed-down CU entry that points
// to it.
if (unwindEntry->getName() == section_names::ehFrame) {
// The unwinder will look for the DWARF entry starting at the hint,
// assuming the hint points to a valid CFI record start. If it
// fails to find the record, it proceeds in a linear search through the
// contiguous CFI records from the hint until the end of the section.
// Ideally, in the case where the offset is too large to be encoded, we
// would instead encode the largest possible offset to a valid CFI record,
// but since we don't keep track of that, just encode zero -- the start of
// the section is always the start of a CFI record.
uint64_t dwarfOffsetHint = unwindEntry->outSecOff <= DWARF_SECTION_OFFSET
? unwindEntry->outSecOff
: 0;
encoding = target->modeDwarfEncoding | dwarfOffsetHint;
const FDE &fde = cast<ObjFile>(d->getFile())->fdes[unwindEntry];
functionLength = fde.funcLength;
// Omit the DWARF personality from compact-unwind entry so that we
// don't need to encode it.
personality = nullptr;
lsda = fde.lsda;
return;
}

assert(unwindEntry->getName() == section_names::compactUnwind);

CompactUnwindLayout cuLayout(target->wordSize);
auto buf = reinterpret_cast<const uint8_t *>(unwindEntry->data.data()) -
target->wordSize;
functionLength =
support::endian::read32le(buf + cuLayout.functionLengthOffset);
encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
for (const Reloc &r : unwindEntry->relocs) {
if (r.offset == cuLayout.personalityOffset)
personality = r.referent.get<Symbol *>();
else if (r.offset == cuLayout.lsdaOffset)
lsda = r.getReferentInputSection();
}
return;
}

using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;

Expand Down Expand Up @@ -349,51 +389,7 @@ Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) {
void UnwindInfoSectionImpl::relocateCompactUnwind(
std::vector<CompactUnwindEntry> &cuEntries) {
parallelFor(0, symbolsVec.size(), [&](size_t i) {
CompactUnwindEntry &cu = cuEntries[i];
const Defined *d = symbolsVec[i].second;
cu.functionAddress = d->getVA();
if (!d->unwindEntry())
return;

// If we have DWARF unwind info, create a slimmed-down CU entry that points
// to it.
if (d->unwindEntry()->getName() == section_names::ehFrame) {
// The unwinder will look for the DWARF entry starting at the hint,
// assuming the hint points to a valid CFI record start. If it
// fails to find the record, it proceeds in a linear search through the
// contiguous CFI records from the hint until the end of the section.
// Ideally, in the case where the offset is too large to be encoded, we
// would instead encode the largest possible offset to a valid CFI record,
// but since we don't keep track of that, just encode zero -- the start of
// the section is always the start of a CFI record.
uint64_t dwarfOffsetHint =
d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
? d->unwindEntry()->outSecOff
: 0;
cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint;
const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
cu.functionLength = fde.funcLength;
// Omit the DWARF personality from compact-unwind entry so that we
// don't need to encode it.
cu.personality = nullptr;
cu.lsda = fde.lsda;
return;
}

assert(d->unwindEntry()->getName() == section_names::compactUnwind);

auto buf =
reinterpret_cast<const uint8_t *>(d->unwindEntry()->data.data()) -
target->wordSize;
cu.functionLength =
support::endian::read32le(buf + cuLayout.functionLengthOffset);
cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
for (const Reloc &r : d->unwindEntry()->relocs) {
if (r.offset == cuLayout.personalityOffset)
cu.personality = r.referent.get<Symbol *>();
else if (r.offset == cuLayout.lsdaOffset)
cu.lsda = r.getReferentInputSection();
}
cuEntries[i].relocateOneCompactUnwindEntry(symbolsVec[i].second);
});
}

Expand Down
12 changes: 12 additions & 0 deletions lld/MachO/UnwindInfoSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ class UnwindInfoSection : public SyntheticSection {

UnwindInfoSection *makeUnwindInfoSection();

// LLD's internal representation of a compact unwind entry.
struct CompactUnwindEntry {
uint64_t functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Symbol *personality;
InputSection *lsda;

// Relocate the entry to the given Symbol.
void relocateOneCompactUnwindEntry(const Defined *d);
};

} // namespace lld::macho

#endif
24 changes: 24 additions & 0 deletions lld/test/MachO/bp-section-orderer-stress.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ profiled_functions = function_names[: int(num_functions / 2)]
function_contents = [
f"""
{name}:
.cfi_startproc
.cfi_personality 155, _personality_{i % 5}
.cfi_lsda 16, _exception{i % 3}
add w0, w0, #{i % 4096}
add w1, w1, #{i % 10}
add w2, w0, #{i % 20}
adrp x3, {name}@PAGE
ret
.cfi_endproc
"""
for i, name in enumerate(function_names)
]
Expand Down Expand Up @@ -78,6 +82,26 @@ with open(assembly_filepath, "w") as f:
_main:
ret

_personality_0:
ret
_personality_1:
ret
_personality_2:
ret
_personality_3:
ret
_personality_4:
ret

_exception0:
.quad 0x4200

_exception1:
.quad 0x4210

_exception2:
.quad 0x4220

{"".join(function_contents)}

.data
Expand Down