Skip to content

Commit 9565bf5

Browse files
int3memfrob
authored and committed
[lld-macho][re-land] Support .subsections_via_symbols
Summary: This diff restores and builds upon @pcc and @RuiU's initial work on subsections. The .subsections_via_symbols directive indicates we can split each section along symbol boundaries, unless those symbols have been marked with `.alt_entry`. We exercise this functionality in our tests by using order files that rearrange those symbols. Depends on D79668. Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee Reviewed By: smeenai Subscribers: thakis, llvm-commits, pcc, ruiu Tags: #llvm Differential Revision: https://reviews.llvm.org/D79926
1 parent e610d1e commit 9565bf5

File tree

7 files changed

+279
-81
lines changed

7 files changed

+279
-81
lines changed

lld/MachO/Driver.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,9 +326,14 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
326326
createSyntheticSections();
327327

328328
// Initialize InputSections.
329-
for (InputFile *file : inputFiles)
330-
for (InputSection *sec : file->sections)
331-
inputSections.push_back(sec);
329+
for (InputFile *file : inputFiles) {
330+
for (SubsectionMap &map : file->subsections) {
331+
for (auto &p : map) {
332+
InputSection *isec = p.second;
333+
inputSections.push_back(isec);
334+
}
335+
}
336+
}
332337

333338
// Write to an output file.
334339
writeResult();

lld/MachO/InputFiles.cpp

Lines changed: 150 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -127,17 +127,13 @@ static const load_command *findCommand(const mach_header_64 *hdr,
127127
return nullptr;
128128
}
129129

130-
std::vector<InputSection *>
131-
InputFile::parseSections(ArrayRef<section_64> sections) {
132-
std::vector<InputSection *> ret;
133-
ret.reserve(sections.size());
134-
130+
void InputFile::parseSections(ArrayRef<section_64> sections) {
131+
subsections.reserve(sections.size());
135132
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
136133

137134
for (const section_64 &sec : sections) {
138135
InputSection *isec = make<InputSection>();
139136
isec->file = this;
140-
isec->header = &sec;
141137
isec->name = StringRef(sec.sectname, strnlen(sec.sectname, 16));
142138
isec->segname = StringRef(sec.segname, strnlen(sec.segname, 16));
143139
isec->data = {buf + sec.offset, static_cast<size_t>(sec.size)};
@@ -147,96 +143,185 @@ InputFile::parseSections(ArrayRef<section_64> sections) {
147143
else
148144
isec->align = 1 << sec.align;
149145
isec->flags = sec.flags;
150-
ret.push_back(isec);
146+
subsections.push_back({{0, isec}});
151147
}
148+
}
152149

153-
return ret;
150+
// Find the subsection corresponding to the greatest section offset that is <=
151+
// that of the given offset.
152+
//
153+
// offset: an offset relative to the start of the original InputSection (before
154+
// any subsection splitting has occurred). It will be updated to represent the
155+
// same location as an offset relative to the start of the containing
156+
// subsection.
157+
static InputSection *findContainingSubsection(SubsectionMap &map,
158+
uint32_t *offset) {
159+
auto it = std::prev(map.upper_bound(*offset));
160+
*offset -= it->first;
161+
return it->second;
154162
}
155163

156164
void InputFile::parseRelocations(const section_64 &sec,
157-
std::vector<Reloc> &relocs) {
165+
SubsectionMap &subsecMap) {
158166
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
159167
ArrayRef<any_relocation_info> relInfos(
160168
reinterpret_cast<const any_relocation_info *>(buf + sec.reloff),
161169
sec.nreloc);
162170

163171
for (const any_relocation_info &anyRel : relInfos) {
172+
if (anyRel.r_word0 & R_SCATTERED)
173+
fatal("TODO: Scattered relocations not supported");
174+
175+
auto rel = reinterpret_cast<const relocation_info &>(anyRel);
176+
if (!rel.r_pcrel)
177+
fatal("TODO: Only pcrel relocations are supported");
178+
164179
Reloc r;
165-
if (anyRel.r_word0 & R_SCATTERED) {
166-
error("TODO: Scattered relocations not supported");
180+
r.type = rel.r_type;
181+
uint32_t secRelOffset = rel.r_address;
182+
uint64_t rawAddend =
183+
target->getImplicitAddend(buf + sec.offset + secRelOffset, r.type);
184+
185+
if (rel.r_extern) {
186+
r.target = symbols[rel.r_symbolnum];
187+
r.addend = rawAddend;
167188
} else {
168-
auto rel = reinterpret_cast<const relocation_info &>(anyRel);
169-
r.type = rel.r_type;
170-
r.offset = rel.r_address;
171-
r.addend = target->getImplicitAddend(buf + sec.offset + r.offset, r.type);
172-
if (rel.r_extern) {
173-
r.target = symbols[rel.r_symbolnum];
174-
} else {
175-
if (rel.r_symbolnum == 0 || rel.r_symbolnum > sections.size())
176-
fatal("invalid section index in relocation for offset " +
177-
std::to_string(r.offset) + " in section " + sec.sectname +
178-
" of " + getName());
179-
r.target = sections[rel.r_symbolnum - 1];
180-
}
189+
if (rel.r_symbolnum == 0 || rel.r_symbolnum > subsections.size())
190+
fatal("invalid section index in relocation for offset " +
191+
std::to_string(r.offset) + " in section " + sec.sectname +
192+
" of " + getName());
193+
194+
SubsectionMap &targetSubsecMap = subsections[rel.r_symbolnum - 1];
195+
const section_64 &targetSec = sectionHeaders[rel.r_symbolnum - 1];
196+
// The implicit addend for pcrel section relocations is the pcrel offset
197+
// in terms of the addresses in the input file. Here we adjust it so that
198+
// it describes the offset from the start of the target section.
199+
// TODO: Figure out what to do for non-pcrel section relocations.
200+
// TODO: The offset of 4 is probably not right for ARM64, nor for
201+
// relocations with r_length != 2.
202+
uint32_t targetOffset =
203+
sec.addr + secRelOffset + 4 + rawAddend - targetSec.addr;
204+
r.target = findContainingSubsection(targetSubsecMap, &targetOffset);
205+
r.addend = targetOffset;
181206
}
182-
relocs.push_back(r);
207+
208+
InputSection *subsec = findContainingSubsection(subsecMap, &secRelOffset);
209+
r.offset = secRelOffset;
210+
subsec->relocs.push_back(r);
211+
}
212+
}
213+
214+
// Create a Symbol for every symbol-table entry and, when the input file uses
// subsections-via-symbols, split its sections along symbol boundaries.
//
// nList: the symbol-table entries; symbols[i] is filled in for nList[i].
// strtab: start of the string table that n_strx indexes into.
// subsectionsViaSymbols: whether sections may be split at symbol boundaries.
//
// A defined symbol whose section index is out of range is reported through
// fatal().
void InputFile::parseSymbols(ArrayRef<nlist_64> nList, const char *strtab,
                             bool subsectionsViaSymbols) {
  // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
  // out-of-sequence.
  symbols.resize(nList.size());
  std::vector<size_t> altEntrySymIdxs;

  auto createDefined = [&](const nlist_64 &sym, InputSection *isec,
                           uint32_t value) -> Symbol * {
    StringRef name = strtab + sym.n_strx;
    if (sym.n_type & N_EXT)
      // Global defined symbol
      return symtab->addDefined(name, isec, value);
    else
      // Local defined symbol
      return make<Defined>(name, isec, value);
  };

  for (size_t i = 0, n = nList.size(); i < n; ++i) {
    const nlist_64 &sym = nList[i];

    // Undefined symbol
    if (!sym.n_sect) {
      StringRef name = strtab + sym.n_strx;
      symbols[i] = symtab->addUndefined(name);
      continue;
    }

    // n_sect is used below as a 1-based index into sectionHeaders and
    // subsections. Reject out-of-range values up front, mirroring the check
    // that parseRelocations() performs on its section index. Every symbol
    // passes through here, so the alt-entry pass below is covered as well.
    if (sym.n_sect > subsections.size())
      fatal("invalid section index " + std::to_string(sym.n_sect) +
            " in symbol table of " + getName());

    const section_64 &sec = sectionHeaders[sym.n_sect - 1];
    SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
    uint64_t offset = sym.n_value - sec.addr;

    // If the input file does not use subsections-via-symbols, all symbols can
    // use the same subsection. Otherwise, we must split the sections along
    // symbol boundaries.
    if (!subsectionsViaSymbols) {
      symbols[i] = createDefined(sym, subsecMap[0], offset);
      continue;
    }

    // nList entries aren't necessarily arranged in address order. Therefore,
    // we can't create alt-entry symbols at this point because a later symbol
    // may split its section, which may affect which subsection the alt-entry
    // symbol is assigned to. So we need to handle them in a second pass below.
    if (sym.n_desc & N_ALT_ENTRY) {
      altEntrySymIdxs.push_back(i);
      continue;
    }

    // Find the subsection corresponding to the greatest section offset that is
    // <= that of the current symbol. The subsection that we find either needs
    // to be used directly or split in two.
    uint32_t firstSize = offset;
    InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize);

    if (firstSize == 0) {
      // Alias of an existing symbol, or the first symbol in the section. These
      // are handled by reusing the existing section.
      symbols[i] = createDefined(sym, firstIsec, 0);
      continue;
    }

    // We saw a symbol definition at a new offset. Split the section into two
    // subsections. The new symbol uses the second subsection.
    auto *secondIsec = make<InputSection>(*firstIsec);
    secondIsec->data = firstIsec->data.slice(firstSize);
    firstIsec->data = firstIsec->data.slice(0, firstSize);
    // TODO: ld64 appears to preserve the original alignment as well as each
    // subsection's offset from the last aligned address. We should consider
    // emulating that behavior.
    secondIsec->align = MinAlign(firstIsec->align, offset);

    subsecMap[offset] = secondIsec;
    // By construction, the symbol will be at offset zero in the new section.
    symbols[i] = createDefined(sym, secondIsec, 0);
  }

  // Second pass: all splitting is done, so each alt-entry symbol can now be
  // attached to whichever subsection ended up containing its address.
  for (size_t idx : altEntrySymIdxs) {
    const nlist_64 &sym = nList[idx];
    SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
    uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
    InputSection *subsec = findContainingSubsection(subsecMap, &off);
    symbols[idx] = createDefined(sym, subsec, off);
  }
}
185299

186300
// Build an ObjFile from the given buffer in three ordered steps: sections,
// then symbols, then relocations.
ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) {
  auto *fileBytes = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  auto *header = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());

  // Step 1: the section headers are read from directly after the segment
  // command (segCmd + 1).
  if (const load_command *cmd = findCommand(header, LC_SEGMENT_64)) {
    auto *segCmd = reinterpret_cast<const segment_command_64 *>(cmd);
    sectionHeaders = ArrayRef<section_64>{
        reinterpret_cast<const section_64 *>(segCmd + 1), segCmd->nsects};
    parseSections(sectionHeaders);
  }

  // Step 2: symbols. This may split the sections created above.
  // TODO: Error on missing LC_SYMTAB?
  if (const load_command *cmd = findCommand(header, LC_SYMTAB)) {
    auto *symtabCmd = reinterpret_cast<const symtab_command *>(cmd);
    const bool splitAtSymbols =
        (header->flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0;
    const char *stringTable =
        reinterpret_cast<const char *>(fileBytes) + symtabCmd->stroff;
    ArrayRef<nlist_64> entries(
        reinterpret_cast<const nlist_64 *>(fileBytes + symtabCmd->symoff),
        symtabCmd->nsyms);
    parseSymbols(entries, stringTable, splitAtSymbols);
  }

  // Step 3: relocations may refer to the symbols, so they are parsed only
  // after all the symbols have been parsed. subsections[k] pairs with
  // sectionHeaders[k].
  size_t secIdx = 0;
  for (SubsectionMap &subsecMap : subsections)
    parseRelocations(sectionHeaders[secIdx++], subsecMap);
}

242327
DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
@@ -324,7 +409,8 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
324409
sym.getName());
325410
auto file = make<ObjFile>(mb);
326411
symbols.insert(symbols.end(), file->symbols.begin(), file->symbols.end());
327-
sections.insert(sections.end(), file->sections.begin(), file->sections.end());
412+
subsections.insert(subsections.end(), file->subsections.begin(),
413+
file->subsections.end());
328414
}
329415

330416
// Returns "<internal>" or "baz.o".

lld/MachO/InputFiles.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "llvm/BinaryFormat/MachO.h"
1515
#include "llvm/Object/Archive.h"
1616
#include "llvm/Support/MemoryBuffer.h"
17+
18+
#include <map>
1719
#include <vector>
1820

1921
namespace lld {
@@ -23,6 +25,11 @@ class InputSection;
2325
class Symbol;
2426
struct Reloc;
2527

28+
// If .subsections_via_symbols is set, each InputSection will be split along
29+
// symbol boundaries. The keys of a SubsectionMap represent the offsets of
30+
// each subsection from the start of the original pre-split InputSection.
31+
using SubsectionMap = std::map<uint32_t, InputSection *>;
32+
2633
class InputFile {
2734
public:
2835
enum Kind {
@@ -37,15 +44,18 @@ class InputFile {
3744

3845
MemoryBufferRef mb;
3946
std::vector<Symbol *> symbols;
40-
std::vector<InputSection *> sections;
47+
ArrayRef<llvm::MachO::section_64> sectionHeaders;
48+
std::vector<SubsectionMap> subsections;
4149

4250
protected:
4351
InputFile(Kind kind, MemoryBufferRef mb) : mb(mb), fileKind(kind) {}
4452

45-
std::vector<InputSection *> parseSections(ArrayRef<llvm::MachO::section_64>);
53+
void parseSections(ArrayRef<llvm::MachO::section_64>);
54+
55+
void parseSymbols(ArrayRef<llvm::MachO::nlist_64> nList, const char *strtab,
56+
bool subsectionsViaSymbols);
4657

47-
void parseRelocations(const llvm::MachO::section_64 &,
48-
std::vector<Reloc> &relocs);
58+
void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &);
4959

5060
private:
5161
const Kind fileKind;

lld/MachO/InputSection.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ void InputSection::writeTo(uint8_t *buf) {
3232

3333
for (Reloc &r : relocs) {
3434
uint64_t va = 0;
35-
uint64_t addend = r.addend;
3635
if (auto *s = r.target.dyn_cast<Symbol *>()) {
3736
if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s)) {
3837
va = target->getDylibSymbolVA(*dylibSymbol, r.type);
@@ -41,15 +40,9 @@ void InputSection::writeTo(uint8_t *buf) {
4140
}
4241
} else if (auto *isec = r.target.dyn_cast<InputSection *>()) {
4342
va = isec->getVA();
44-
// The implicit addend for pcrel section relocations is the pcrel offset
45-
// in terms of the addresses in the input file. Here we adjust it so that
46-
// it describes the offset from the start of the target section.
47-
// TODO: Figure out what to do for non-pcrel section relocations.
48-
// TODO: The offset of 4 is probably not right for ARM64.
49-
addend -= isec->header->addr - (header->addr + r.offset + 4);
5043
}
5144

52-
uint64_t val = va + addend;
45+
uint64_t val = va + r.addend;
5346
if (1) // TODO: handle non-pcrel relocations
5447
val -= getVA() + r.offset;
5548
target->relocateOne(buf + r.offset, r.type, val);

lld/MachO/InputSection.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@ class Symbol;
2424

2525
struct Reloc {
2626
uint8_t type;
27+
// Adding this offset to the address of the target symbol or subsection gives
28+
// the destination that this relocation refers to.
2729
uint32_t addend;
30+
// The offset from the start of the subsection that this relocation belongs
31+
// to.
2832
uint32_t offset;
2933
llvm::PointerUnion<Symbol *, InputSection *> target;
3034
};
@@ -42,8 +46,6 @@ class InputSection {
4246
InputFile *file = nullptr;
4347
StringRef name;
4448
StringRef segname;
45-
// This provides access to the address of the section in the input file.
46-
const llvm::MachO::section_64 *header;
4749

4850
OutputSection *parent = nullptr;
4951
uint64_t outSecOff = 0;

0 commit comments

Comments
 (0)