@@ -127,17 +127,13 @@ static const load_command *findCommand(const mach_header_64 *hdr,
127
127
return nullptr ;
128
128
}
129
129
130
- std::vector<InputSection *>
131
- InputFile::parseSections (ArrayRef<section_64> sections) {
132
- std::vector<InputSection *> ret;
133
- ret.reserve (sections.size ());
134
-
130
+ void InputFile::parseSections (ArrayRef<section_64> sections) {
131
+ subsections.reserve (sections.size ());
135
132
auto *buf = reinterpret_cast <const uint8_t *>(mb.getBufferStart ());
136
133
137
134
for (const section_64 &sec : sections) {
138
135
InputSection *isec = make<InputSection>();
139
136
isec->file = this ;
140
- isec->header = &sec;
141
137
isec->name = StringRef (sec.sectname , strnlen (sec.sectname , 16 ));
142
138
isec->segname = StringRef (sec.segname , strnlen (sec.segname , 16 ));
143
139
isec->data = {buf + sec.offset , static_cast <size_t >(sec.size )};
@@ -147,96 +143,185 @@ InputFile::parseSections(ArrayRef<section_64> sections) {
147
143
else
148
144
isec->align = 1 << sec.align ;
149
145
isec->flags = sec.flags ;
150
- ret .push_back (isec);
146
+ subsections .push_back ({{ 0 , isec}} );
151
147
}
148
+ }
152
149
153
- return ret;
150
+ // Find the subsection corresponding to the greatest section offset that is <=
151
+ // that of the given offset.
152
+ //
153
+ // offset: an offset relative to the start of the original InputSection (before
154
+ // any subsection splitting has occurred). It will be updated to represent the
155
+ // same location as an offset relative to the start of the containing
156
+ // subsection.
157
+ static InputSection *findContainingSubsection (SubsectionMap &map,
158
+ uint32_t *offset) {
159
+ auto it = std::prev (map.upper_bound (*offset));
160
+ *offset -= it->first ;
161
+ return it->second ;
154
162
}
155
163
156
164
void InputFile::parseRelocations (const section_64 &sec,
157
- std::vector<Reloc> &relocs ) {
165
+ SubsectionMap &subsecMap ) {
158
166
auto *buf = reinterpret_cast <const uint8_t *>(mb.getBufferStart ());
159
167
ArrayRef<any_relocation_info> relInfos (
160
168
reinterpret_cast <const any_relocation_info *>(buf + sec.reloff ),
161
169
sec.nreloc );
162
170
163
171
for (const any_relocation_info &anyRel : relInfos) {
172
+ if (anyRel.r_word0 & R_SCATTERED)
173
+ fatal (" TODO: Scattered relocations not supported" );
174
+
175
+ auto rel = reinterpret_cast <const relocation_info &>(anyRel);
176
+ if (!rel.r_pcrel )
177
+ fatal (" TODO: Only pcrel relocations are supported" );
178
+
164
179
Reloc r;
165
- if (anyRel.r_word0 & R_SCATTERED) {
166
- error (" TODO: Scattered relocations not supported" );
180
+ r.type = rel.r_type ;
181
+ uint32_t secRelOffset = rel.r_address ;
182
+ uint64_t rawAddend =
183
+ target->getImplicitAddend (buf + sec.offset + secRelOffset, r.type );
184
+
185
+ if (rel.r_extern ) {
186
+ r.target = symbols[rel.r_symbolnum ];
187
+ r.addend = rawAddend;
167
188
} else {
168
- auto rel = reinterpret_cast <const relocation_info &>(anyRel);
169
- r.type = rel.r_type ;
170
- r.offset = rel.r_address ;
171
- r.addend = target->getImplicitAddend (buf + sec.offset + r.offset , r.type );
172
- if (rel.r_extern ) {
173
- r.target = symbols[rel.r_symbolnum ];
174
- } else {
175
- if (rel.r_symbolnum == 0 || rel.r_symbolnum > sections.size ())
176
- fatal (" invalid section index in relocation for offset " +
177
- std::to_string (r.offset ) + " in section " + sec.sectname +
178
- " of " + getName ());
179
- r.target = sections[rel.r_symbolnum - 1 ];
180
- }
189
+ if (rel.r_symbolnum == 0 || rel.r_symbolnum > subsections.size ())
190
+ fatal (" invalid section index in relocation for offset " +
191
+ std::to_string (r.offset ) + " in section " + sec.sectname +
192
+ " of " + getName ());
193
+
194
+ SubsectionMap &targetSubsecMap = subsections[rel.r_symbolnum - 1 ];
195
+ const section_64 &targetSec = sectionHeaders[rel.r_symbolnum - 1 ];
196
+ // The implicit addend for pcrel section relocations is the pcrel offset
197
+ // in terms of the addresses in the input file. Here we adjust it so that
198
+ // it describes the offset from the start of the target section.
199
+ // TODO: Figure out what to do for non-pcrel section relocations.
200
+ // TODO: The offset of 4 is probably not right for ARM64, nor for
201
+ // relocations with r_length != 2.
202
+ uint32_t targetOffset =
203
+ sec.addr + secRelOffset + 4 + rawAddend - targetSec.addr ;
204
+ r.target = findContainingSubsection (targetSubsecMap, &targetOffset);
205
+ r.addend = targetOffset;
181
206
}
182
- relocs.push_back (r);
207
+
208
+ InputSection *subsec = findContainingSubsection (subsecMap, &secRelOffset);
209
+ r.offset = secRelOffset;
210
+ subsec->relocs .push_back (r);
211
+ }
212
+ }
213
+
214
+ void InputFile::parseSymbols (ArrayRef<nlist_64> nList, const char *strtab,
215
+ bool subsectionsViaSymbols) {
216
+ // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
217
+ // out-of-sequence.
218
+ symbols.resize (nList.size ());
219
+ std::vector<size_t > altEntrySymIdxs;
220
+
221
+ auto createDefined = [&](const nlist_64 &sym, InputSection *isec,
222
+ uint32_t value) -> Symbol * {
223
+ StringRef name = strtab + sym.n_strx ;
224
+ if (sym.n_type & N_EXT)
225
+ // Global defined symbol
226
+ return symtab->addDefined (name, isec, value);
227
+ else
228
+ // Local defined symbol
229
+ return make<Defined>(name, isec, value);
230
+ };
231
+
232
+ for (size_t i = 0 , n = nList.size (); i < n; ++i) {
233
+ const nlist_64 &sym = nList[i];
234
+
235
+ // Undefined symbol
236
+ if (!sym.n_sect ) {
237
+ StringRef name = strtab + sym.n_strx ;
238
+ symbols[i] = symtab->addUndefined (name);
239
+ continue ;
240
+ }
241
+
242
+ const section_64 &sec = sectionHeaders[sym.n_sect - 1 ];
243
+ SubsectionMap &subsecMap = subsections[sym.n_sect - 1 ];
244
+ uint64_t offset = sym.n_value - sec.addr ;
245
+
246
+ // If the input file does not use subsections-via-symbols, all symbols can
247
+ // use the same subsection. Otherwise, we must split the sections along
248
+ // symbol boundaries.
249
+ if (!subsectionsViaSymbols) {
250
+ symbols[i] = createDefined (sym, subsecMap[0 ], offset);
251
+ continue ;
252
+ }
253
+
254
+ // nList entries aren't necessarily arranged in address order. Therefore,
255
+ // we can't create alt-entry symbols at this point because a later symbol
256
+ // may split its section, which may affect which subsection the alt-entry
257
+ // symbol is assigned to. So we need to handle them in a second pass below.
258
+ if (sym.n_desc & N_ALT_ENTRY) {
259
+ altEntrySymIdxs.push_back (i);
260
+ continue ;
261
+ }
262
+
263
+ // Find the subsection corresponding to the greatest section offset that is
264
+ // <= that of the current symbol. The subsection that we find either needs
265
+ // to be used directly or split in two.
266
+ uint32_t firstSize = offset;
267
+ InputSection *firstIsec = findContainingSubsection (subsecMap, &firstSize);
268
+
269
+ if (firstSize == 0 ) {
270
+ // Alias of an existing symbol, or the first symbol in the section. These
271
+ // are handled by reusing the existing section.
272
+ symbols[i] = createDefined (sym, firstIsec, 0 );
273
+ continue ;
274
+ }
275
+
276
+ // We saw a symbol definition at a new offset. Split the section into two
277
+ // subsections. The new symbol uses the second subsection.
278
+ auto *secondIsec = make<InputSection>(*firstIsec);
279
+ secondIsec->data = firstIsec->data .slice (firstSize);
280
+ firstIsec->data = firstIsec->data .slice (0 , firstSize);
281
+ // TODO: ld64 appears to preserve the original alignment as well as each
282
+ // subsection's offset from the last aligned address. We should consider
283
+ // emulating that behavior.
284
+ secondIsec->align = MinAlign (firstIsec->align , offset);
285
+
286
+ subsecMap[offset] = secondIsec;
287
+ // By construction, the symbol will be at offset zero in the new section.
288
+ symbols[i] = createDefined (sym, secondIsec, 0 );
289
+ }
290
+
291
+ for (size_t idx : altEntrySymIdxs) {
292
+ const nlist_64 &sym = nList[idx];
293
+ SubsectionMap &subsecMap = subsections[sym.n_sect - 1 ];
294
+ uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1 ].addr ;
295
+ InputSection *subsec = findContainingSubsection (subsecMap, &off);
296
+ symbols[idx] = createDefined (sym, subsec, off);
183
297
}
184
298
}
185
299
186
300
// Construct an ObjFile from the given buffer: read the section headers of the
// LC_SEGMENT_64 command, then the symbols of the LC_SYMTAB command, and
// finally the relocations of every section. Each load command is optional; a
// missing one simply skips the corresponding step.
ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) {
  auto *base = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  auto *header = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());

  if (const load_command *cmd = findCommand(header, LC_SEGMENT_64)) {
    auto *segment = reinterpret_cast<const segment_command_64 *>(cmd);
    // The section headers are read from directly behind the segment command.
    auto *firstSection = reinterpret_cast<const section_64 *>(segment + 1);
    sectionHeaders = ArrayRef<section_64>{firstSection, segment->nsects};
    parseSections(sectionHeaders);
  }

  // TODO: Error on missing LC_SYMTAB?
  if (const load_command *cmd = findCommand(header, LC_SYMTAB)) {
    auto *symtabCmd = reinterpret_cast<const symtab_command *>(cmd);
    ArrayRef<nlist_64> nList(
        reinterpret_cast<const nlist_64 *>(base + symtabCmd->symoff),
        symtabCmd->nsyms);
    const char *strtab =
        reinterpret_cast<const char *>(base) + symtabCmd->stroff;
    parseSymbols(nList, strtab, header->flags & MH_SUBSECTIONS_VIA_SYMBOLS);
  }

  // The relocations may refer to the symbols, so we parse them after we have
  // parsed all the symbols. Section header i pairs with subsection table i.
  size_t secIdx = 0;
  for (SubsectionMap &subsecMap : subsections)
    parseRelocations(sectionHeaders[secIdx++], subsecMap);
}
241
326
242
327
DylibFile::DylibFile (MemoryBufferRef mb, DylibFile *umbrella)
@@ -324,7 +409,8 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
324
409
sym.getName ());
325
410
auto file = make<ObjFile>(mb);
326
411
symbols.insert (symbols.end (), file->symbols .begin (), file->symbols .end ());
327
- sections.insert (sections.end (), file->sections .begin (), file->sections .end ());
412
+ subsections.insert (subsections.end (), file->subsections .begin (),
413
+ file->subsections .end ());
328
414
}
329
415
330
416
// Returns "<internal>" or "baz.o".
0 commit comments