Skip to content

Commit 474d9a2

Browse files
author
Alex B
committed
[lld-macho] Fix safe_thunks / --keep-icf-stabs compatibility
1 parent e3e7c75 commit 474d9a2

File tree

7 files changed

+138
-29
lines changed

7 files changed

+138
-29
lines changed

lld/MachO/Arch/ARM64.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ struct ARM64 : ARM64Common {
4444

4545
void initICFSafeThunkBody(InputSection *thunk,
4646
InputSection *branchTarget) const override;
47+
InputSection *getThunkBranchTarget(InputSection *thunk) const override;
4748
uint32_t getICFSafeThunkSize() const override;
4849
};
4950

@@ -197,6 +198,16 @@ void ARM64::initICFSafeThunkBody(InputSection *thunk,
197198
/*referent=*/branchTarget);
198199
}
199200

201+
InputSection *ARM64::getThunkBranchTarget(InputSection *thunk) const {
202+
assert(thunk->relocs.size() == 1 &&
203+
"expected a single reloc on ARM64 ICF thunk");
204+
auto &reloc = thunk->relocs[0];
205+
assert(reloc.referent.is<InputSection *>() &&
206+
"ARM64 thunk reloc is expected to point to an InputSection");
207+
208+
return reloc.referent.dyn_cast<InputSection *>();
209+
}
210+
200211
uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }
201212

202213
ARM64::ARM64() : ARM64Common(LP64()) {

lld/MachO/ICF.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,34 @@ void macho::markAddrSigSymbols() {
481481
}
482482
}
483483

484+
// Given a symbol that was folded into a thunk, return the symbol pointing to
485+
// the actual body of the function. We use this approach rather than storing the
486+
// needed info in the Defined itself in order to minimize memory usage.
487+
Defined *macho::getBodyForThunkFoldedSym(Defined *foldedSym) {
488+
assert(isa<ConcatInputSection>(foldedSym->originalIsec) &&
489+
"thunk-folded ICF symbol expected to be on a ConcatInputSection");
490+
// foldedSec is the InputSection that was marked as deleted upon fold
491+
ConcatInputSection *foldedSec =
492+
cast<ConcatInputSection>(foldedSym->originalIsec);
493+
494+
// thunkBody is the actual live thunk, containing the code that branches to
495+
// the actual body of the function.
496+
InputSection *thunkBody = foldedSec->replacement;
497+
498+
// the actual (merged) body of the function that the thunk jumps to. This will
499+
// end up in the final binary.
500+
InputSection *functionBody = target->getThunkBranchTarget(thunkBody);
501+
502+
for (Symbol *sym : functionBody->symbols) {
503+
Defined *d = dyn_cast<Defined>(sym);
504+
// The symbol needs to be at the start of the InputSection
505+
if (d && d->value == 0)
506+
return d;
507+
}
508+
509+
llvm_unreachable("could not find body symbol for ICF-generated thunk");
510+
return nullptr;
511+
}
484512
void macho::foldIdenticalSections(bool onlyCfStrings) {
485513
TimeTraceScope timeScope("Fold Identical Code Sections");
486514
// The ICF equivalence-class segregation algorithm relies on pre-computed

lld/MachO/ICF.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,17 @@
1515

1616
namespace lld::macho {
1717
class Symbol;
18+
class Defined;
1819

1920
void markAddrSigSymbols();
2021
void markSymAsAddrSig(Symbol *s);
2122
void foldIdenticalSections(bool onlyCfStrings);
2223

24+
// Given a symbol that was folded into a thunk, return the symbol pointing to
25+
// the actual body of the function. We expose this function to allow getting the
26+
// main function body for a symbol that was folded via a thunk.
27+
Defined *getBodyForThunkFoldedSym(Defined *foldedSym);
28+
2329
} // namespace lld::macho
2430

2531
#endif

lld/MachO/SyntheticSections.cpp

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "ConcatOutputSection.h"
1111
#include "Config.h"
1212
#include "ExportTrie.h"
13+
#include "ICF.h"
1314
#include "InputFiles.h"
1415
#include "MachOStructs.h"
1516
#include "ObjC.h"
@@ -1204,6 +1205,14 @@ void SymtabSection::emitEndFunStab(Defined *defined) {
12041205
stabs.emplace_back(std::move(stab));
12051206
}
12061207

1208+
Defined *SymtabSection::getFuncBodySym(Defined *originalSym) {
1209+
// If the Defined is not a thunk, we can use it directly
1210+
if (originalSym->identicalCodeFoldingKind != Symbol::ICFFoldKind::Thunk)
1211+
return originalSym;
1212+
1213+
return macho::getBodyForThunkFoldedSym(originalSym);
1214+
}
1215+
12071216
void SymtabSection::emitStabs() {
12081217
if (config->omitDebugInfo)
12091218
return;
@@ -1214,9 +1223,14 @@ void SymtabSection::emitStabs() {
12141223
stabs.emplace_back(std::move(astStab));
12151224
}
12161225

1217-
// Cache the file ID for each symbol in an std::pair for faster sorting.
1218-
using SortingPair = std::pair<Defined *, int>;
1219-
std::vector<SortingPair> symbolsNeedingStabs;
1226+
struct SymbolStabInfo {
1227+
Defined *originalSym; // Original Defined symbol - this may be an ICF thunk
1228+
int fileId; // File ID associated with the STABS symbol
1229+
Defined *mainBodySym; // Symbol that consists of the full function body -
1230+
// use this for the STABS entry
1231+
};
1232+
1233+
std::vector<SymbolStabInfo> symbolsNeedingStabs;
12201234
for (const SymtabEntry &entry :
12211235
concat<SymtabEntry>(localSymbols, externalSymbols)) {
12221236
Symbol *sym = entry.sym;
@@ -1229,20 +1243,10 @@ void SymtabSection::emitStabs() {
12291243
if (defined->isAbsolute())
12301244
continue;
12311245

1232-
// Never generate a STABS entry for a symbol that has been ICF'ed using a
1233-
// thunk, just as we do for fully ICF'ed functions. Otherwise, we end up
1234-
// generating invalid DWARF as dsymutil will assume the entire function
1235-
// body is at that location, when, in reality, only the thunk is
1236-
// present. This will end up causing overlapping DWARF entries.
1237-
// TODO: Find an implementation that works in combination with
1238-
// `--keep-icf-stabs`.
1239-
if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk)
1240-
continue;
1241-
12421246
// Constant-folded symbols go in the executable's symbol table, but don't
1243-
// get a stabs entry unless --keep-icf-stabs flag is specified
1247+
// get a stabs entry unless --keep-icf-stabs flag is specified.
12441248
if (!config->keepICFStabs &&
1245-
defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
1249+
defined->identicalCodeFoldingKind != Symbol::ICFFoldKind::None)
12461250
continue;
12471251

12481252
ObjFile *file = defined->getObjectFile();
@@ -1251,26 +1255,26 @@ void SymtabSection::emitStabs() {
12511255

12521256
// We use 'originalIsec' to get the file id of the symbol since 'isec()'
12531257
// might point to the merged ICF symbol's file
1254-
symbolsNeedingStabs.emplace_back(defined,
1255-
defined->originalIsec->getFile()->id);
1258+
Defined *funcBodySym = getFuncBodySym(defined);
1259+
symbolsNeedingStabs.emplace_back(SymbolStabInfo{
1260+
defined, funcBodySym->originalIsec->getFile()->id, funcBodySym});
12561261
}
12571262
}
12581263

12591264
llvm::stable_sort(symbolsNeedingStabs,
1260-
[&](const SortingPair &a, const SortingPair &b) {
1261-
return a.second < b.second;
1265+
[&](const SymbolStabInfo &a, const SymbolStabInfo &b) {
1266+
return a.fileId < b.fileId;
12621267
});
12631268

12641269
// Emit STABS symbols so that dsymutil and/or the debugger can map address
12651270
// regions in the final binary to the source and object files from which they
12661271
// originated.
12671272
InputFile *lastFile = nullptr;
1268-
for (SortingPair &pair : symbolsNeedingStabs) {
1269-
Defined *defined = pair.first;
1273+
for (const SymbolStabInfo &info : symbolsNeedingStabs) {
12701274
// We use 'originalIsec' of the symbol since we care about the actual origin
12711275
// of the symbol, not the canonical location returned by `isec()`.
1272-
InputSection *isec = defined->originalIsec;
1273-
ObjFile *file = cast<ObjFile>(isec->getFile());
1276+
InputSection *bodyIsec = info.mainBodySym->originalIsec;
1277+
ObjFile *file = cast<ObjFile>(bodyIsec->getFile());
12741278

12751279
if (lastFile == nullptr || lastFile != file) {
12761280
if (lastFile != nullptr)
@@ -1282,16 +1286,16 @@ void SymtabSection::emitStabs() {
12821286
}
12831287

12841288
StabsEntry symStab;
1285-
symStab.sect = isec->parent->index;
1286-
symStab.strx = stringTableSection.addString(defined->getName());
1287-
symStab.value = defined->getVA();
1289+
symStab.sect = bodyIsec->parent->index;
1290+
symStab.strx = stringTableSection.addString(info.originalSym->getName());
1291+
symStab.value = info.mainBodySym->getVA();
12881292

1289-
if (isCodeSection(isec)) {
1293+
if (isCodeSection(bodyIsec)) {
12901294
symStab.type = N_FUN;
12911295
stabs.emplace_back(std::move(symStab));
1292-
emitEndFunStab(defined);
1296+
emitEndFunStab(info.mainBodySym);
12931297
} else {
1294-
symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
1298+
symStab.type = info.originalSym->isExternal() ? N_GSYM : N_STSYM;
12951299
stabs.emplace_back(std::move(symStab));
12961300
}
12971301
}

lld/MachO/SyntheticSections.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,7 @@ class SymtabSection : public LinkEditSection {
485485
void emitEndSourceStab();
486486
void emitObjectFileStab(ObjFile *);
487487
void emitEndFunStab(Defined *);
488+
Defined *getFuncBodySym(Defined *);
488489
void emitStabs();
489490

490491
protected:

lld/MachO/Target.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ class TargetInfo {
8080
llvm_unreachable("target does not support ICF safe thunks");
8181
}
8282

83+
// Given a thunk for which `initICFSafeThunkBody` was called, return the
84+
// branchTarget it was initialized with.
85+
virtual InputSection *getThunkBranchTarget(InputSection *thunk) const {
86+
llvm_unreachable("target does not support ICF safe thunks");
87+
}
88+
8389
virtual uint32_t getICFSafeThunkSize() const {
8490
llvm_unreachable("target does not support ICF safe thunks");
8591
}

lld/test/MachO/icf-safe-thunks-dwarf.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,59 @@
2020
; VERIFY-STABS: N_FUN{{.*}}_func_A
2121
; VERIFY-STABS: N_FUN{{.*}}_take_func_addr
2222

23+
;;;;;;;;;;;;;;;;;;;;;;;;;;;; Check safe_thunks ICF + keeping STABS entries ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24+
25+
;;; Check scenario with where we do safe_thunks ICF and also generate STABS entries
26+
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/a_thunks.dylib %t/a.o
27+
; RUN: dsymutil -s %t/a_thunks.dylib > %t/a_thunks.txt
28+
29+
30+
; RUN: dsymutil --flat --verify-dwarf=none %t/a_thunks.dylib -o %t/a_thunks.dSYM
31+
; RUN: dsymutil -s %t/a_thunks.dSYM >> %t/a_thunks.txt
32+
; RUN: llvm-dwarfdump -a %t/a_thunks.dSYM >> %t/a_thunks.txt
33+
34+
; RUN: cat %t/a_thunks.txt | FileCheck %s --check-prefix=VERIFY-THUNK
35+
36+
# VERIFY-THUNK-LABEL: Symbol table for: '{{.*}}/a_thunks.dylib'
37+
# Capture the 'n_value's for N_FUN entries of _func_A, _func_B, and _func_C
38+
# VERIFY-THUNK: [[MERGED_FUN_ADDR:[0-9a-f]+]] '_func_A'
39+
# VERIFY-THUNK: [[MERGED_FUN_ADDR]] '_func_B'
40+
# VERIFY-THUNK: [[MERGED_FUN_ADDR]] '_func_C'
41+
42+
# Capture the 'n_value's for SECT EXT entries in the first part
43+
# VERIFY-THUNK: SECT EXT{{.*}} [[SECT_EXT_A_NVALUE:[0-9a-f]+]] '_func_A'
44+
# VERIFY-THUNK: SECT EXT{{.*}} [[SECT_EXT_B_NVALUE:[0-9a-f]+]] '_func_B'
45+
# VERIFY-THUNK: SECT EXT{{.*}} [[SECT_EXT_C_NVALUE:[0-9a-f]+]] '_func_C'
46+
47+
# VERIFY-THUNK: ----------------------------------------------------------------------
48+
# VERIFY-THUNK-LABEL: Symbol table for: '{{.*}}/a_thunks.dSYM'
49+
50+
# Verify that the SECT EXT 'n_value's in the second part match the first part
51+
# VERIFY-THUNK: SECT EXT{{.*}} [[SECT_EXT_A_NVALUE]] '_func_A'
52+
# VERIFY-THUNK: SECT EXT{{.*}} [[SECT_EXT_B_NVALUE]] '_func_B'
53+
# VERIFY-THUNK: SECT EXT{{.*}} [[SECT_EXT_C_NVALUE]] '_func_C'
54+
55+
# Ensure that N_FUN 'n_value's match the DW_TAG_subprogram's DW_AT_low_pc
56+
# and that the DW_AT_name is at a specific relative position
57+
58+
# VERIFY-THUNK-LABEL: .debug_info contents:
59+
# VERIFY-THUNK: Compile Unit: length = {{.*}}
60+
61+
# Match the subprogram for func_A
62+
# VERIFY-THUNK: : DW_TAG_subprogram
63+
# VERIFY-THUNK-NEXT: {{ +}}DW_AT_low_pc (0x[[MERGED_FUN_ADDR]])
64+
# VERIFY-THUNK-NEXT-NEXT-NEXT-NEXT-NEXT: {{ +}}DW_AT_name ("func_A")
65+
66+
# Match the subprogram for func_B
67+
# VERIFY-THUNK: : DW_TAG_subprogram
68+
# VERIFY-THUNK-NEXT: {{ +}}DW_AT_low_pc (0x[[MERGED_FUN_ADDR]])
69+
# VERIFY-THUNK-NEXT-NEXT-NEXT-NEXT-NEXT: {{ +}}DW_AT_name ("func_B")
70+
71+
# Match the subprogram for func_C
72+
# VERIFY-THUNK: : DW_TAG_subprogram
73+
# VERIFY-THUNK-NEXT: {{ +}}DW_AT_low_pc (0x[[MERGED_FUN_ADDR]])
74+
# VERIFY-THUNK-NEXT-NEXT-NEXT-NEXT-NEXT: {{ +}}DW_AT_name ("func_C")
75+
2376
;--- a.cpp
2477
#define ATTR __attribute__((noinline)) extern "C"
2578
typedef unsigned long long ULL;

0 commit comments

Comments
 (0)