Skip to content

Commit 6e8551f

Browse files
committed
[Bolt][Instrumentation] Add support for DT_INIT_ARRAY
Previously Bolt relied on ELF 'e_entry' field or DT_INIT to determine the entry point of an ELF file for the instrumentation. This PR aims to handle that case if an ELF file only contains DT_INIT_ARRAY/DT_FINI_ARRAY sections. Bolt is hooking its runtime function based on e_entry address if the input is an ELF executable. When the input is a shared object, Bolt takes address of DT_INIT if that exists. If it doesn't, Bolt will use DT_INIT_ARRAY for hooking its runtime functions. This PR follows the implementation of DT_FINI_ARRAY.
1 parent f24c50a commit 6e8551f

File tree

5 files changed

+215
-10
lines changed

5 files changed

+215
-10
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,15 @@ class BinaryContext {
800800
/// the execution of the binary is completed.
801801
std::optional<uint64_t> FiniFunctionAddress;
802802

803+
/// DT_INIT. Used when DT_INIT is available.
804+
std::optional<uint64_t> InitAddress;
805+
806+
/// DT_INIT_ARRAY. Only used when DT_INIT is not set.
807+
std::optional<uint64_t> InitArrayAddress;
808+
809+
/// DT_INIT_ARRAYSZ. Only used when DT_INIT is not set.
810+
std::optional<uint64_t> InitArraySize;
811+
803812
/// DT_FINI.
804813
std::optional<uint64_t> FiniAddress;
805814

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,20 @@ class RewriteInstance {
9393
/// section allocations if found.
9494
void discoverBOLTReserved();
9595

96+
/// Check whether we should use DT_INIT or DT_INIT_ARRAY for instrumentation.
97+
/// DT_INIT is preferred; DT_INIT_ARRAY is only used when no DT_INIT entry was
98+
/// found.
99+
Error discoverRtInitAddress();
100+
96101
/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
97102
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
98103
/// found.
99104
Error discoverRtFiniAddress();
100105

106+
/// If DT_INIT_ARRAY is used for instrumentation, update the relocation of its
107+
/// first entry to point to the instrumentation library's init address.
108+
void updateRtInitReloc();
109+
101110
/// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
102111
/// first entry to point to the instrumentation library's fini address.
103112
void updateRtFiniReloc();

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -708,9 +708,13 @@ Error RewriteInstance::run() {
708708
adjustCommandLineOptions();
709709
discoverFileObjects();
710710

711-
if (opts::Instrument && !BC->IsStaticExecutable)
711+
if (opts::Instrument && !BC->IsStaticExecutable) {
712+
if (!BC->HasInterpHeader)
713+
if (Error E = discoverRtInitAddress())
714+
return E;
712715
if (Error E = discoverRtFiniAddress())
713716
return E;
717+
}
714718

715719
preprocessProfileData();
716720

@@ -752,8 +756,10 @@ Error RewriteInstance::run() {
752756

753757
updateMetadata();
754758

755-
if (opts::Instrument && !BC->IsStaticExecutable)
759+
if (opts::Instrument && !BC->IsStaticExecutable) {
760+
updateRtInitReloc();
756761
updateRtFiniReloc();
762+
}
757763

758764
if (opts::OutputFilename == "/dev/null") {
759765
BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n";
@@ -1381,6 +1387,46 @@ void RewriteInstance::discoverBOLTReserved() {
13811387
NextAvailableAddress = BC->BOLTReserved.start();
13821388
}
13831389

1390+
// Select the address at which the instrumentation runtime start routine is
// hooked and record it in BC->StartFunctionAddress.
//
// Order of preference:
//   1. DT_INIT, when present.
//   2. The first DT_INIT_ARRAY slot, taken from the relocation found at offset
//      0 of the section containing DT_INIT_ARRAY (the addend of a dynamic
//      relocation, otherwise the value of a static relocation).
//   3. A non-zero StartFunctionAddress that has already been set (the error
//      text below refers to this as ELF e_entry).
//
// Returns Error::success() when an address was selected. Returns an error when
// the array is smaller than one code pointer, when the section lookup fails,
// when the first slot has no relocation, or when none of the sources above is
// usable.
Error RewriteInstance::discoverRtInitAddress() {
  // Use init address if it is available.
  if (BC->InitAddress) {
    BC->StartFunctionAddress = BC->InitAddress;
    return Error::success();
  }

  // Both DT_INIT_ARRAY and DT_INIT_ARRAYSZ are dereferenced below, so both
  // must be present. If only one of them was found, the array cannot be used
  // and we fall through to the StartFunctionAddress check.
  if (BC->InitArrayAddress && BC->InitArraySize) {
    if (*BC->InitArraySize < BC->AsmInfo->getCodePointerSize()) {
      return createStringError(std::errc::not_supported,
                               "Need at least 1 DT_INIT_ARRAY slot");
    }

    ErrorOr<BinarySection &> InitArraySection =
        BC->getSectionForAddress(*BC->InitArrayAddress);
    if (auto EC = InitArraySection.getError())
      return errorCodeToError(EC);

    // NOTE(review): offset 0 assumes DT_INIT_ARRAY points at the start of its
    // containing section - confirm.
    if (const Relocation *Reloc = InitArraySection->getDynamicRelocationAt(0)) {
      BC->StartFunctionAddress = Reloc->Addend;
      return Error::success();
    }

    if (const Relocation *Reloc = InitArraySection->getRelocationAt(0)) {
      BC->StartFunctionAddress = Reloc->Value;
      return Error::success();
    }

    return createStringError(std::errc::not_supported,
                             "No relocation for first DT_INIT_ARRAY slot");
  }

  // Neither DT_INIT nor a usable DT_INIT_ARRAY: accept a start address that is
  // already known, as long as it is not zero.
  if (BC->StartFunctionAddress && *BC->StartFunctionAddress != 0)
    return Error::success();

  return createStringError(
      std::errc::not_supported,
      "Instrumentation needs any of ELF e_entry, DT_INIT or DT_INIT_ARRAY");
}
1429+
13841430
Error RewriteInstance::discoverRtFiniAddress() {
13851431
// Use DT_FINI if it's available.
13861432
if (BC->FiniAddress) {
@@ -1452,6 +1498,40 @@ void RewriteInstance::updateRtFiniReloc() {
14521498
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
14531499
}
14541500

1501+
void RewriteInstance::updateRtInitReloc() {
1502+
// Updating DT_INIT is handled by patchELFDynamic.
1503+
if (BC->InitAddress || !BC->InitArrayAddress)
1504+
return;
1505+
1506+
const RuntimeLibrary *RT = BC->getRuntimeLibrary();
1507+
if (!RT || !RT->getRuntimeStartAddress())
1508+
return;
1509+
1510+
assert(BC->InitArrayAddress && BC->InitArraySize &&
1511+
"inconsistent .init_array state");
1512+
1513+
ErrorOr<BinarySection &> InitArraySection =
1514+
BC->getSectionForAddress(*BC->InitArrayAddress);
1515+
assert(InitArraySection && ".init_array removed");
1516+
1517+
if (std::optional<Relocation> Reloc =
1518+
InitArraySection->takeDynamicRelocationAt(0)) {
1519+
assert(Reloc->Addend == BC->StartFunctionAddress &&
1520+
"inconsistent .init_array dynamic relocation");
1521+
Reloc->Addend = RT->getRuntimeStartAddress();
1522+
InitArraySection->addDynamicRelocation(*Reloc);
1523+
}
1524+
1525+
// Update the static relocation by adding a pending relocation which will get
1526+
// patched when flushPendingRelocations is called in rewriteFile. Note that
1527+
// flushPendingRelocations will calculate the value to patch as
1528+
// "Symbol + Addend". Since we don't have a symbol, just set the addend to the
1529+
// desired value.
1530+
InitArraySection->addPendingRelocation(Relocation{
1531+
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
1532+
/*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0});
1533+
}
1534+
14551535
void RewriteInstance::registerFragments() {
14561536
if (!BC->HasSplitFunctions ||
14571537
opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive)
@@ -5705,8 +5785,18 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
57055785
switch (Dyn.d_tag) {
57065786
case ELF::DT_INIT:
57075787
if (!BC->HasInterpHeader) {
5708-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
5709-
BC->StartFunctionAddress = Dyn.getPtr();
5788+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set init address\n");
5789+
BC->InitAddress = Dyn.getPtr();
5790+
}
5791+
break;
5792+
case ELF::DT_INIT_ARRAY:
5793+
if (!BC->HasInterpHeader) {
5794+
BC->InitArrayAddress = Dyn.getPtr();
5795+
}
5796+
break;
5797+
case ELF::DT_INIT_ARRAYSZ:
5798+
if (!BC->HasInterpHeader) {
5799+
BC->InitArraySize = Dyn.getPtr();
57105800
}
57115801
break;
57125802
case ELF::DT_FINI:

bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,6 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
5151
opts::JumpTables = JTS_MOVE;
5252
outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
5353
}
54-
if (!BC.StartFunctionAddress) {
55-
errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
56-
"entry point of "
57-
"the input binary\n";
58-
exit(1);
59-
}
6054

6155
if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) {
6256
errs() << "BOLT-ERROR: instrumentation of static binary currently does not "
@@ -78,6 +72,13 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
7872

7973
void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC,
8074
MCStreamer &Streamer) {
75+
/* if (!BC.StartFunctionAddress) {
76+
errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
77+
"entry point of "
78+
"the input binary\n";
79+
exit(1);
80+
}*/
81+
8182
MCSection *Section = BC.isELF()
8283
? static_cast<MCSection *>(BC.Ctx->getELFSection(
8384
".bolt.instr.counters", ELF::SHT_PROGBITS,

bolt/test/AArch64/hook-init.s

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
## Test the different ways of handling entry point for instrumentation.
2+
## BOLT hooks its runtime function via the ELF e_entry, DT_INIT or DT_INIT_ARRAY.
3+
## Bolt uses Elf e_entry address for ELF executable, and DT_INIT address
4+
## for ELF shared object to determine the start address.
5+
## The Test is checking the following cases:
6+
## - For executable, check ELF e_entry is patched.
7+
## - For shared object:
8+
## - Bolt use DT_INIT for hooking runtime start function if that exists.
9+
## - If it doesn't exist, DT_INIT_ARRAY takes its place.
10+
# REQUIRES: system-linux,bolt-runtime,target=aarch64{{.*}}
11+
12+
## Check e_entry address is updated with ELF PIE executable.
13+
# RUN: %clang %cflags -pie %s -Wl,-q -o %t.exe
14+
# RUN: llvm-readelf -l %t.exe | FileCheck --check-prefix=CHECK-INTERP %s
15+
# RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
16+
# RUN: llvm-readelf -hs %t.exe | FileCheck --check-prefix=CHECK-START %s
17+
# RUN: llvm-bolt %t.exe -o %t --instrument
18+
# RUN: llvm-readelf -dhs %t | FileCheck --check-prefix=CHECK-ENTRY %s
19+
20+
## Create a shared library to use DT_INIT for the instrumentation.
21+
# RUN: %clang %cflags -fPIC -shared %s -Wl,-q -o %t-init.so
22+
# RUN: llvm-bolt %t-init.so -o %t-init --instrument
23+
# RUN: llvm-readelf -drs %t-init | FileCheck --check-prefix=CHECK-INIT %s
24+
25+
## Create a shared library with no init to use DT_INIT_ARRAY for the instrumentation.
26+
# RUN: %clang %cflags -shared %s -Wl,-q,-init=0 -o %t-no-init.so
27+
# RUN: llvm-bolt %t-no-init.so -o %t-no-init --instrument
28+
# RUN: llvm-readelf -drs %t-no-init | FileCheck --check-prefix=CHECK-NO-INIT %s
29+
30+
## Check the binary has InterP header
31+
# CHECK-INTERP: Program Headers:
32+
# CHECK-INTERP: INTERP
33+
34+
## With PIE: binary should have relative relocations
35+
# RELOC-PIE: R_AARCH64_RELATIVE
36+
37+
## ELF executable where e_entry is set to __bolt_runtime_start (PIE).
38+
## Check the input that e_entry points to _start by default.
39+
# CHECK-START: ELF Header:
40+
# CHECK-START-DAG: Entry point address: 0x[[ENTRY:[[:xdigit:]]+]]
41+
# CHECK-START: Symbol table '.symtab' contains {{.*}} entries:
42+
# CHECK-START-DAG: {{0+}}[[ENTRY]] {{.*}} _start
43+
## Check that e_entry is set to __bolt_runtime_start after the instrumentation.
44+
# CHECK-ENTRY: ELF Header:
45+
# CHECK-ENTRY-DAG: Entry point address: 0x[[ENTRY:[[:xdigit:]]+]]
46+
# CHECK-ENTRY: Symbol table '.symtab' contains {{.*}} entries:
47+
# CHECK-ENTRY-DAG: {{0+}}[[ENTRY]] {{.*}} __bolt_runtime_start
48+
49+
## Check that DT_INIT is set to __bolt_runtime_start.
50+
# CHECK-INIT: Dynamic section at offset {{.*}} contains {{.*}} entries:
51+
# CHECK-INIT-DAG: (INIT) 0x[[INIT:[[:xdigit:]]+]]
52+
# CHECK-INIT-DAG: (INIT_ARRAY) 0x[[INIT_ARRAY:[[:xdigit:]]+]]
53+
## Check that the dynamic relocation at .init_array was not patched
54+
# CHECK-INIT: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
55+
# CHECK-INIT: {{0+}}[[INIT_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[MYINIT_ADDR:[[:xdigit:]]+]]
56+

57+
# CHECK-INIT: Symbol table '.symtab' contains {{.*}} entries:
58+
# CHECK-INIT-DAG: {{0+}}[[MYINIT_ADDR]] {{.*}} _myinit
59+
60+
## Check that DT_INIT_ARRAY is set to __bolt_runtime_start.
61+
# CHECK-NO-INIT: Dynamic section at offset {{.*}} contains {{.*}} entries:
62+
# CHECK-NO-INIT-NOT: (INIT)
63+
# CHECK-NO-INIT: (INIT_ARRAY) 0x[[INIT_ARRAY:[a-f0-9]+]]
64+
# CHECK-NO-INIT: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
65+
# CHECK-NO-INIT: {{0+}}[[INIT_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[INIT_ADDR:[[:xdigit:]]+]]
66+
# CHECK-NO-INIT: Symbol table '.symtab' contains {{.*}} entries:
67+
# CHECK-NO-INIT-DAG: {{0+}}[[INIT_ADDR]] {{.*}} __bolt_runtime_start
68+
69+
.globl _start
70+
.type _start, %function
71+
_start:
72+
# Dummy relocation to force relocation mode.
73+
.reloc 0, R_AARCH64_NONE
74+
ret
75+
.size _start, .-_start
76+
77+
.globl _init
78+
.type _init, %function
79+
_init:
80+
ret
81+
.size _init, .-_init
82+
83+
.globl _fini
84+
.type _fini, %function
85+
_fini:
86+
ret
87+
.size _fini, .-_fini
88+
89+
.section .text
90+
_myinit:
91+
ret
92+
.size _myinit, .-_myinit
93+
94+
.section .init_array,"aw"
95+
.align 3
96+
.dword _myinit # For relative relocation

0 commit comments

Comments
 (0)