Skip to content

Commit a751ed9

Browse files
vleonenVasily Leonenko
andauthored
[BOLT] Support runtime library hook via DT_INIT_ARRAY (llvm#167467)
The major part of this PR is a commit implementing support for DT_INIT_ARRAY for BOLT runtime library initialization. It also adds a related hook-init test and fixes a couple of X86 instrumentation tests. This commit follows the implementation of the instrumentation hook via DT_FINI_ARRAY (llvm#67348) and extends it to hooking the initialization of BOLT runtime libraries (including the instrumentation library). Initialization has differences compared to finalization: - Executables always use the ELF entry point address. The update code checks this and updates the init_array entry if the ELF is a shared library (has no interp entry) and has no DT_INIT entry. This commit also introduces the "runtime-lib-init-hook" option to select the primary initialization hook (entry_point, init, init_array), with fallback to the next available hook in the input binary; e.g. in the case of libc we can explicitly set it to init_array. - Relocations for shared library init_array entries usually have the R_AARCH64_ABS64 type in AArch64 binaries. We check the relocation type and adjust the methods for reading init_array relocations in the discovery and update methods. --------- Co-authored-by: Vasily Leonenko <[email protected]>
1 parent bbb0dba commit a751ed9

File tree

9 files changed

+708
-26
lines changed

9 files changed

+708
-26
lines changed

bolt/docs/CommandLineArgumentReference.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,15 @@
811811

812812
Specify file name of the runtime instrumentation library
813813

814+
- `--runtime-lib-init-hook=<value>`
815+
816+
Primary target for hooking runtime library initialization, used in
817+
fallback order of availability in input binary (entry_point -> init
818+
-> init_array) (default: entry_point)
819+
- `entry_point`: use ELF Header Entry Point
820+
- `init`: use ELF DT_INIT entry
821+
- `init_array`: use ELF .init_array entry
822+
814823
- `--sctc-mode=<value>`
815824

816825
Mode for simplify conditional tail calls

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,15 @@ class BinaryContext {
807807
/// the execution of the binary is completed.
808808
std::optional<uint64_t> FiniFunctionAddress;
809809

810+
/// DT_INIT.
811+
std::optional<uint64_t> InitAddress;
812+
813+
/// DT_INIT_ARRAY. Used when DT_INIT is not set or when init_array is the selected runtime library init hook.
814+
std::optional<uint64_t> InitArrayAddress;
815+
816+
/// DT_INIT_ARRAYSZ. Used when DT_INIT is not set or when init_array is the selected runtime library init hook.
817+
std::optional<uint64_t> InitArraySize;
818+
810819
/// DT_FINI.
811820
std::optional<uint64_t> FiniAddress;
812821

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,14 +93,23 @@ class RewriteInstance {
9393
/// section allocations if found.
9494
void discoverBOLTReserved();
9595

96+
/// Select the hook used to initialize the runtime library: DT_INIT or the
97+
/// first DT_INIT_ARRAY entry, honoring the runtime-lib-init-hook option.
98+
/// Nothing is selected when the ELF entry point is the hook.
99+
Error discoverRtInitAddress();
100+
96101
/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
97102
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
98103
/// found.
99104
Error discoverRtFiniAddress();
100105

106+
/// If DT_INIT_ARRAY is used for instrumentation, update the relocation of its
107+
/// first entry to point to the instrumentation library's init address.
108+
Error updateRtInitReloc();
109+
101110
/// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
102111
/// first entry to point to the instrumentation library's fini address.
103-
void updateRtFiniReloc();
112+
Error updateRtFiniReloc();
104113

105114
/// Create and initialize metadata rewriters for this instance.
106115
void initializeMetadataManager();

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 212 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,28 @@ cl::bits<GadgetScannerKind> GadgetScannersToRun(
294294
clEnumValN(GS_ALL, "all", "All implemented scanners")),
295295
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(BinaryAnalysisCategory));
296296

297+
// Primary targets for hooking runtime library initialization hooking
298+
// with fallback to next item in case if current item is not available
299+
// in the input binary.
300+
enum RuntimeLibInitHookTarget : char {
301+
RLIH_ENTRY_POINT = 0, /// Use ELF Header Entry Point
302+
RLIH_INIT = 1, /// Use ELF DT_INIT entry
303+
RLIH_INIT_ARRAY = 2, /// Use ELF .init_array entry
304+
};
305+
306+
cl::opt<RuntimeLibInitHookTarget> RuntimeLibInitHook(
307+
"runtime-lib-init-hook",
308+
cl::desc("Primary target for hooking runtime library initialization, used "
309+
"in fallback order of availabiliy in input binary (entry_point -> "
310+
"init -> init_array) (default: entry_point)"),
311+
cl::Hidden, cl::init(RLIH_ENTRY_POINT),
312+
cl::values(clEnumValN(RLIH_ENTRY_POINT, "entry_point",
313+
"use ELF Header Entry Point"),
314+
clEnumValN(RLIH_INIT, "init", "use ELF DT_INIT entry"),
315+
clEnumValN(RLIH_INIT_ARRAY, "init_array",
316+
"use ELF .init_array entry")),
317+
cl::ZeroOrMore, cl::cat(BoltOptCategory));
318+
297319
} // namespace opts
298320

299321
// FIXME: implement a better way to mark sections for replacement.
@@ -741,9 +763,12 @@ Error RewriteInstance::run() {
741763
adjustCommandLineOptions();
742764
discoverFileObjects();
743765

744-
if (opts::Instrument && !BC->IsStaticExecutable)
766+
if (opts::Instrument && !BC->IsStaticExecutable) {
767+
if (Error E = discoverRtInitAddress())
768+
return E;
745769
if (Error E = discoverRtFiniAddress())
746770
return E;
771+
}
747772

748773
preprocessProfileData();
749774

@@ -785,8 +810,12 @@ Error RewriteInstance::run() {
785810

786811
updateMetadata();
787812

788-
if (opts::Instrument && !BC->IsStaticExecutable)
789-
updateRtFiniReloc();
813+
if (opts::Instrument && !BC->IsStaticExecutable) {
814+
if (Error E = updateRtInitReloc())
815+
return E;
816+
if (Error E = updateRtFiniReloc())
817+
return E;
818+
}
790819

791820
if (opts::OutputFilename == "/dev/null") {
792821
BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n";
@@ -1411,6 +1440,65 @@ void RewriteInstance::discoverBOLTReserved() {
14111440
NextAvailableAddress = BC->BOLTReserved.start();
14121441
}
14131442

1443+
/// Determine the address of the original initialization code that the runtime
/// library hook will replace and record it in BC->StartFunctionAddress.
///
/// Returns success without touching StartFunctionAddress when the binary has
/// an interp header and the ELF entry point is the selected hook. Otherwise
/// DT_INIT is used if present and permitted by the runtime-lib-init-hook
/// option; failing that, the first DT_INIT_ARRAY slot is resolved through its
/// dynamic or static relocation. Returns an error if no usable hook is found.
Error RewriteInstance::discoverRtInitAddress() {
  // The entry point hook needs no DT_INIT/DT_INIT_ARRAY discovery.
  const bool HookViaEntryPoint =
      BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT;
  if (HookViaEntryPoint)
    return Error::success();

  // DT_INIT is taken when it exists and the selected hook is not init_array.
  const bool DtInitAllowed = opts::RuntimeLibInitHook <= opts::RLIH_INIT;
  if (DtInitAllowed && BC->InitAddress) {
    BC->StartFunctionAddress = BC->InitAddress;
    return Error::success();
  }

  // From here on only DT_INIT_ARRAY can serve as the hook.
  if (!(BC->InitArrayAddress && BC->InitArraySize))
    return createStringError(std::errc::not_supported,
                             "Instrumentation of shared library needs either "
                             "DT_INIT or DT_INIT_ARRAY");

  // The array must hold at least one code pointer for us to overwrite.
  const uint64_t SlotSize = BC->AsmInfo->getCodePointerSize();
  if (*BC->InitArraySize < SlotSize)
    return createStringError(std::errc::not_supported,
                             "Need at least 1 DT_INIT_ARRAY slot");

  ErrorOr<BinarySection &> Section =
      BC->getSectionForAddress(*BC->InitArrayAddress);
  if (std::error_code EC = Section.getError())
    return errorCodeToError(EC);

  // Relocations below are looked up at section offset 0, so the array has to
  // start exactly at the beginning of its section.
  if (Section->getAddress() != *BC->InitArrayAddress)
    return createStringError(std::errc::not_supported,
                             "Inconsistent address of .init_array section");

  const Relocation *DynReloc = Section->getDynamicRelocationAt(0);
  if (!DynReloc) {
    // No dynamic relocation: fall back to the static relocation of slot 0.
    if (const Relocation *StaticReloc = Section->getRelocationAt(0)) {
      BC->StartFunctionAddress = StaticReloc->Value;
      return Error::success();
    }
    return createStringError(std::errc::not_supported,
                             "No relocation for first DT_INIT_ARRAY slot");
  }

  // Relative relocation: the addend is the target address itself.
  if (DynReloc->isRelative()) {
    BC->StartFunctionAddress = DynReloc->Addend;
    return Error::success();
  }

  // Symbolic relocation: resolve the symbol to a function and apply the
  // addend to the function address.
  MCSymbol *TargetSym = DynReloc->Symbol;
  if (!TargetSym)
    return createStringError(
        std::errc::not_supported,
        "Failed to locate symbol for 0 entry of .init_array");

  const BinaryFunction *TargetFn = BC->getFunctionForSymbol(TargetSym);
  if (!TargetFn)
    return createStringError(
        std::errc::not_supported,
        "Failed to locate binary function for 0 entry of .init_array");

  BC->StartFunctionAddress = TargetFn->getAddress() + DynReloc->Addend;
  return Error::success();
}
1501+
14141502
Error RewriteInstance::discoverRtFiniAddress() {
14151503
// Use DT_FINI if it's available.
14161504
if (BC->FiniAddress) {
@@ -1434,6 +1522,11 @@ Error RewriteInstance::discoverRtFiniAddress() {
14341522
if (auto EC = FiniArraySection.getError())
14351523
return errorCodeToError(EC);
14361524

1525+
if (FiniArraySection->getAddress() != *BC->FiniArrayAddress) {
1526+
return createStringError(std::errc::not_supported,
1527+
"Inconsistent address of .fini_array section");
1528+
}
1529+
14371530
if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) {
14381531
BC->FiniFunctionAddress = Reloc->Addend;
14391532
return Error::success();
@@ -1448,26 +1541,95 @@ Error RewriteInstance::discoverRtFiniAddress() {
14481541
"No relocation for first DT_FINI_ARRAY slot");
14491542
}
14501543

1451-
void RewriteInstance::updateRtFiniReloc() {
1544+
/// Redirect the first .init_array entry to the runtime library start address
/// when .init_array is the hook in use.
///
/// Does nothing (and returns success) when the hook is the ELF entry point or
/// DT_INIT, when there is no runtime library or it has no start address, or
/// when the binary has no DT_INIT_ARRAY. Returns an error if the recorded
/// .init_array state or its dynamic relocation is inconsistent with what was
/// recorded in BC->StartFunctionAddress.
Error RewriteInstance::updateRtInitReloc() {
  const bool HookViaEntryPoint =
      BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT;
  if (HookViaEntryPoint)
    return Error::success();

  // Updating DT_INIT is handled by patchELFDynamic.
  const bool HookViaDtInit =
      BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT;
  if (HookViaDtInit)
    return Error::success();

  const RuntimeLibrary *RT = BC->getRuntimeLibrary();
  if (!RT)
    return Error::success();
  const uint64_t RuntimeStart = RT->getRuntimeStartAddress();
  if (!RuntimeStart)
    return Error::success();

  if (!BC->InitArrayAddress)
    return Error::success();

  // The address is known to be set here, so only the size can be missing.
  if (!BC->InitArraySize)
    return createStringError(std::errc::not_supported,
                             "inconsistent .init_array state");

  ErrorOr<BinarySection &> Section =
      BC->getSectionForAddress(*BC->InitArrayAddress);
  if (!Section)
    return createStringError(std::errc::not_supported, ".init_array removed");

  std::optional<Relocation> DynReloc = Section->takeDynamicRelocationAt(0);
  if (DynReloc && DynReloc->isRelative()) {
    // Relative relocation: verify the old target, then retarget the addend.
    if (DynReloc->Addend != BC->StartFunctionAddress)
      return createStringError(std::errc::not_supported,
                               "inconsistent .init_array dynamic relocation");
    DynReloc->Addend = RuntimeStart;
    Section->addDynamicRelocation(*DynReloc);
  } else if (DynReloc) {
    // Symbolic relocation: verify that symbol + addend matches the recorded
    // start address, then replace it with a symbol-less relocation whose
    // addend is the runtime start address.
    MCSymbol *TargetSym = DynReloc->Symbol;
    if (!TargetSym)
      return createStringError(
          std::errc::not_supported,
          "Failed to locate symbol for 0 entry of .init_array");
    const BinaryFunction *TargetFn = BC->getFunctionForSymbol(TargetSym);
    if (!TargetFn)
      return createStringError(
          std::errc::not_supported,
          "Failed to locate binary function for 0 entry of .init_array");
    if (TargetFn->getAddress() + DynReloc->Addend != BC->StartFunctionAddress)
      return createStringError(std::errc::not_supported,
                               "inconsistent .init_array dynamic relocation");
    Section->addDynamicRelocation(Relocation{
        /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
        /*Addend*/ RuntimeStart, /*Value*/ 0});
  }

  // Update the static relocation by adding a pending relocation which will get
  // patched when flushPendingRelocations is called in rewriteFile. Note that
  // flushPendingRelocations will calculate the value to patch as
  // "Symbol + Addend". Since we don't have a symbol, just set the addend to the
  // desired value.
  Section->addPendingRelocation(Relocation{
      /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
      /*Addend*/ RuntimeStart, /*Value*/ 0});

  BC->outs()
      << "BOLT-INFO: runtime library initialization was hooked via .init_array "
         "entry, set to 0x"
      << Twine::utohexstr(RuntimeStart) << "\n";
  return Error::success();
}
1609+
1610+
Error RewriteInstance::updateRtFiniReloc() {
14521611
// Updating DT_FINI is handled by patchELFDynamic.
14531612
if (BC->FiniAddress)
1454-
return;
1613+
return Error::success();
14551614

14561615
const RuntimeLibrary *RT = BC->getRuntimeLibrary();
14571616
if (!RT || !RT->getRuntimeFiniAddress())
1458-
return;
1617+
return Error::success();
14591618

1460-
assert(BC->FiniArrayAddress && BC->FiniArraySize &&
1461-
"inconsistent .fini_array state");
1619+
if (!BC->FiniArrayAddress || !BC->FiniArraySize)
1620+
return createStringError(std::errc::not_supported,
1621+
"inconsistent .fini_array state");
14621622

14631623
ErrorOr<BinarySection &> FiniArraySection =
14641624
BC->getSectionForAddress(*BC->FiniArrayAddress);
1465-
assert(FiniArraySection && ".fini_array removed");
1625+
if (!FiniArraySection)
1626+
return createStringError(std::errc::not_supported, ".fini_array removed");
14661627

14671628
if (std::optional<Relocation> Reloc =
14681629
FiniArraySection->takeDynamicRelocationAt(0)) {
1469-
assert(Reloc->Addend == BC->FiniFunctionAddress &&
1470-
"inconsistent .fini_array dynamic relocation");
1630+
if (Reloc->Addend != BC->FiniFunctionAddress)
1631+
return createStringError(std::errc::not_supported,
1632+
"inconsistent .fini_array dynamic relocation");
14711633
Reloc->Addend = RT->getRuntimeFiniAddress();
14721634
FiniArraySection->addDynamicRelocation(*Reloc);
14731635
}
@@ -1480,6 +1642,10 @@ void RewriteInstance::updateRtFiniReloc() {
14801642
FiniArraySection->addPendingRelocation(Relocation{
14811643
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
14821644
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
1645+
BC->outs() << "BOLT-INFO: runtime library finalization was hooked via "
1646+
".fini_array entry, set to 0x"
1647+
<< Twine::utohexstr(RT->getRuntimeFiniAddress()) << "\n";
1648+
return Error::success();
14831649
}
14841650

14851651
void RewriteInstance::registerFragments() {
@@ -2178,6 +2344,14 @@ void RewriteInstance::adjustCommandLineOptions() {
21782344
exit(1);
21792345
}
21802346

2347+
if (opts::Instrument && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT &&
2348+
!BC->HasInterpHeader) {
2349+
BC->errs()
2350+
<< "BOLT-WARNING: adjusted runtime-lib-init-hook to 'init' due to "
2351+
"absence of INTERP header\n";
2352+
opts::RuntimeLibInitHook = opts::RLIH_INIT;
2353+
}
2354+
21812355
if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
21822356
opts::HotTextMoveSections.addValue(".stub");
21832357
opts::HotTextMoveSections.addValue(".mover");
@@ -4849,9 +5023,14 @@ void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
48495023
ELFEhdrTy NewEhdr = Obj.getHeader();
48505024

48515025
if (BC->HasRelocations) {
4852-
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
5026+
RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
5027+
if (RtLibrary && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT) {
48535028
NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
4854-
else
5029+
BC->outs()
5030+
<< "BOLT-INFO: runtime library initialization was hooked via ELF "
5031+
"Header Entry Point, set to 0x"
5032+
<< Twine::utohexstr(NewEhdr.e_entry) << "\n";
5033+
} else
48555034
NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
48565035
assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
48575036
"cannot find new address for entry point");
@@ -5692,14 +5871,23 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
56925871
}
56935872
RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
56945873
if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
5695-
if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
5874+
if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) {
56965875
NewDE.d_un.d_ptr = Addr;
5876+
BC->outs()
5877+
<< "BOLT-INFO: runtime library finalization was hooked via "
5878+
"DT_FINI, set to 0x"
5879+
<< Twine::utohexstr(Addr) << "\n";
5880+
}
56975881
}
5698-
if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
5882+
if (RtLibrary && Dyn.getTag() == ELF::DT_INIT &&
5883+
(!BC->HasInterpHeader ||
5884+
opts::RuntimeLibInitHook == opts::RLIH_INIT)) {
56995885
if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
5700-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
5701-
<< Twine::utohexstr(Addr) << '\n');
57025886
NewDE.d_un.d_ptr = Addr;
5887+
BC->outs()
5888+
<< "BOLT-INFO: runtime library initialization was hooked via "
5889+
"DT_INIT, set to 0x"
5890+
<< Twine::utohexstr(Addr) << "\n";
57035891
}
57045892
}
57055893
break;
@@ -5767,10 +5955,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
57675955
for (const Elf_Dyn &Dyn : DynamicEntries) {
57685956
switch (Dyn.d_tag) {
57695957
case ELF::DT_INIT:
5770-
if (!BC->HasInterpHeader) {
5771-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
5772-
BC->StartFunctionAddress = Dyn.getPtr();
5773-
}
5958+
BC->InitAddress = Dyn.getPtr();
5959+
break;
5960+
case ELF::DT_INIT_ARRAY:
5961+
BC->InitArrayAddress = Dyn.getPtr();
5962+
break;
5963+
case ELF::DT_INIT_ARRAYSZ:
5964+
BC->InitArraySize = Dyn.getPtr();
57745965
break;
57755966
case ELF::DT_FINI:
57765967
BC->FiniAddress = Dyn.getPtr();

0 commit comments

Comments
 (0)