Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions bolt/docs/CommandLineArgumentReference.md
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,15 @@

Specify file name of the runtime instrumentation library

- `--runtime-lib-init-hook=<value>`

Primary target for hooking runtime library initialization, used in
fallback order of availability in input binary (entry_point -> init
-> init_array) (default: entry_point)
- `entry_point`: use ELF Header Entry Point
- `init`: use ELF DT_INIT entry
- `init_array`: use ELF .init_array entry

- `--sctc-mode=<value>`

Mode for simplify conditional tail calls
Expand Down
9 changes: 9 additions & 0 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,15 @@ class BinaryContext {
/// the execution of the binary is completed.
std::optional<uint64_t> FiniFunctionAddress;

/// DT_INIT.
std::optional<uint64_t> InitAddress;

/// DT_INIT_ARRAY. Used when DT_INIT is not set or when init_array is the
/// requested runtime library init hook.
std::optional<uint64_t> InitArrayAddress;

/// DT_INIT_ARRAYSZ. Used when DT_INIT is not set or when init_array is the
/// requested runtime library init hook.
std::optional<uint64_t> InitArraySize;

/// DT_FINI.
std::optional<uint64_t> FiniAddress;

Expand Down
11 changes: 10 additions & 1 deletion bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,23 @@ class RewriteInstance {
/// section allocations if found.
void discoverBOLTReserved();

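/// Determine the address used to hook runtime library initialization for
/// instrumentation, honoring the runtime-lib-init-hook option. Nothing is
/// selected here when the ELF entry point is the hook target; otherwise
/// DT_INIT is preferred, and the first DT_INIT_ARRAY entry is used when
/// DT_INIT is not available or init_array was requested explicitly.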
Error discoverRtInitAddress();

/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
/// found.
Error discoverRtFiniAddress();

/// If DT_INIT_ARRAY is used for instrumentation, update the relocation of its
/// first entry to point to the instrumentation library's init address.
Error updateRtInitReloc();

/// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
/// first entry to point to the instrumentation library's fini address.
void updateRtFiniReloc();
Error updateRtFiniReloc();

/// Create and initialize metadata rewriters for this instance.
void initializeMetadataManager();
Expand Down
233 changes: 212 additions & 21 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,28 @@ cl::bits<GadgetScannerKind> GadgetScannersToRun(
clEnumValN(GS_ALL, "all", "All implemented scanners")),
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(BinaryAnalysisCategory));

// Primary targets for hooking runtime library initialization, with fallback
// to the next item in case the current item is not available in the input
// binary.
//
// The numeric order of the enumerators matters: users of this enum compare
// values with `<=` (e.g. `RuntimeLibInitHook <= RLIH_INIT`), so the order
// entry_point < init < init_array must be kept.
enum RuntimeLibInitHookTarget : char {
RLIH_ENTRY_POINT = 0, ///< Use ELF Header Entry Point
RLIH_INIT = 1, ///< Use ELF DT_INIT entry
RLIH_INIT_ARRAY = 2, ///< Use ELF .init_array entry
};

// Command-line option `--runtime-lib-init-hook=<value>`: selects the primary
// target used to hook runtime library initialization. Accepted values are
// `entry_point` (the default), `init` and `init_array`.
cl::opt<RuntimeLibInitHookTarget> RuntimeLibInitHook(
    "runtime-lib-init-hook",
    cl::desc("Primary target for hooking runtime library initialization, used "
             "in fallback order of availability in input binary (entry_point "
             "-> init -> init_array) (default: entry_point)"),
    cl::Hidden, cl::init(RLIH_ENTRY_POINT),
    cl::values(clEnumValN(RLIH_ENTRY_POINT, "entry_point",
                          "use ELF Header Entry Point"),
               clEnumValN(RLIH_INIT, "init", "use ELF DT_INIT entry"),
               clEnumValN(RLIH_INIT_ARRAY, "init_array",
                          "use ELF .init_array entry")),
    cl::ZeroOrMore, cl::cat(BoltOptCategory));

} // namespace opts

// FIXME: implement a better way to mark sections for replacement.
Expand Down Expand Up @@ -741,9 +763,12 @@ Error RewriteInstance::run() {
adjustCommandLineOptions();
discoverFileObjects();

if (opts::Instrument && !BC->IsStaticExecutable)
if (opts::Instrument && !BC->IsStaticExecutable) {
if (Error E = discoverRtInitAddress())
return E;
if (Error E = discoverRtFiniAddress())
return E;
}

preprocessProfileData();

Expand Down Expand Up @@ -785,8 +810,12 @@ Error RewriteInstance::run() {

updateMetadata();

if (opts::Instrument && !BC->IsStaticExecutable)
updateRtFiniReloc();
if (opts::Instrument && !BC->IsStaticExecutable) {
if (Error E = updateRtInitReloc())
return E;
if (Error E = updateRtFiniReloc())
return E;
}

if (opts::OutputFilename == "/dev/null") {
BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n";
Expand Down Expand Up @@ -1411,6 +1440,65 @@ void RewriteInstance::discoverBOLTReserved() {
NextAvailableAddress = BC->BOLTReserved.start();
}

Error RewriteInstance::discoverRtInitAddress() {
if (BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT)
return Error::success();

// Use DT_INIT if it's available.
if (BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT) {
BC->StartFunctionAddress = BC->InitAddress;
return Error::success();
}

if (!BC->InitArrayAddress || !BC->InitArraySize) {
return createStringError(std::errc::not_supported,
"Instrumentation of shared library needs either "
"DT_INIT or DT_INIT_ARRAY");
}

if (*BC->InitArraySize < BC->AsmInfo->getCodePointerSize()) {
return createStringError(std::errc::not_supported,
"Need at least 1 DT_INIT_ARRAY slot");
}

ErrorOr<BinarySection &> InitArraySection =
BC->getSectionForAddress(*BC->InitArrayAddress);
if (auto EC = InitArraySection.getError())
return errorCodeToError(EC);

if (InitArraySection->getAddress() != *BC->InitArrayAddress) {
return createStringError(std::errc::not_supported,
"Inconsistent address of .init_array section");
}

if (const Relocation *Reloc = InitArraySection->getDynamicRelocationAt(0)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should allow DT_INIT_ARRAY to start at non-zero offset and check relocation at that offset.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will probably require saving the offset in the init_array/fini_array used for hooking into an extra BinaryContext variable. Additionally, I am not sure how to generate such a binary, except perhaps by manually removing the relocations for the first init_array/fini_array entry in the ELF and then using obj2yaml/yaml2obj. Any suggestions on this would be greatly appreciated.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. While in theory, DT_INIT_ARRAY can have a non-zero offset in a section, the linker uses .init_array section to generate this dynamic entry, and for all practical purposes zero offset is a valid assumption. Let's validate this assumption explicitly in the code and keep the zero offset.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kind of additional explicit validation do you have in mind? discoverRtInitAddress() already checks whether a relocation at offset 0 exists and fails otherwise.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kind of additional explicit validation do you have in mind? discoverRtInitAddress() already checks whether a relocation at offset 0 exists and fails otherwise.

Check for InitArraySection->getAddress() == *BC->InitArrayAddress.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Added as an extra commit, because the same check should be present for fini_array for consistency.

if (Reloc->isRelative()) {
BC->StartFunctionAddress = Reloc->Addend;
} else {
MCSymbol *Sym = Reloc->Symbol;
if (!Sym)
return createStringError(
std::errc::not_supported,
"Failed to locate symbol for 0 entry of .init_array");
const BinaryFunction *BF = BC->getFunctionForSymbol(Sym);
if (!BF)
return createStringError(
std::errc::not_supported,
"Failed to locate binary function for 0 entry of .init_array");
BC->StartFunctionAddress = BF->getAddress() + Reloc->Addend;
}
return Error::success();
}

if (const Relocation *Reloc = InitArraySection->getRelocationAt(0)) {
BC->StartFunctionAddress = Reloc->Value;
return Error::success();
}

return createStringError(std::errc::not_supported,
"No relocation for first DT_INIT_ARRAY slot");
}

Error RewriteInstance::discoverRtFiniAddress() {
// Use DT_FINI if it's available.
if (BC->FiniAddress) {
Expand All @@ -1434,6 +1522,11 @@ Error RewriteInstance::discoverRtFiniAddress() {
if (auto EC = FiniArraySection.getError())
return errorCodeToError(EC);

if (FiniArraySection->getAddress() != *BC->FiniArrayAddress) {
return createStringError(std::errc::not_supported,
"Inconsistent address of .fini_array section");
}

if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) {
BC->FiniFunctionAddress = Reloc->Addend;
return Error::success();
Expand All @@ -1448,26 +1541,95 @@ Error RewriteInstance::discoverRtFiniAddress() {
"No relocation for first DT_FINI_ARRAY slot");
}

void RewriteInstance::updateRtFiniReloc() {
/// Point the first .init_array slot at the runtime library's start address
/// when .init_array is the mechanism used to hook runtime initialization.
///
/// Returns success without touching anything when the hook is the ELF entry
/// point or DT_INIT, when there is no runtime library (or it has no start
/// address), or when the binary has no DT_INIT_ARRAY. Returns an error when
/// the recorded .init_array state or its dynamic relocation does not match
/// what was recorded in BC->StartFunctionAddress.
Error RewriteInstance::updateRtInitReloc() {
  // The ELF entry point is the hook target: .init_array stays as is.
  const bool HookedViaEntryPoint =
      BC->HasInterpHeader &&
      opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT;
  if (HookedViaEntryPoint)
    return Error::success();

  // Updating DT_INIT is handled by patchELFDynamic.
  const bool HookedViaDtInit =
      BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT;
  if (HookedViaDtInit)
    return Error::success();

  const RuntimeLibrary *RtLib = BC->getRuntimeLibrary();
  if (!RtLib || !RtLib->getRuntimeStartAddress())
    return Error::success();

  if (!BC->InitArrayAddress)
    return Error::success();

  // The array address is known to be present at this point, so only the size
  // can still be missing.
  if (!BC->InitArraySize)
    return createStringError(std::errc::not_supported,
                             "inconsistent .init_array state");

  ErrorOr<BinarySection &> InitArraySection =
      BC->getSectionForAddress(*BC->InitArrayAddress);
  if (!InitArraySection)
    return createStringError(std::errc::not_supported, ".init_array removed");

  const auto RtStartAddress = RtLib->getRuntimeStartAddress();

  if (std::optional<Relocation> DynReloc =
          InitArraySection->takeDynamicRelocationAt(0)) {
    if (!DynReloc->isRelative()) {
      // Symbol-based relocation: resolve the function it refers to and verify
      // it is the one recorded as the start function before replacing it.
      MCSymbol *TargetSym = DynReloc->Symbol;
      if (!TargetSym)
        return createStringError(
            std::errc::not_supported,
            "Failed to locate symbol for 0 entry of .init_array");

      const BinaryFunction *TargetBF = BC->getFunctionForSymbol(TargetSym);
      if (!TargetBF)
        return createStringError(
            std::errc::not_supported,
            "Failed to locate binary function for 0 entry of .init_array");

      if (TargetBF->getAddress() + DynReloc->Addend !=
          BC->StartFunctionAddress)
        return createStringError(std::errc::not_supported,
                                 "inconsistent .init_array dynamic relocation");

      InitArraySection->addDynamicRelocation(Relocation{
          /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
          /*Addend*/ RtStartAddress, /*Value*/ 0});
    } else {
      // Relative relocation: the addend is the target address itself.
      if (DynReloc->Addend != BC->StartFunctionAddress)
        return createStringError(std::errc::not_supported,
                                 "inconsistent .init_array dynamic relocation");

      DynReloc->Addend = RtStartAddress;
      InitArraySection->addDynamicRelocation(*DynReloc);
    }
  }

  // The static relocation is updated through a pending relocation, which gets
  // patched when flushPendingRelocations is called in rewriteFile.
  // flushPendingRelocations computes the patched value as "Symbol + Addend";
  // there is no symbol here, so the addend carries the desired value.
  InitArraySection->addPendingRelocation(Relocation{
      /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
      /*Addend*/ RtStartAddress, /*Value*/ 0});

  BC->outs() << "BOLT-INFO: runtime library initialization was hooked via "
                ".init_array entry, set to 0x"
             << Twine::utohexstr(RtStartAddress) << "\n";
  return Error::success();
}

Error RewriteInstance::updateRtFiniReloc() {
// Updating DT_FINI is handled by patchELFDynamic.
if (BC->FiniAddress)
return;
return Error::success();

const RuntimeLibrary *RT = BC->getRuntimeLibrary();
if (!RT || !RT->getRuntimeFiniAddress())
return;
return Error::success();

assert(BC->FiniArrayAddress && BC->FiniArraySize &&
"inconsistent .fini_array state");
if (!BC->FiniArrayAddress || !BC->FiniArraySize)
return createStringError(std::errc::not_supported,
"inconsistent .fini_array state");

ErrorOr<BinarySection &> FiniArraySection =
BC->getSectionForAddress(*BC->FiniArrayAddress);
assert(FiniArraySection && ".fini_array removed");
if (!FiniArraySection)
return createStringError(std::errc::not_supported, ".fini_array removed");

if (std::optional<Relocation> Reloc =
FiniArraySection->takeDynamicRelocationAt(0)) {
assert(Reloc->Addend == BC->FiniFunctionAddress &&
"inconsistent .fini_array dynamic relocation");
if (Reloc->Addend != BC->FiniFunctionAddress)
return createStringError(std::errc::not_supported,
"inconsistent .fini_array dynamic relocation");
Reloc->Addend = RT->getRuntimeFiniAddress();
FiniArraySection->addDynamicRelocation(*Reloc);
}
Expand All @@ -1480,6 +1642,10 @@ void RewriteInstance::updateRtFiniReloc() {
FiniArraySection->addPendingRelocation(Relocation{
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
BC->outs() << "BOLT-INFO: runtime library finalization was hooked via "
".fini_array entry, set to 0x"
<< Twine::utohexstr(RT->getRuntimeFiniAddress()) << "\n";
return Error::success();
}

void RewriteInstance::registerFragments() {
Expand Down Expand Up @@ -2178,6 +2344,14 @@ void RewriteInstance::adjustCommandLineOptions() {
exit(1);
}

if (opts::Instrument && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT &&
!BC->HasInterpHeader) {
BC->errs()
<< "BOLT-WARNING: adjusted runtime-lib-init-hook to 'init' due to "
"absence of INTERP header\n";
opts::RuntimeLibInitHook = opts::RLIH_INIT;
}

if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
opts::HotTextMoveSections.addValue(".stub");
opts::HotTextMoveSections.addValue(".mover");
Expand Down Expand Up @@ -4849,9 +5023,14 @@ void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
ELFEhdrTy NewEhdr = Obj.getHeader();

if (BC->HasRelocations) {
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
if (RtLibrary && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT) {
NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
else
BC->outs()
<< "BOLT-INFO: runtime library initialization was hooked via ELF "
"Header Entry Point, set to 0x"
<< Twine::utohexstr(NewEhdr.e_entry) << "\n";
} else
NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
"cannot find new address for entry point");
Expand Down Expand Up @@ -5692,14 +5871,23 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
}
RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) {
NewDE.d_un.d_ptr = Addr;
BC->outs()
<< "BOLT-INFO: runtime library finalization was hooked via "
"DT_FINI, set to 0x"
<< Twine::utohexstr(Addr) << "\n";
}
}
if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
if (RtLibrary && Dyn.getTag() == ELF::DT_INIT &&
(!BC->HasInterpHeader ||
opts::RuntimeLibInitHook == opts::RLIH_INIT)) {
if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
<< Twine::utohexstr(Addr) << '\n');
NewDE.d_un.d_ptr = Addr;
BC->outs()
<< "BOLT-INFO: runtime library initialization was hooked via "
"DT_INIT, set to 0x"
<< Twine::utohexstr(Addr) << "\n";
}
}
break;
Expand Down Expand Up @@ -5767,10 +5955,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
for (const Elf_Dyn &Dyn : DynamicEntries) {
switch (Dyn.d_tag) {
case ELF::DT_INIT:
if (!BC->HasInterpHeader) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
BC->StartFunctionAddress = Dyn.getPtr();
}
BC->InitAddress = Dyn.getPtr();
break;
case ELF::DT_INIT_ARRAY:
BC->InitArrayAddress = Dyn.getPtr();
break;
case ELF::DT_INIT_ARRAYSZ:
BC->InitArraySize = Dyn.getPtr();
break;
case ELF::DT_FINI:
BC->FiniAddress = Dyn.getPtr();
Expand Down
Loading