Skip to content

Commit 33e0301

Browse files
authored
[BOLT] Add validation for direct call/branch targets (#165406)
In some edge cases, a binary may contain direct `branch` or `call` instructions whose target do not point to a valid executable instruction. This can occur due to compiler bugs, hand-written assembly, obfuscation technique, **or when control flow targets a data by mistake.** We also encountered the problems as described in this [issue](#149382), where "data in code" within OpenSSL's hand-written assembly was misidentified as instructions(island identification seems fail due to the absence of a corresponding data symbol). The problem occurred because a data sequence was incorrectly disassembled as a "jb" instruction. The point here is that the data should not be pointed to by any edge, so this patch tries to address this by validating the destination address for **direct branches and calls**. If the target instruction is invalid(implies a corrupted control flow), this function will be set ignored. Although this approach appears helpful for addressing the 'data in code' problem, its validation might be compromised if the data can be disassembled as normal instruction.
1 parent f53f6f7 commit 33e0301

File tree

8 files changed

+151
-18
lines changed

8 files changed

+151
-18
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -943,10 +943,11 @@ class BinaryContext {
943943
/// that should be used by the branch. For example, main or secondary entry
944944
/// point.
945945
///
946-
/// If \p Address is an invalid destination, such as a constant island, return
947-
/// nullptr and mark \p BF as ignored, since we cannot properly handle a
948-
/// branch to a constant island.
949-
MCSymbol *handleExternalBranchTarget(uint64_t Address, BinaryFunction &BF);
946+
/// This function also performs validations: If \p Address points to an
947+
/// invalid instruction or lies within a constant island, return nullptr and
948+
/// mark both \p Source and \p Target as ignored.
949+
MCSymbol *handleExternalBranchTarget(uint64_t Address, BinaryFunction &Source,
950+
BinaryFunction &Target);
950951

951952
/// Analyze memory contents at the given \p Address and return the type of
952953
/// memory contents (such as a possible jump table).

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,10 @@ class BinaryFunction {
385385
/// True if the function should not have an associated symbol table entry.
386386
bool IsAnonymous{false};
387387

388+
/// Indicates whether branch validation has already been performed,
389+
/// to avoid redundant processing.
390+
bool NeedBranchValidation{true};
391+
388392
/// Name for the section this function code should reside in.
389393
std::string CodeSectionName;
390394

@@ -2320,6 +2324,11 @@ class BinaryFunction {
23202324
/// zero-value bytes.
23212325
bool isZeroPaddingAt(uint64_t Offset) const;
23222326

2327+
/// Validate if the target of any internal direct branch/call is a valid
2328+
/// executable instruction.
2329+
/// Return true if all the targets are valid, false otherwise.
2330+
bool validateInternalBranches();
2331+
23232332
/// Check that entry points have an associated instruction at their
23242333
/// offsets after disassembly.
23252334
void postProcessEntryPoints();

bolt/lib/Core/BinaryContext.cpp

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -531,20 +531,40 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
531531
}
532532

533533
MCSymbol *BinaryContext::handleExternalBranchTarget(uint64_t Address,
534-
BinaryFunction &BF) {
535-
if (BF.isInConstantIsland(Address)) {
536-
BF.setIgnored();
537-
this->outs() << "BOLT-WARNING: ignoring entry point at address 0x"
538-
<< Twine::utohexstr(Address)
539-
<< " in constant island of function " << BF << '\n';
540-
return nullptr;
534+
BinaryFunction &Source,
535+
BinaryFunction &Target) {
536+
const uint64_t Offset = Address - Target.getAddress();
537+
assert(Offset < Target.getSize() &&
538+
"Address should be inside the referenced function");
539+
540+
bool IsValid = true;
541+
if (Source.NeedBranchValidation) {
542+
if (Target.CurrentState == BinaryFunction::State::Disassembled &&
543+
!Target.getInstructionAtOffset(Offset)) {
544+
this->errs()
545+
<< "BOLT-WARNING: corrupted control flow detected in function "
546+
<< Source
547+
<< ": an external branch/call targets an invalid instruction "
548+
<< "in function " << Target << " at address 0x"
549+
<< Twine::utohexstr(Address) << "; ignoring both functions\n";
550+
IsValid = false;
551+
}
552+
if (Target.isInConstantIsland(Address)) {
553+
this->errs() << "BOLT-WARNING: ignoring entry point at address 0x"
554+
<< Twine::utohexstr(Address)
555+
<< " in constant island of function " << Target << '\n';
556+
IsValid = false;
557+
}
541558
}
542559

543-
const uint64_t Offset = Address - BF.getAddress();
544-
assert(Offset < BF.getSize() &&
545-
"Address should be inside the referenced function");
560+
if (!IsValid) {
561+
Source.NeedBranchValidation = false;
562+
Source.setIgnored();
563+
Target.setIgnored();
564+
return nullptr;
565+
}
546566

547-
return Offset ? BF.addEntryPointAtOffset(Offset) : BF.getSymbol();
567+
return Offset ? Target.addEntryPointAtOffset(Offset) : Target.getSymbol();
548568
}
549569

550570
MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
@@ -1433,7 +1453,7 @@ void BinaryContext::processInterproceduralReferences() {
14331453

14341454
// Create an extra entry point if needed. Can also render the target
14351455
// function ignored if the reference is invalid.
1436-
handleExternalBranchTarget(Address, *TargetFunction);
1456+
handleExternalBranchTarget(Address, Function, *TargetFunction);
14371457

14381458
continue;
14391459
}

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,8 +1038,10 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address) {
10381038

10391039
// For AArch64, check if this address is part of a constant island.
10401040
if (BC.isAArch64()) {
1041-
if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
1041+
if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) {
1042+
Labels[Offset] = IslandSym;
10421043
return IslandSym;
1044+
}
10431045
}
10441046

10451047
if (Offset == getSize())
@@ -1692,7 +1694,7 @@ bool BinaryFunction::scanExternalRefs() {
16921694
// Get a reference symbol for the function when address is a valid code
16931695
// reference.
16941696
BranchTargetSymbol =
1695-
BC.handleExternalBranchTarget(TargetAddress, *TargetFunction);
1697+
BC.handleExternalBranchTarget(TargetAddress, *this, *TargetFunction);
16961698
if (!BranchTargetSymbol)
16971699
continue;
16981700
}
@@ -1900,6 +1902,36 @@ bool BinaryFunction::scanExternalRefs() {
19001902
return Success;
19011903
}
19021904

1905+
bool BinaryFunction::validateInternalBranches() {
1906+
if (!isSimple() || TrapsOnEntry)
1907+
return true;
1908+
1909+
for (const auto &KV : Labels) {
1910+
MCSymbol *Label = KV.second;
1911+
if (getSecondaryEntryPointSymbol(Label))
1912+
continue;
1913+
1914+
const uint32_t Offset = KV.first;
1915+
// Skip empty functions and out-of-bounds offsets,
1916+
// as they may not be disassembled.
1917+
if (!Offset || (Offset > getSize()))
1918+
continue;
1919+
1920+
if (!getInstructionAtOffset(Offset) ||
1921+
isInConstantIsland(getAddress() + Offset)) {
1922+
BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
1923+
<< *this << ": an internal branch/call targets an invalid "
1924+
<< "instruction at address 0x"
1925+
<< Twine::utohexstr(getAddress() + Offset)
1926+
<< "; ignoring this function\n";
1927+
setIgnored();
1928+
return false;
1929+
}
1930+
}
1931+
1932+
return true;
1933+
}
1934+
19031935
void BinaryFunction::postProcessEntryPoints() {
19041936
if (!isSimple())
19051937
return;

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3689,6 +3689,7 @@ void RewriteInstance::disassembleFunctions() {
36893689
if (!shouldDisassemble(Function))
36903690
continue;
36913691

3692+
Function.validateInternalBranches();
36923693
Function.postProcessEntryPoints();
36933694
Function.postProcessJumpTables();
36943695
}

bolt/test/AArch64/constant-island-alignment.s

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ _start:
5353
blr x0
5454
mov x0, #1
5555
ret
56+
.size _start,.-_start
5657
nop
5758
# CHECK: {{0|8}} <$d>:
5859
.Lci:
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
## Test that BOLT errs when detecting the target
2+
## of a direct call/branch is a invalid instruction
3+
4+
# REQUIRES: system-linux
5+
# RUN: rm -rf %t && mkdir -p %t && cd %t
6+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-linux %s -o main.o
7+
# RUN: %clang %cflags %t/main.o -o main.exe -Wl,-q
8+
# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
9+
10+
# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrupt: an external branch/call targets an invalid instruction in function external_func at address 0x{{[0-9a-f]+}}; ignoring both functions
11+
# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrupt: an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}; ignoring this function
12+
13+
14+
.globl internal_corrupt
15+
.type internal_corrupt,@function
16+
internal_corrupt:
17+
b constant_island_0 // targeting the data in code
18+
constant_island_0:
19+
.word 0xffffffff
20+
.size internal_corrupt,.-internal_corrupt
21+
22+
23+
.globl external_corrupt
24+
.type external_corrupt,@function
25+
external_corrupt:
26+
b constant_island_1 // targeting the data in code externally
27+
.size external_corrupt,.-external_corrupt
28+
29+
.globl external_func
30+
.type external_func,@function
31+
external_func:
32+
add x0, x0, x1
33+
constant_island_1:
34+
.word 0xffffffff // data in code
35+
ret
36+
.size external_func,.-external_func
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
## Test that BOLT errs when detecting the target
2+
## of a direct call/branch is a invalid instruction
3+
4+
# REQUIRES: system-linux
5+
# RUN: rm -rf %t && mkdir -p %t && cd %t
6+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
7+
# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
8+
# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
9+
10+
# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrupt: an external branch/call targets an invalid instruction in function external_func at address 0x{{[0-9a-f]+}}; ignoring both functions
11+
# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrupt: an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}; ignoring this function
12+
13+
14+
.globl internal_corrupt
15+
.type internal_corrupt,@function
16+
internal_corrupt:
17+
jb data_in_code + 1 # targeting the data in code, and jump into the middle of 'xorb' instruction
18+
data_in_code:
19+
.byte 0x34, 0x01 # data in code, will be disassembled as 'xorb 0x1, %al'
20+
.size internal_corrupt,.-internal_corrupt
21+
22+
23+
.globl external_corrupt
24+
.type external_corrupt,@function
25+
external_corrupt:
26+
jb external_func + 1 # targeting the middle of normal instruction externally
27+
.size external_corrupt,.-external_corrupt
28+
29+
.globl external_func
30+
.type external_func,@function
31+
external_func:
32+
addq $1, %rax # normal instruction
33+
.size external_func,.-external_func

0 commit comments

Comments
 (0)