diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b215a1558cbb4..c9ec1b4aefb3e 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2236,6 +2236,19 @@ class BinaryFunction {
   /// it is probably another function.
   bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
 
+  /// Check that the target of a direct branch/call decodes as a valid
+  /// instruction.
+  ///
+  /// \p TargetAddress is the branch/call destination, \p AbsoluteInstrAddr is
+  /// the address of the branch/call itself (used for diagnostics only) and
+  /// \p CurrentFunctionData holds the raw bytes of this function.
+  ///
+  /// Return false, after printing a warning, if the target lies in a known
+  /// function but cannot be disassembled. Return true otherwise, including
+  /// when the target cannot be checked.
+  bool validateBranchTarget(uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
+                            const ArrayRef<uint8_t> &CurrentFunctionData) const;
+
   /// Disassemble function from raw data.
   /// If successful, this function will populate the list of instructions
   /// for this function together with offsets from the function start
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index fbe186454351c..0a638ec58d000 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1283,6 +1283,49 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const {
   return std::nullopt;
 }
 
+bool BinaryFunction::validateBranchTarget(
+    uint64_t TargetAddress, uint64_t AbsoluteInstrAddr,
+    const ArrayRef<uint8_t> &CurrentFunctionData) const {
+  BinaryFunction *TargetFunc =
+      BC.getBinaryFunctionContainingAddress(TargetAddress);
+  // A target outside of every known function cannot be checked here.
+  if (!TargetFunc)
+    return true;
+
+  // Reuse the caller's bytes for an intra-function target; otherwise fetch the
+  // raw bytes of the function that contains the target.
+  ArrayRef<uint8_t> TargetFunctionData = CurrentFunctionData;
+  if (TargetFunc != this) {
+    ErrorOr<ArrayRef<uint8_t>> TargetDataOrErr = TargetFunc->getData();
+    // An assert would vanish in release builds and leave an error-state
+    // ErrorOr to be dereferenced. Without bytes nothing can be validated, so
+    // accept the target.
+    if (!TargetDataOrErr)
+      return true;
+    TargetFunctionData = *TargetDataOrErr;
+  }
+
+  // ArrayRef::slice() requires the offset to lie within the array; an offset
+  // past the available bytes leaves nothing to decode.
+  const uint64_t TargetOffset = TargetAddress - TargetFunc->getAddress();
+  ArrayRef<uint8_t> TargetBytes;
+  if (TargetOffset <= TargetFunctionData.size())
+    TargetBytes = TargetFunctionData.slice(TargetOffset);
+
+  MCInst TargetInst;
+  uint64_t TargetInstSize = 0;
+  if (BC.SymbolicDisAsm->getInstruction(TargetInst, TargetInstSize, TargetBytes,
+                                        TargetAddress, nulls()))
+    return true;
+
+  // A target that cannot be disassembled implies corrupted control flow.
+  BC.errs() << "BOLT-WARNING: direct branch/call at 0x"
+            << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this
+            << " targets an invalid instruction at 0x"
+            << Twine::utohexstr(TargetAddress) << "\n";
+  return false;
+}
+
 Error BinaryFunction::disassemble() {
   NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs",
                      "Build Binary Functions", opts::TimeBuild);
@@ -1396,6 +1439,11 @@ Error BinaryFunction::disassemble() {
       uint64_t TargetAddress = 0;
       if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
                               TargetAddress)) {
+        if (!validateBranchTarget(TargetAddress, AbsoluteInstrAddr,
+                                  FunctionData)) {
+          setIgnored();
+          break;
+        }
         // Check if the target is within the same function. Otherwise it's
         // a call, possibly a tail call.
         //
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
new file mode 100644
index 0000000000000..56437681c238f
--- /dev/null
+++ b/bolt/test/X86/validate-branch-target.s
@@ -0,0 +1,35 @@
+## Test that BOLT warns when it detects that the target
+## of a direct call/branch is an invalid instruction.
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
+# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: direct branch/call at 0x{{[0-9a-f]+}} in function RC4_options targets an invalid instruction at 0x{{[0-9a-f]+}}
+
+# A data-in-code function case from OpenSSL.
+.globl RC4_options
+.type RC4_options,@function
+.align 16
+RC4_options:
+  leaq .Lopts(%rip),%rax
+  btl $20,%edx
+  jc .L8xchar
+  btl $30,%edx
+  jnc .Ldone
+  addq $25,%rax
+  .byte 0xf3,0xc3
+.L8xchar:
+  addq $12,%rax
+.Ldone:
+  .byte 0xf3,0xc3
+.align 64
+.Lopts:
+.byte 114,99,52,40,56,120,44,105,110,116,41,0 # data '114' will be disassembled as 'jb'
+.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
+.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
+.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+.size RC4_options,.-RC4_options