diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index adacb50dc167c..b6d94b9868e49 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -791,9 +791,44 @@ void RewriteInstance::discoverFileObjects() { BinarySection Section(*BC, *cantFail(Sym.getSection())); return Section.isAllocatable(); }; + auto checkSymbolInSection = [this](const SymbolInfo &S) { + // Sometimes, we encounter symbols with addresses outside their section. If + // such symbols happen to fall into another section, they can interfere with + // disassembly. Notably, this occurs with AArch64 marker symbols ($d and $t) + // that belong to .eh_frame, but end up pointing into .text. + // As a workaround, we ignore all symbols that lie outside their sections. + auto Section = cantFail(S.Symbol.getSection()); + + // Accept all absolute symbols. + if (Section == InputFile->section_end()) + return true; + + uint64_t SecStart = Section->getAddress(); + uint64_t SecEnd = SecStart + Section->getSize(); + uint64_t SymEnd = S.Address + ELFSymbolRef(S.Symbol).getSize(); + if (S.Address >= SecStart && SymEnd <= SecEnd) + return true; + + auto SymType = cantFail(S.Symbol.getType()); + // Skip warnings for common benign cases. + if (opts::Verbosity < 1 && SymType == SymbolRef::ST_Other) + return false; // E.g. ELF::STT_TLS. + + auto SymName = S.Symbol.getName(); + auto SecName = cantFail(S.Symbol.getSection())->getName(); + BC->errs() << "BOLT-WARNING: ignoring symbol " + << (SymName ? *SymName : "[unnamed]") << " at 0x" + << Twine::utohexstr(S.Address) << ", which lies outside " + << (SecName ? *SecName : "[unnamed]") << "\n"; + + return false; + }; for (const SymbolRef &Symbol : InputFile->symbols()) - if (isSymbolInMemory(Symbol)) - SortedSymbols.push_back({cantFail(Symbol.getAddress()), Symbol}); + if (isSymbolInMemory(Symbol)) { + SymbolInfo SymInfo{cantFail(Symbol.getAddress()), Symbol}; + if (checkSymbolInSection(SymInfo)) + SortedSymbols.push_back(SymInfo); + } auto CompareSymbols = [this](const SymbolInfo &A, const SymbolInfo &B) { if (A.Address != B.Address) diff --git a/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml b/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml new file mode 100644 index 0000000000000..446397b911d9e --- /dev/null +++ b/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml @@ -0,0 +1,56 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_AARCH64 + Entry: 0x2a0000 +ProgramHeaders: + - Type: PT_PHDR + Flags: [ PF_R ] + VAddr: 0x40 + Align: 0x8 + FileSize: 0xa8 + MemSize: 0xa8 + Offset: 0x40 + - Type: PT_LOAD + Flags: [ PF_R ] + VAddr: 0x0 + Align: 0x10000 + FileSize: 0xf8 + MemSize: 0xf8 + Offset: 0x0 + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + VAddr: 0x2a0000 + Align: 0x10000 + FirstSec: .text + LastSec: .ignored +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x2a0000 + AddressAlign: 0x4 + Content: 400580d2c0035fd6 + - Name: .ignored + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x2a0008 + AddressAlign: 0x8 + Size: 0x8 + - Name: .eh_frame + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x2a0010 + AddressAlign: 0x8 + Content: 1000000000000000017a520004781e010b0c1f00140000001800000000002a0008000000000e01410e010000 +Symbols: + - Name: func + Section: .text + Value: 0x2a0000 + Size: 0x8 + - Name: '$d.42' + Section: .ignored + Value: 0x2a0004 +... diff --git a/bolt/test/AArch64/spurious-marker-symbol.test b/bolt/test/AArch64/spurious-marker-symbol.test new file mode 100644 index 0000000000000..901db08774a74 --- /dev/null +++ b/bolt/test/AArch64/spurious-marker-symbol.test @@ -0,0 +1,61 @@ +// Check that marker symbols ($d, $x) denoting data embedded in code are ignored +// if they fall outside their respective sections. + +// RUN: yaml2obj %S/Inputs/spurious-marker-symbol.yaml -o %t.exe +// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s +// CHECK: 1 out of 1 functions were overwritten +// RUN: llvm-objdump -j .text -d %t.bolt | FileCheck %s -check-prefix=CHECK-DISASM +// CHECK-DISASM: func +// CHECK-DISASM: 2a0000: d2800540 mov +// CHECK-DISASM: 2a0004: d65f03c0 ret + +// The YAML encodes the following assembly and debug information: + + .text + .globl func + .type func, %function +func: + mov x0, #42 +// $d.42: (symbol in .ignored, with an address in .text) + ret + +// .eh_frame contains minimal DWARF with a CFA operation on the `ret`. BOLT +// should ignore the spurious `$d.42`. If it doesn't, then it will stop +// disassembling after the `mov` and will fail to process the second +// DW_CFA_def_cfa_offset. +// +// CIE +// length: 00000010 +// CIE_id: 00000000 +// version: 01 +// augmentation: +// "zR" 7a 52 00 +// - read augmentation data +// - read FDE pointer encoding +// code_alignment_factor: 04 +// data_alignment_factor: 78 (-8) +// return_address_register: 1e (r30 / lr) +// +// augmentation data: +// length: 01 +// FDE pointers are absptr+sdata4 0b +// +// initial_instructions: +// DW_CFA_def_cfa (31, 0): 0c 1f 00 +// +// Encoding: 10000000'00000000'01'7a5200'04'78'1e'10'0b'0c1f00 +// +// FDE +// length: 00000014 +// CIE_pointer: 00000018 (backwards offset from here to CIE) +// initial_location: 002a0000 (`func` as absptr+sdata4) +// address_range: 00000008 +// augmentation data: +// length: 00 +// instructions: +// DW_CFA_def_cfa_offset (1) 0e 01 +// DW_CFA_advance_loc (1) 41 (`ret` at 0x2a0004) +// DW_CFA_def_cfa_offset (1) 0e 01 Fails unless $d.42 is ignored. +// DW_CFA_nop 00 00 +// +// Encoding: 14000000'18000000'00002a00'08000000'000e0141'0e010000 diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index f49763e31a9c7..20c0ef5ccfcea 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -199,7 +199,7 @@ class SymbolRef : public BasicSymbolRef { Expected getType() const; /// Get section this symbol is defined in reference to. Result is - /// end_sections() if it is undefined or is an absolute symbol. + /// section_end() if it is undefined or is an absolute symbol. Expected getSection() const; const ObjectFile *getObject() const;