Skip to content

Commit f1dbf92

Browse files
committed
Merge #20434: contrib: Parse ELF directly for symbol and security checks
a0a7718 contrib: Changes to checks for PowerPC64 (Luke Dashjr) 634f6ec contrib: Parse ELF directly for symbol and security checks (Wladimir J. van der Laan) Pull request description: Instead of the ever-messier text parsing of the output of the readelf tool (which is clearly meant for human consumption, not to be machine-parseable), parse the ELF binaries directly. Add a small dependency-less ELF parser specific to the checks. This is slightly more secure, too, because it removes potential ambiguity due to misparsing and changes in the output format of `readelf`. It also allows for stricter and more specific ELF format checks in the future. This removes the build-time dependency for `readelf`. It passes the test-security-check for me locally, ~~though I haven't checked on all platforms~~. I've checked that this works on the cross-compile output for all ELF platforms supported by Bitcoin Core at the moment, as well as PPC64 LE and BE. Top commit has no ACKs. Tree-SHA512: 7f9241fec83ee512642fecf5afd90546964561efd8c8c0f99826dcf6660604a4db2b7255e1afb1e9bb0211fd06f5dbad18a6175dfc03e39761a40025118e7bfc
2 parents 83e4670 + a0a7718 commit f1dbf92

File tree

6 files changed

+419
-169
lines changed

6 files changed

+419
-169
lines changed

Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ DIST_SHARE = \
5252
$(top_srcdir)/share/rpcauth
5353

5454
BIN_CHECKS=$(top_srcdir)/contrib/devtools/symbol-check.py \
55-
$(top_srcdir)/contrib/devtools/security-check.py
55+
$(top_srcdir)/contrib/devtools/security-check.py \
56+
$(top_srcdir)/contrib/devtools/pixie.py
5657

5758
WINDOWS_PACKAGING = $(top_srcdir)/share/pixmaps/bitcoin.ico \
5859
$(top_srcdir)/share/pixmaps/nsis-header.bmp \

configure.ac

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ AC_PATH_PROG([GIT], [git])
106106
AC_PATH_PROG(CCACHE,ccache)
107107
AC_PATH_PROG(XGETTEXT,xgettext)
108108
AC_PATH_PROG(HEXDUMP,hexdump)
109-
AC_PATH_TOOL(READELF, readelf)
110109
AC_PATH_TOOL(CPPFILT, c++filt)
111110
AC_PATH_TOOL(OBJCOPY, objcopy)
112111
AC_PATH_PROG(DOXYGEN, doxygen)

contrib/devtools/pixie.py

Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2020 Wladimir J. van der Laan
3+
# Distributed under the MIT software license, see the accompanying
4+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
5+
'''
6+
Compact, self-contained ELF implementation for bitcoin-core security checks.
7+
'''
8+
import struct
9+
import types
10+
from typing import Dict, List, Optional, Union, Tuple
11+
12+
# you can find all these values in elf.h
13+
EI_NIDENT = 16
14+
15+
# Byte indices in e_ident
16+
EI_CLASS = 4 # ELFCLASSxx
17+
EI_DATA = 5 # ELFDATAxxxx
18+
19+
ELFCLASS32 = 1 # 32-bit
20+
ELFCLASS64 = 2 # 64-bit
21+
22+
ELFDATA2LSB = 1 # little endian
23+
ELFDATA2MSB = 2 # big endian
24+
25+
# relevant values for e_machine
26+
EM_386 = 3
27+
EM_PPC64 = 21
28+
EM_ARM = 40
29+
EM_AARCH64 = 183
30+
EM_X86_64 = 62
31+
EM_RISCV = 243
32+
33+
# relevant values for e_type
34+
ET_DYN = 3
35+
36+
# relevant values for sh_type
37+
SHT_PROGBITS = 1
38+
SHT_STRTAB = 3
39+
SHT_DYNAMIC = 6
40+
SHT_DYNSYM = 11
41+
SHT_GNU_verneed = 0x6ffffffe
42+
SHT_GNU_versym = 0x6fffffff
43+
44+
# relevant values for p_type
45+
PT_LOAD = 1
46+
PT_GNU_STACK = 0x6474e551
47+
PT_GNU_RELRO = 0x6474e552
48+
49+
# relevant values for p_flags
50+
PF_X = (1 << 0)
51+
PF_W = (1 << 1)
52+
PF_R = (1 << 2)
53+
54+
# relevant values for d_tag
55+
DT_NEEDED = 1
56+
DT_FLAGS = 30
57+
58+
# relevant values of `d_un.d_val' in the DT_FLAGS entry
59+
DF_BIND_NOW = 0x00000008
60+
61+
# relevant d_tags with string payload
62+
STRING_TAGS = {DT_NEEDED}
63+
64+
# relevant values for ST_BIND subfield of st_info (symbol binding)
65+
STB_LOCAL = 0
66+
67+
class ELFRecord(types.SimpleNamespace):
    '''Unified parsing for ELF records.

    Subclasses provide a ``STRUCT`` class attribute that maps an ELF class
    (``ei_class``) to a ``(parsers, field_names)`` tuple, where ``parsers``
    maps a data encoding (``ei_data``) to a compiled ``struct.Struct``.
    Each unpacked value is stored on the instance under its field name.
    '''
    def __init__(self, data: bytes, offset: int, eh: 'ELFHeader', total_size: Optional[int]) -> None:
        '''Unpack one record from `data`, starting at byte `offset`.

        `eh` supplies `ei_class` and `ei_data`, which select the layout and
        byte order. `total_size`, when not None, is the entry size declared
        by the file; the record's fixed layout must fit within it.

        Raises ValueError if the class/encoding is unsupported, if the
        declared entry size is too small, or if `data` ends before the
        record does.
        '''
        record_name = self.__class__.__name__
        try:
            parsers, fields = self.STRUCT[eh.ei_class]
            hdr_struct = parsers[eh.ei_data]
        except KeyError:
            # Report malformed input the same way as the size check below,
            # instead of leaking a bare KeyError from the lookup tables.
            raise ValueError(f'{record_name}: unsupported ELF class/data encoding ({eh.ei_class}/{eh.ei_data})') from None

        if total_size is not None and hdr_struct.size > total_size:
            raise ValueError(f'{record_name} header size too small ({total_size} < {hdr_struct.size})')

        raw = data[offset:offset + hdr_struct.size]
        if len(raw) != hdr_struct.size:
            # Slicing past the end of the buffer silently yields fewer bytes.
            raise ValueError(f'{record_name} truncated: need {hdr_struct.size} bytes at offset {offset}, got {len(raw)}')

        for field, value in zip(fields, hdr_struct.unpack(raw)):
            setattr(self, field, value)
76+
def BiStruct(chars: str) -> Dict[int, struct.Struct]:
    '''Compile a struct parser for both endians.

    `chars` is a struct format string without a byte-order prefix. The
    result is keyed by ELF data encoding: ELFDATA2LSB maps to the
    little-endian parser, ELFDATA2MSB to the big-endian one.
    '''
    byte_order_prefixes = ((ELFDATA2LSB, '<'), (ELFDATA2MSB, '>'))
    return {
        encoding: struct.Struct(prefix + chars)
        for encoding, prefix in byte_order_prefixes
    }
82+
83+
class ELFHeader(ELFRecord):
    '''ELF file header.

    The first EI_NIDENT bytes (`e_ident`) are read by hand, because they
    determine the class and byte order needed to parse the other fields.
    '''
    FIELDS = [
        'e_type', 'e_machine', 'e_version', 'e_entry', 'e_phoff', 'e_shoff',
        'e_flags', 'e_ehsize', 'e_phentsize', 'e_phnum', 'e_shentsize',
        'e_shnum', 'e_shstrndx',
    ]
    STRUCT = {
        ELFCLASS32: (BiStruct('HHIIIIIHHHHHH'), FIELDS),
        ELFCLASS64: (BiStruct('HHIQQQIHHHHHH'), FIELDS),
    }

    def __init__(self, data: bytes, offset: int) -> None:
        '''Parse the header found at `offset` in `data`.

        Raises ValueError when the four magic bytes are not b'\\x7fELF'.
        '''
        ident = data[offset:offset + EI_NIDENT]
        self.e_ident = ident
        if ident[0:4] != b'\x7fELF':
            raise ValueError('invalid ELF magic')
        self.ei_class = ident[EI_CLASS]
        self.ei_data = ident[EI_DATA]

        # The header describes itself: it is passed as its own `eh`.
        super().__init__(data, offset + EI_NIDENT, self, None)

    def __repr__(self) -> str:
        return (
            f'Header(e_ident={self.e_ident!r}, e_type={self.e_type}, '
            f'e_machine={self.e_machine}, e_version={self.e_version}, '
            f'e_entry={self.e_entry}, e_phoff={self.e_phoff}, '
            f'e_shoff={self.e_shoff}, e_flags={self.e_flags}, '
            f'e_ehsize={self.e_ehsize}, e_phentsize={self.e_phentsize}, '
            f'e_phnum={self.e_phnum}, e_shentsize={self.e_shentsize}, '
            f'e_shnum={self.e_shnum}, e_shstrndx={self.e_shstrndx})'
        )
101+
102+
class Section(ELFRecord):
    '''One section header table entry, with access to the section's bytes.'''
    # Not part of the on-disk record; stays None until assigned externally.
    name: Optional[bytes] = None
    FIELDS = [
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
    ]
    STRUCT = {
        ELFCLASS32: (BiStruct('IIIIIIIIII'), FIELDS),
        ELFCLASS64: (BiStruct('IIQQQQIIQQ'), FIELDS),
    }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
        '''Parse the section header at `offset`; `data` is kept for contents().'''
        super().__init__(data, offset, eh, eh.e_shentsize)
        self._data = data

    def __repr__(self) -> str:
        return (
            f'Section(sh_name={self.sh_name}({self.name!r}), '
            f'sh_type=0x{self.sh_type:x}, sh_flags={self.sh_flags}, '
            f'sh_addr=0x{self.sh_addr:x}, sh_offset=0x{self.sh_offset:x}, '
            f'sh_size={self.sh_size}, sh_link={self.sh_link}, '
            f'sh_info={self.sh_info}, sh_addralign={self.sh_addralign}, '
            f'sh_entsize={self.sh_entsize})'
        )

    def contents(self) -> bytes:
        '''Return section contents.'''
        start = self.sh_offset
        end = start + self.sh_size
        return self._data[start:end]
120+
121+
class ProgramHeader(ELFRecord):
    '''One program header table entry (segment descriptor).'''
    # Both ELF classes carry the same fields; only their order differs
    # (space versus alignment), hence one field list per class.
    _FIELDS_32 = ['p_type', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_flags', 'p_align']
    _FIELDS_64 = ['p_type', 'p_flags', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_align']
    STRUCT = {
        ELFCLASS32: (BiStruct('IIIIIIII'), _FIELDS_32),
        ELFCLASS64: (BiStruct('IIQQQQQQ'), _FIELDS_64),
    }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
        '''Parse the program header located at `offset` in `data`.'''
        super().__init__(data, offset, eh, eh.e_phentsize)

    def __repr__(self) -> str:
        return (
            f'ProgramHeader(p_type={self.p_type}, p_offset={self.p_offset}, '
            f'p_vaddr={self.p_vaddr}, p_paddr={self.p_paddr}, '
            f'p_filesz={self.p_filesz}, p_memsz={self.p_memsz}, '
            f'p_flags={self.p_flags}, p_align={self.p_align})'
        )
133+
134+
class Symbol(ELFRecord):
    '''One symbol table entry, with its name and version attached.'''
    # Both ELF classes carry the same fields; only their order differs
    # (space versus alignment), hence one field list per class.
    _FIELDS_32 = ['st_name', 'st_value', 'st_size', 'st_info', 'st_other', 'st_shndx']
    _FIELDS_64 = ['st_name', 'st_info', 'st_other', 'st_shndx', 'st_value', 'st_size']
    STRUCT = {
        ELFCLASS32: (BiStruct('IIIBBH'), _FIELDS_32),
        ELFCLASS64: (BiStruct('IBBHQQ'), _FIELDS_64),
    }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader, symtab: Section, strings: bytes, version: Optional[bytes]) -> None:
        '''Parse the symbol at `offset` in `data` (the symbol table contents).

        `symtab` supplies the declared entry size, `strings` is the string
        table used to look up the name, and `version` is stored unchanged.
        '''
        super().__init__(data, offset, eh, symtab.sh_entsize)
        self.name = _lookup_string(strings, self.st_name)
        self.version = version

    def __repr__(self) -> str:
        return (
            f'Symbol(st_name={self.st_name}({self.name!r}), '
            f'st_value={self.st_value}, st_size={self.st_size}, '
            f'st_info={self.st_info}, st_other={self.st_other}, '
            f'st_shndx={self.st_shndx}, version={self.version!r})'
        )

    @property
    def st_bind(self) -> int:
        '''Returns STB_*.'''
        # The binding is stored in the upper four bits of st_info.
        return self.st_info >> 4

    @property
    def is_import(self) -> bool:
        '''Returns whether the symbol is an imported symbol.'''
        is_local = self.st_bind == STB_LOCAL
        return (not is_local) and self.st_shndx == 0

    @property
    def is_export(self) -> bool:
        '''Returns whether the symbol is an exported symbol.'''
        is_local = self.st_bind == STB_LOCAL
        return (not is_local) and self.st_shndx != 0
163+
164+
class Verneed(ELFRecord):
    '''One version-needed entry; the layout is the same for both ELF classes.'''
    DEF = (BiStruct('HHIII'), ['vn_version', 'vn_cnt', 'vn_file', 'vn_aux', 'vn_next'])
    STRUCT = dict.fromkeys((ELFCLASS32, ELFCLASS64), DEF)

    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
        '''Parse the entry at `offset` in `data`; no declared entry size is checked.'''
        super().__init__(data, offset, eh, None)

    def __repr__(self) -> str:
        return (
            f'Verneed(vn_version={self.vn_version}, vn_cnt={self.vn_cnt}, '
            f'vn_file={self.vn_file}, vn_aux={self.vn_aux}, '
            f'vn_next={self.vn_next})'
        )
173+
174+
class Vernaux(ELFRecord):
    '''One auxiliary version-needed entry; same layout for both ELF classes.'''
    DEF = (BiStruct('IHHII'), ['vna_hash', 'vna_flags', 'vna_other', 'vna_name', 'vna_next'])
    STRUCT = { ELFCLASS32: DEF, ELFCLASS64: DEF }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader, strings: bytes) -> None:
        '''Parse the entry at `offset` in `data`.

        `strings` is the string table in which `vna_name` is looked up; the
        resulting bytes are stored as `name`.
        '''
        super().__init__(data, offset, eh, None)
        self.name = _lookup_string(strings, self.vna_name)

    def __repr__(self) -> str:
        # The original printed 'Veraux(', which does not match the class name.
        return f'Vernaux(vna_hash={self.vna_hash}, vna_flags={self.vna_flags}, vna_other={self.vna_other}, vna_name={self.vna_name}({self.name!r}), vna_next={self.vna_next})'
184+
185+
class DynTag(ELFRecord):
    '''One dynamic section entry: a (d_tag, d_val) pair.'''
    _FIELDS = ['d_tag', 'd_val']
    STRUCT = {
        ELFCLASS32: (BiStruct('II'), _FIELDS),
        ELFCLASS64: (BiStruct('QQ'), _FIELDS),
    }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader, section: Section) -> None:
        '''Parse the entry at `offset` in `data`; `section` supplies the entry size.'''
        super().__init__(data, offset, eh, section.sh_entsize)

    def __repr__(self) -> str:
        tag, val = self.d_tag, self.d_val
        return f'DynTag(d_tag={tag}, d_val={val})'
196+
197+
def _lookup_string(data: bytes, index: int) -> bytes:
198+
'''Look up string by offset in ELF string table.'''
199+
endx = data.find(b'\x00', index)
200+
assert endx != -1
201+
return data[index:endx]
202+
203+
VERSYM_S = BiStruct('H') # .gnu_version section has a single 16-bit integer per symbol in the linked section
def _parse_symbol_table(section: Section, strings: bytes, eh: ELFHeader, versym: bytes, verneed: Dict[int, bytes]) -> List[Symbol]:
    '''Parse symbol table, return a list of symbols.

    `versym` holds one 16-bit version index per symbol; each index is
    looked up in `verneed` (None when absent) and paired, in order, with
    the symbol entry at the same position in `section`.
    '''
    data = section.contents()
    versym_parser = VERSYM_S[eh.ei_data]
    versions = (verneed.get(index) for (index,) in versym_parser.iter_unpack(versym))
    entry_offsets = range(0, len(data), section.sh_entsize)
    return [
        Symbol(data, entry_ofs, eh, section, strings, version)
        for entry_ofs, version in zip(entry_offsets, versions)
    ]
212+
213+
def _parse_verneed(section: Section, strings: bytes, eh: ELFHeader) -> Dict[int, bytes]:
    '''Parse .gnu.version_r section, return a dictionary of {versym: 'GLIBC_...'}.

    The section is a chain of Verneed entries, each pointing to its own
    chain of Vernaux entries. `strings` is the string table used to look up
    the Vernaux names. The result maps each `vna_other` value to that name.
    '''
    data = section.contents()
    ofs = 0
    result = {}
    while True:
        verneed = Verneed(data, ofs, eh)
        # vn_aux is a byte offset from the start of *this* Verneed entry,
        # not from the start of the section. Using it unadjusted is only
        # right for the first entry (ofs == 0).
        aofs = ofs + verneed.vn_aux
        while True:
            vernaux = Vernaux(data, aofs, eh, strings)
            result[vernaux.vna_other] = vernaux.name
            if not vernaux.vna_next:
                break
            # vna_next is relative to the current Vernaux entry.
            aofs += vernaux.vna_next

        if not verneed.vn_next:
            break
        # vn_next is relative to the current Verneed entry.
        ofs += verneed.vn_next

    return result
233+
234+
def _parse_dyn_tags(section: Section, strings: bytes, eh: ELFHeader) -> List[Tuple[int, Union[bytes, int]]]:
    '''Parse dynamic tags. Return array of tuples.

    Each tuple is (d_tag, value). For tags listed in STRING_TAGS the value
    is the string found at offset d_val in `strings`; for every other tag
    it is the raw d_val integer.
    '''
    data = section.contents()
    tags = []
    for entry_ofs in range(0, len(data), section.sh_entsize):
        entry = DynTag(data, entry_ofs, eh, section)
        if entry.d_tag in STRING_TAGS:
            payload = _lookup_string(strings, entry.d_val)
        else:
            payload = entry.d_val
        tags.append((entry.d_tag, payload))

    return tags
245+
246+
class ELFFile:
    '''Parsed view of an ELF image held entirely in memory.

    Construction parses the header, section headers, program headers,
    dynamic symbols and dynamic tags, and attaches to each program header
    the list of sections whose address falls inside it.
    '''
    sections: List[Section]
    program_headers: List[ProgramHeader]
    dyn_symbols: List[Symbol]
    dyn_tags: List[Tuple[int, Union[bytes, int]]]

    def __init__(self, data: bytes) -> None:
        '''Parse `data`, the raw bytes of an ELF file.'''
        self.data = data
        self.hdr = ELFHeader(self.data, 0)
        self._load_sections()
        self._load_program_headers()
        self._load_dyn_symbols()
        self._load_dyn_tags()
        self._section_to_segment_mapping()

    def _load_sections(self) -> None:
        '''Read every section header, then resolve section names.'''
        hdr = self.hdr
        self.sections = [
            Section(self.data, hdr.e_shoff + idx * hdr.e_shentsize, hdr)
            for idx in range(hdr.e_shnum)
        ]

        # Section names live in the string table section at e_shstrndx.
        shstr = self.sections[hdr.e_shstrndx].contents()
        for section in self.sections:
            section.name = _lookup_string(shstr, section.sh_name)

    def _load_program_headers(self) -> None:
        '''Read every program header.'''
        hdr = self.hdr
        self.program_headers = [
            ProgramHeader(self.data, hdr.e_phoff + idx * hdr.e_phentsize, hdr)
            for idx in range(hdr.e_phnum)
        ]

    def _load_dyn_symbols(self) -> None:
        '''Load all dynamic symbols together with their version names.

        Raises ValueError if the file does not contain exactly one
        SHT_GNU_verneed section. These are explicit checks rather than
        `assert`s so that they still run under `python -O`.
        '''
        # first, load 'verneed' section
        verneed: Optional[Dict[int, bytes]] = None
        for section in self.sections:
            if section.sh_type == SHT_GNU_verneed:
                if verneed is not None:
                    # only one section of this kind please
                    raise ValueError('multiple SHT_GNU_verneed sections found')
                strtab = self.sections[section.sh_link].contents() # associated string table
                verneed = _parse_verneed(section, strtab, self.hdr)
        if verneed is None:
            raise ValueError('no SHT_GNU_verneed section found')

        # then, correlate GNU versym sections with dynamic symbol sections
        # (a versym section's sh_link is the index of its symbol table)
        versym = {
            section.sh_link: section
            for section in self.sections
            if section.sh_type == SHT_GNU_versym
        }

        # finally, load dynsym sections
        self.dyn_symbols = []
        for idx, section in enumerate(self.sections):
            if section.sh_type == SHT_DYNSYM: # find dynamic symbol tables
                strtab_data = self.sections[section.sh_link].contents() # associated string table
                versym_data = versym[idx].contents() # associated symbol version table
                self.dyn_symbols += _parse_symbol_table(section, strtab_data, self.hdr, versym_data, verneed)

    def _load_dyn_tags(self) -> None:
        '''Collect the dynamic tags of every SHT_DYNAMIC section.'''
        self.dyn_tags = []
        for section in self.sections:
            if section.sh_type == SHT_DYNAMIC: # find dynamic tag tables
                strtab = self.sections[section.sh_link].contents() # associated string table
                self.dyn_tags += _parse_dyn_tags(section, strtab, self.hdr)

    def _section_to_segment_mapping(self) -> None:
        '''Give each program header a `sections` list.

        A section belongs to a segment when its sh_addr lies in the
        half-open range [p_vaddr, p_vaddr + p_memsz).
        '''
        for ph in self.program_headers:
            ph.sections = [
                section for section in self.sections
                if ph.p_vaddr <= section.sh_addr < (ph.p_vaddr + ph.p_memsz)
            ]

    def query_dyn_tags(self, tag_in: int) -> List[Union[int, bytes]]:
        '''Return the values of all dyn tags with the specified tag.'''
        return [val for (tag, val) in self.dyn_tags if tag == tag_in]
318+
319+
320+
def load(filename: str) -> ELFFile:
    '''Read the file at `filename` in binary mode and parse it as an ELFFile.'''
    with open(filename, 'rb') as elf_file:
        return ELFFile(elf_file.read())

0 commit comments

Comments
 (0)