|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# Copyright (c) 2020 Wladimir J. van der Laan |
| 3 | +# Distributed under the MIT software license, see the accompanying |
| 4 | +# file COPYING or http://www.opensource.org/licenses/mit-license.php. |
| 5 | +''' |
| 6 | +Compact, self-contained ELF implementation for bitcoin-core security checks. |
| 7 | +''' |
| 8 | +import struct |
| 9 | +import types |
| 10 | +from typing import Dict, List, Optional, Union, Tuple |
| 11 | + |
# Numeric constants from the ELF format; you can find all these values in elf.h
EI_NIDENT = 16  # number of bytes in e_ident, which precedes the fixed header fields

# Byte indices in e_ident
EI_CLASS = 4 # ELFCLASSxx
EI_DATA = 5 # ELFDATAxxxx

# values of e_ident[EI_CLASS]
ELFCLASS32 = 1 # 32-bit
ELFCLASS64 = 2 # 64-bit

# values of e_ident[EI_DATA]
ELFDATA2LSB = 1 # little endian
ELFDATA2MSB = 2 # big endian

# relevant values for e_machine
EM_386 = 3
EM_PPC64 = 21
EM_ARM = 40
EM_AARCH64 = 183
EM_X86_64 = 62
EM_RISCV = 243

# relevant values for e_type
ET_DYN = 3

# relevant values for sh_type
SHT_PROGBITS = 1
SHT_STRTAB = 3
SHT_DYNAMIC = 6
SHT_DYNSYM = 11
SHT_GNU_verneed = 0x6ffffffe
SHT_GNU_versym = 0x6fffffff

# relevant values for p_type
PT_LOAD = 1
PT_GNU_STACK = 0x6474e551
PT_GNU_RELRO = 0x6474e552

# relevant values for p_flags (bit mask)
PF_X = (1 << 0)
PF_W = (1 << 1)
PF_R = (1 << 2)

# relevant values for d_tag
DT_NEEDED = 1
DT_FLAGS = 30

# relevant values of `d_un.d_val' in the DT_FLAGS entry
DF_BIND_NOW = 0x00000008

# relevant d_tags with string payload (their d_val is looked up in the
# associated string table when dynamic tags are parsed)
STRING_TAGS = {DT_NEEDED}

# relevant values for ST_BIND subfield of st_info (symbol binding)
STB_LOCAL = 0
| 66 | + |
class ELFRecord(types.SimpleNamespace):
    '''Common base for fixed-layout ELF records.

    Subclasses supply a ``STRUCT`` mapping of the form
    ``{ei_class: ({ei_data: struct.Struct}, [field names])}``; the decoded
    values are stored as attributes named after the fields.
    '''
    def __init__(self, data: bytes, offset: int, eh: 'ELFHeader', total_size: Optional[int]) -> None:
        '''Decode the record located at `offset` within `data`.

        `eh` selects the layout through its `ei_class` and `ei_data`
        attributes. `total_size` is the entry size declared by the file
        (or None to skip the check); a ValueError is raised when it is
        smaller than the layout that is about to be unpacked.
        '''
        layout = self.STRUCT[eh.ei_class]
        parser = layout[0][eh.ei_data]
        names = layout[1]

        if total_size is not None and total_size < parser.size:
            raise ValueError(f'{self.__class__.__name__} header size too small ({total_size} < {parser.size})')

        raw = data[offset:offset + parser.size]
        values = parser.unpack(raw)
        for name, value in zip(names, values):
            setattr(self, name, value)
| 75 | + |
def BiStruct(chars: str) -> Dict[int, struct.Struct]:
    '''Compile `chars` into one struct parser per byte order.

    The result is keyed by the ELFDATA2* value that selects the byte order,
    so it can be indexed directly with a header's `ei_data`.
    '''
    byte_orders = ((ELFDATA2LSB, '<'), (ELFDATA2MSB, '>'))
    return {ei_data: struct.Struct(prefix + chars) for ei_data, prefix in byte_orders}
| 82 | + |
class ELFHeader(ELFRecord):
    '''The ELF file header: `e_ident` followed by the class-dependent fixed fields.'''
    FIELDS = [
        'e_type', 'e_machine', 'e_version', 'e_entry', 'e_phoff', 'e_shoff',
        'e_flags', 'e_ehsize', 'e_phentsize', 'e_phnum', 'e_shentsize',
        'e_shnum', 'e_shstrndx',
    ]
    STRUCT = {
        ELFCLASS32: (BiStruct('HHIIIIIHHHHHH'), FIELDS),
        ELFCLASS64: (BiStruct('HHIQQQIHHHHHH'), FIELDS),
    }

    def __init__(self, data: bytes, offset: int) -> None:
        '''Parse the header starting at `offset` in `data`.

        Raises ValueError when the four magic bytes are not ``\\x7fELF``.
        '''
        ident = data[offset:offset + EI_NIDENT]
        self.e_ident = ident
        if ident[:4] != b'\x7fELF':
            raise ValueError('invalid ELF magic')

        # The base class reads the layout selectors from the header object,
        # so they must be set before delegating (with self acting as `eh`).
        self.ei_class = ident[EI_CLASS]
        self.ei_data = ident[EI_DATA]

        super().__init__(data, offset + EI_NIDENT, self, None)

    def __repr__(self) -> str:
        shown = ', '.join(f'{name}={getattr(self, name)}' for name in self.FIELDS)
        return f'Header(e_ident={self.e_ident!r}, {shown})'
| 101 | + |
class Section(ELFRecord):
    '''A section header table entry, with access to the bytes it describes.'''
    # Section name; stays None until it is assigned from the section name
    # string table after all section headers have been read.
    name: Optional[bytes] = None
    FIELDS = [
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
    ]
    STRUCT = {
        ELFCLASS32: (BiStruct('IIIIIIIIII'), FIELDS),
        ELFCLASS64: (BiStruct('IIQQQQIIQQ'), FIELDS),
    }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
        '''Parse the section header at `offset`; `data` is kept for `contents()`.'''
        super().__init__(data, offset, eh, eh.e_shentsize)
        self._data = data

    def __repr__(self) -> str:
        return (
            f'Section(sh_name={self.sh_name}({self.name!r}), '
            f'sh_type=0x{self.sh_type:x}, '
            f'sh_flags={self.sh_flags}, '
            f'sh_addr=0x{self.sh_addr:x}, '
            f'sh_offset=0x{self.sh_offset:x}, '
            f'sh_size={self.sh_size}, '
            f'sh_link={self.sh_link}, '
            f'sh_info={self.sh_info}, '
            f'sh_addralign={self.sh_addralign}, '
            f'sh_entsize={self.sh_entsize})'
        )

    def contents(self) -> bytes:
        '''Return the `sh_size` bytes of the file that start at `sh_offset`.'''
        start = self.sh_offset
        return self._data[start:start + self.sh_size]
| 120 | + |
class ProgramHeader(ELFRecord):
    '''A program header table entry (segment description).'''
    STRUCT = {
        # Both ELF classes carry the same fields; only their order differs
        # (p_flags moves) to optimize space versus alignment.
        ELFCLASS32: (
            BiStruct('IIIIIIII'),
            ['p_type', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_flags', 'p_align'],
        ),
        ELFCLASS64: (
            BiStruct('IIQQQQQQ'),
            ['p_type', 'p_flags', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_align'],
        ),
    }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
        '''Parse the program header at `offset`, checked against `eh.e_phentsize`.'''
        super().__init__(data, offset, eh, eh.e_phentsize)

    def __repr__(self) -> str:
        return (
            f'ProgramHeader(p_type={self.p_type}, '
            f'p_offset={self.p_offset}, '
            f'p_vaddr={self.p_vaddr}, '
            f'p_paddr={self.p_paddr}, '
            f'p_filesz={self.p_filesz}, '
            f'p_memsz={self.p_memsz}, '
            f'p_flags={self.p_flags}, '
            f'p_align={self.p_align})'
        )
| 133 | + |
class Symbol(ELFRecord):
    '''A symbol table entry together with its resolved name and version.'''
    STRUCT = {
        # Both ELF classes carry the same fields, ordered differently to
        # optimize space versus alignment.
        ELFCLASS32: (
            BiStruct('IIIBBH'),
            ['st_name', 'st_value', 'st_size', 'st_info', 'st_other', 'st_shndx'],
        ),
        ELFCLASS64: (
            BiStruct('IBBHQQ'),
            ['st_name', 'st_info', 'st_other', 'st_shndx', 'st_value', 'st_size'],
        ),
    }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader, symtab: Section, strings: bytes, version: Optional[bytes]) -> None:
        '''Parse the symbol at `offset` in `data`.

        `symtab.sh_entsize` is used as the declared entry size, `strings` is
        the string table that `st_name` indexes, and `version` is stored
        unchanged on the symbol.
        '''
        super().__init__(data, offset, eh, symtab.sh_entsize)
        self.name = _lookup_string(strings, self.st_name)
        self.version = version

    def __repr__(self) -> str:
        return (
            f'Symbol(st_name={self.st_name}({self.name!r}), '
            f'st_value={self.st_value}, '
            f'st_size={self.st_size}, '
            f'st_info={self.st_info}, '
            f'st_other={self.st_other}, '
            f'st_shndx={self.st_shndx}, '
            f'version={self.version!r})'
        )

    @property
    def st_bind(self) -> int:
        '''Returns STB_*, i.e. the bits of st_info above the low four.'''
        return self.st_info >> 4

    @property
    def is_import(self) -> bool:
        '''Returns whether the symbol is an imported symbol (non-local, section index 0).'''
        if self.st_bind == STB_LOCAL:
            return False
        return self.st_shndx == 0

    @property
    def is_export(self) -> bool:
        '''Returns whether the symbol is an exported symbol (non-local, non-zero section index).'''
        if self.st_bind == STB_LOCAL:
            return False
        return self.st_shndx != 0
| 163 | + |
class Verneed(ELFRecord):
    '''One version-requirement record, as read by `_parse_verneed`.'''
    # The layout does not depend on the ELF class, so both keys share it.
    DEF = (
        BiStruct('HHIII'),
        ['vn_version', 'vn_cnt', 'vn_file', 'vn_aux', 'vn_next'],
    )
    STRUCT = {ELFCLASS32: DEF, ELFCLASS64: DEF}

    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
        '''Parse the record at `offset`; no declared entry size is checked.'''
        super().__init__(data, offset, eh, None)

    def __repr__(self) -> str:
        return (
            f'Verneed(vn_version={self.vn_version}, '
            f'vn_cnt={self.vn_cnt}, '
            f'vn_file={self.vn_file}, '
            f'vn_aux={self.vn_aux}, '
            f'vn_next={self.vn_next})'
        )
| 173 | + |
class Vernaux(ELFRecord):
    '''One auxiliary version-requirement record, with its version name resolved.'''
    # The layout does not depend on the ELF class, so both keys share it.
    DEF = (BiStruct('IHHII'), ['vna_hash', 'vna_flags', 'vna_other', 'vna_name', 'vna_next'])
    STRUCT = { ELFCLASS32: DEF, ELFCLASS64: DEF }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader, strings: bytes) -> None:
        '''Parse the record at `offset` in `data`.

        `strings` is the string table that `vna_name` indexes; the resolved
        bytes are stored as `name`. No declared entry size is checked.
        '''
        super().__init__(data, offset, eh, None)
        self.name = _lookup_string(strings, self.vna_name)

    def __repr__(self) -> str:
        # The label now matches the class name (it was misspelled 'Veraux').
        return f'Vernaux(vna_hash={self.vna_hash}, vna_flags={self.vna_flags}, vna_other={self.vna_other}, vna_name={self.vna_name}({self.name!r}), vna_next={self.vna_next})'
| 184 | + |
class DynTag(ELFRecord):
    '''A single (d_tag, d_val) entry of a dynamic section.'''
    _FIELDS = ['d_tag', 'd_val']
    STRUCT = {
        ELFCLASS32: (BiStruct('II'), _FIELDS),
        ELFCLASS64: (BiStruct('QQ'), _FIELDS),
    }

    def __init__(self, data: bytes, offset: int, eh: ELFHeader, section: Section) -> None:
        '''Parse the entry at `offset`, checked against `section.sh_entsize`.'''
        super().__init__(data, offset, eh, section.sh_entsize)

    def __repr__(self) -> str:
        tag, val = self.d_tag, self.d_val
        return f'DynTag(d_tag={tag}, d_val={val})'
| 196 | + |
| 197 | +def _lookup_string(data: bytes, index: int) -> bytes: |
| 198 | + '''Look up string by offset in ELF string table.''' |
| 199 | + endx = data.find(b'\x00', index) |
| 200 | + assert endx != -1 |
| 201 | + return data[index:endx] |
| 202 | + |
VERSYM_S = BiStruct('H') # .gnu_version section has a single 16-bit integer per symbol in the linked section
def _parse_symbol_table(section: Section, strings: bytes, eh: ELFHeader, versym: bytes, verneed: Dict[int, bytes]) -> List[Symbol]:
    '''Parse symbol table, return a list of symbols.

    `versym` holds one 16-bit version index per symbol; each index is mapped
    through `verneed` (None when absent) and attached to the matching
    symbol. Parsing stops at whichever of the two tables ends first.
    '''
    table = section.contents()
    version_indices = VERSYM_S[eh.ei_data].iter_unpack(versym)
    entry_offsets = range(0, len(table), section.sh_entsize)
    return [
        Symbol(table, entry_ofs, eh, section, strings, verneed.get(index))
        for entry_ofs, (index,) in zip(entry_offsets, version_indices)
    ]
| 212 | + |
def _parse_verneed(section: Section, strings: bytes, eh: ELFHeader) -> Dict[int, bytes]:
    '''Parse .gnu.version_r section, return a dictionary of {versym: 'GLIBC_...'}.

    The section is a chain of Verneed records, each pointing at its own
    chain of Vernaux records. Every Vernaux contributes one entry mapping
    its `vna_other` to its version name (looked up in `strings`).
    '''
    data = section.contents()
    ofs = 0
    result = {}
    while True:
        verneed = Verneed(data, ofs, eh)
        # vn_aux is a byte offset from the start of *this* Verneed record,
        # not from the start of the section. Without adding `ofs`, every
        # record after the first would re-read the first record's Vernaux
        # chain and the versions required from later libraries would be lost.
        aofs = ofs + verneed.vn_aux
        while True:
            vernaux = Vernaux(data, aofs, eh, strings)
            result[vernaux.vna_other] = vernaux.name
            if not vernaux.vna_next:
                break  # vna_next == 0 marks the last Vernaux of this chain
            aofs += vernaux.vna_next  # offsets are relative to the current record

        if not verneed.vn_next:
            break  # vn_next == 0 marks the last Verneed record
        ofs += verneed.vn_next

    return result
| 233 | + |
def _parse_dyn_tags(section: Section, strings: bytes, eh: ELFHeader) -> List[Tuple[int, Union[bytes, int]]]:
    '''Parse dynamic tags. Return array of tuples.

    Each tuple is (d_tag, value). For tags listed in STRING_TAGS the value
    is the string found at offset d_val in `strings`; otherwise it is the
    raw d_val integer.
    '''
    table = section.contents()
    tags = []
    for entry_ofs in range(0, len(table), section.sh_entsize):
        entry = DynTag(table, entry_ofs, eh, section)
        if entry.d_tag in STRING_TAGS:
            payload = _lookup_string(strings, entry.d_val)
        else:
            payload = entry.d_val
        tags.append((entry.d_tag, payload))
    return tags
| 245 | + |
class ELFFile:
    '''A parsed ELF image: header, sections, program headers, dynamic symbols and tags.'''
    sections: List[Section]
    program_headers: List[ProgramHeader]
    dyn_symbols: List[Symbol]
    dyn_tags: List[Tuple[int, Union[bytes, int]]]

    def __init__(self, data: bytes) -> None:
        '''Parse `data` (the complete file contents) eagerly.'''
        self.data = data
        self.hdr = ELFHeader(self.data, 0)
        # Order matters: the dynamic loaders and the segment mapping read
        # self.sections / self.program_headers filled in by earlier steps.
        self._load_sections()
        self._load_program_headers()
        self._load_dyn_symbols()
        self._load_dyn_tags()
        self._section_to_segment_mapping()

    def _load_sections(self) -> None:
        '''Read all section headers, then resolve their names.'''
        hdr = self.hdr
        self.sections = [
            Section(self.data, hdr.e_shoff + i * hdr.e_shentsize, hdr)
            for i in range(hdr.e_shnum)
        ]

        # Names live in the section designated by e_shstrndx.
        names = self.sections[hdr.e_shstrndx].contents()
        for sec in self.sections:
            sec.name = _lookup_string(names, sec.sh_name)

    def _load_program_headers(self) -> None:
        '''Read all program headers.'''
        hdr = self.hdr
        self.program_headers = [
            ProgramHeader(self.data, hdr.e_phoff + i * hdr.e_phentsize, hdr)
            for i in range(hdr.e_phnum)
        ]

    def _load_dyn_symbols(self) -> None:
        '''Load dynamic symbols together with their version information.'''
        # first, load 'verneed' section
        verneed = None
        for section in self.sections:
            if section.sh_type != SHT_GNU_verneed:
                continue
            strtab = self.sections[section.sh_link].contents() # associated string table
            assert verneed is None # only one section of this kind please
            verneed = _parse_verneed(section, strtab, self.hdr)
        assert verneed is not None

        # then, correlate GNU versym sections with dynamic symbol sections
        # (keyed by the index of the symbol table each one is linked to)
        versym = {sec.sh_link: sec for sec in self.sections if sec.sh_type == SHT_GNU_versym}

        # finally, load dynsym sections
        self.dyn_symbols = []
        for idx, section in enumerate(self.sections):
            if section.sh_type != SHT_DYNSYM: # only dynamic symbol tables
                continue
            strtab_data = self.sections[section.sh_link].contents() # associated string table
            versym_data = versym[idx].contents() # associated symbol version table
            self.dyn_symbols.extend(
                _parse_symbol_table(section, strtab_data, self.hdr, versym_data, verneed)
            )

    def _load_dyn_tags(self) -> None:
        '''Collect the tags of every dynamic section.'''
        self.dyn_tags = []
        for section in self.sections:
            if section.sh_type != SHT_DYNAMIC: # only dynamic tag tables
                continue
            strtab = self.sections[section.sh_link].contents() # associated string table
            self.dyn_tags.extend(_parse_dyn_tags(section, strtab, self.hdr))

    def _section_to_segment_mapping(self) -> None:
        '''Attach to each program header the sections whose address lies inside it.'''
        for ph in self.program_headers:
            seg_start = ph.p_vaddr
            seg_end = seg_start + ph.p_memsz
            ph.sections = [
                sec for sec in self.sections
                if seg_start <= sec.sh_addr < seg_end
            ]

    def query_dyn_tags(self, tag_in: int) -> List[Union[int, bytes]]:
        '''Return the values of all dyn tags with the specified tag.'''
        return [entry[1] for entry in self.dyn_tags if entry[0] == tag_in]
| 318 | + |
| 319 | + |
def load(filename: str) -> ELFFile:
    '''Read the file at `filename` in binary mode and parse it as an ELF image.'''
    with open(filename, 'rb') as stream:
        image = stream.read()
    return ELFFile(image)
0 commit comments