|
| 1 | +""" |
| 2 | +ELF file parser. |
| 3 | +
|
| 4 | +This provides a class ``ELFFile`` that parses an ELF executable through an |
| 5 | +interface similar to ``ZipFile``. Only the read interface is implemented. |
| 6 | +
|
| 7 | +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca |
| 8 | +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html |
| 9 | +""" |
| 10 | + |
| 11 | +import enum |
| 12 | +import os |
| 13 | +import struct |
| 14 | +from typing import IO, Optional, Tuple |
| 15 | + |
| 16 | + |
class ELFInvalid(ValueError):
    """Error signalling that the input could not be parsed as an ELF file."""
| 19 | + |
| 20 | + |
class EIClass(enum.IntEnum):
    """Integer codes for the ELF file class (32-bit vs. 64-bit objects)."""

    # 32-bit objects.
    C32 = 1
    # 64-bit objects.
    C64 = 2
| 24 | + |
| 25 | + |
class EIData(enum.IntEnum):
    """Integer codes for the ELF data encoding (byte order)."""

    # Least significant byte first (little-endian).
    Lsb = 1
    # Most significant byte first (big-endian).
    Msb = 2
| 29 | + |
| 30 | + |
class EMachine(enum.IntEnum):
    """
    Integer codes for the ELF header ``e_machine`` field (target architecture).

    ``AArc64`` is a misspelling of AArch64 that is kept as the canonical
    member so existing callers and ``EMachine(183).name`` keep working;
    ``AArch64`` is a correctly spelled alias for the same member.
    """

    I386 = 3  # EM_386
    S390 = 22  # EM_S390
    Arm = 40  # EM_ARM
    X8664 = 62  # EM_X86_64
    AArc64 = 183  # EM_AARCH64 (historical spelling, canonical member).
    AArch64 = 183  # Alias of ``AArc64`` with the correct spelling.
| 37 | + |
| 38 | + |
class ELFFile:
    """
    Representation of an ELF executable.

    The identification bytes and the ELF file header are read from *f* when
    the object is constructed. Program headers are scanned lazily, each time
    :attr:`interpreter` is accessed, so *f* must remain open and seekable for
    as long as that property is used.

    Attributes set by the constructor:

    * ``capacity`` -- the ``EI_CLASS`` identification byte (1 = 32-bit,
      2 = 64-bit).
    * ``encoding`` -- the ``EI_DATA`` identification byte (1 = LSB first,
      2 = MSB first).
    * ``machine`` -- the ``e_machine`` header field (architecture type).
    * ``flags`` -- the ``e_flags`` header field (processor-specific flags).
    """

    # ``p_type`` value of the program header that names the interpreter.
    _PT_INTERP = 3

    def __init__(self, f: IO[bytes]) -> None:
        """
        Parse the ELF identification and file header from *f*.

        :param f: Binary file object positioned at the start of the ELF data.
        :raises ELFInvalid: If the identification or header cannot be read,
            the magic number is wrong, or the class/encoding pair is not
            recognized.
        """
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            raise ELFInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":
            raise ELFInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # File class (bitness).
        self.encoding = ident[5]  # Data structure encoding (endianness).

        try:
            # e_fmt: Format for the ELF header fields following e_ident.
            # p_fmt: Format for one program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or "
                f"encoding ({self.encoding})"
            ) from e

        try:
            (
                _,  # e_type
                self.machine,  # Architecture type.
                _,  # e_version
                _,  # e_entry
                self._e_phoff,  # File offset of the program header table.
                _,  # e_shoff
                self.flags,  # Processor-specific flags.
                _,  # e_ehsize
                self._e_phentsize,  # Size of one program header entry.
                self._e_phnum,  # Number of program header entries.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ELFInvalid("unable to parse machine and section information") from e

    def _read(self, fmt: str) -> Tuple[int, ...]:
        """
        Read and unpack one ``struct`` record from the current file position.

        :param fmt: ``struct`` format string; exactly ``calcsize(fmt)`` bytes
            are requested from the file.
        :raises struct.error: If fewer bytes than required were available.
        """
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    @property
    def interpreter(self) -> Optional[str]:
        """
        The path recorded in the ``PT_INTERP`` program header.

        Returns ``None`` when no program header of that type is found.
        Entries that cannot be read in full are skipped rather than reported,
        so a truncated table degrades to "no interpreter".
        """
        type_idx, offset_idx, filesz_idx = self._p_idx
        for index in range(self._e_phnum):
            self._f.seek(self._e_phoff + self._e_phentsize * index)
            try:
                data = self._read(self._p_fmt)
            except struct.error:
                # Best effort: ignore an entry that is truncated.
                continue
            if data[type_idx] != self._PT_INTERP:
                continue
            self._f.seek(data[offset_idx])
            # The stored path is NUL-terminated; drop the terminator.
            return os.fsdecode(self._f.read(data[filesz_idx])).strip("\0")
        return None
0 commit comments