|
| 1 | +import collections |
| 2 | +import functools |
| 3 | +import os |
| 4 | +import re |
| 5 | +import struct |
| 6 | +import sys |
| 7 | +import warnings |
| 8 | +from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple |
| 9 | + |
| 10 | + |
| 11 | +# Python does not provide platform information at sufficient granularity to |
| 12 | +# identify the architecture of the running executable in some cases, so we |
| 13 | +# determine it dynamically by reading the information from the running |
| 14 | +# process. This only applies on Linux, which uses the ELF format. |
| 15 | +class _ELFFileHeader: |
| 16 | + # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header |
| 17 | + class _InvalidELFFileHeader(ValueError): |
| 18 | + """ |
| 19 | + An invalid ELF file header was found. |
| 20 | + """ |
| 21 | + |
| 22 | + ELF_MAGIC_NUMBER = 0x7F454C46 |
| 23 | + ELFCLASS32 = 1 |
| 24 | + ELFCLASS64 = 2 |
| 25 | + ELFDATA2LSB = 1 |
| 26 | + ELFDATA2MSB = 2 |
| 27 | + EM_386 = 3 |
| 28 | + EM_S390 = 22 |
| 29 | + EM_ARM = 40 |
| 30 | + EM_X86_64 = 62 |
| 31 | + EF_ARM_ABIMASK = 0xFF000000 |
| 32 | + EF_ARM_ABI_VER5 = 0x05000000 |
| 33 | + EF_ARM_ABI_FLOAT_HARD = 0x00000400 |
| 34 | + |
| 35 | + def __init__(self, file: IO[bytes]) -> None: |
| 36 | + def unpack(fmt: str) -> int: |
| 37 | + try: |
| 38 | + data = file.read(struct.calcsize(fmt)) |
| 39 | + result: Tuple[int, ...] = struct.unpack(fmt, data) |
| 40 | + except struct.error: |
| 41 | + raise _ELFFileHeader._InvalidELFFileHeader() |
| 42 | + return result[0] |
| 43 | + |
| 44 | + self.e_ident_magic = unpack(">I") |
| 45 | + if self.e_ident_magic != self.ELF_MAGIC_NUMBER: |
| 46 | + raise _ELFFileHeader._InvalidELFFileHeader() |
| 47 | + self.e_ident_class = unpack("B") |
| 48 | + if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: |
| 49 | + raise _ELFFileHeader._InvalidELFFileHeader() |
| 50 | + self.e_ident_data = unpack("B") |
| 51 | + if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: |
| 52 | + raise _ELFFileHeader._InvalidELFFileHeader() |
| 53 | + self.e_ident_version = unpack("B") |
| 54 | + self.e_ident_osabi = unpack("B") |
| 55 | + self.e_ident_abiversion = unpack("B") |
| 56 | + self.e_ident_pad = file.read(7) |
| 57 | + format_h = "<H" if self.e_ident_data == self.ELFDATA2LSB else ">H" |
| 58 | + format_i = "<I" if self.e_ident_data == self.ELFDATA2LSB else ">I" |
| 59 | + format_q = "<Q" if self.e_ident_data == self.ELFDATA2LSB else ">Q" |
| 60 | + format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q |
| 61 | + self.e_type = unpack(format_h) |
| 62 | + self.e_machine = unpack(format_h) |
| 63 | + self.e_version = unpack(format_i) |
| 64 | + self.e_entry = unpack(format_p) |
| 65 | + self.e_phoff = unpack(format_p) |
| 66 | + self.e_shoff = unpack(format_p) |
| 67 | + self.e_flags = unpack(format_i) |
| 68 | + self.e_ehsize = unpack(format_h) |
| 69 | + self.e_phentsize = unpack(format_h) |
| 70 | + self.e_phnum = unpack(format_h) |
| 71 | + self.e_shentsize = unpack(format_h) |
| 72 | + self.e_shnum = unpack(format_h) |
| 73 | + self.e_shstrndx = unpack(format_h) |
| 74 | + |
| 75 | + |
def _get_elf_header() -> Optional[_ELFFileHeader]:
    """Parse the ELF header of the running interpreter (``sys.executable``).

    Returns ``None`` when the executable cannot be opened or read, when
    ``open`` rejects the value of ``sys.executable`` with a ``TypeError``,
    or when the file does not start with a valid ELF header.
    """
    try:
        with open(sys.executable, "rb") as executable:
            return _ELFFileHeader(executable)
    except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader):
        return None
| 83 | + |
| 84 | + |
def _is_linux_armhf() -> bool:
    """Tell whether the running executable uses the ARM hard-float ABI.

    The hard-float ABI can be detected from the ELF header of the running
    process: it must be a 32-bit, little-endian ARM binary whose flags
    declare EABI version 5 with the hard-float bit set.
    https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
    """
    header = _get_elf_header()
    if header is None:
        return False
    abi_version = header.e_flags & header.EF_ARM_ABIMASK
    float_abi = header.e_flags & header.EF_ARM_ABI_FLOAT_HARD
    return (
        header.e_ident_class == header.ELFCLASS32
        and header.e_ident_data == header.ELFDATA2LSB
        and header.e_machine == header.EM_ARM
        and abi_version == header.EF_ARM_ABI_VER5
        and float_abi == header.EF_ARM_ABI_FLOAT_HARD
    )
| 102 | + |
| 103 | + |
def _is_linux_i686() -> bool:
    """Tell whether the running executable is a 32-bit little-endian x86 ELF.

    Returns ``False`` when no ELF header could be read.
    """
    header = _get_elf_header()
    if header is None:
        return False
    return (
        header.e_ident_class == header.ELFCLASS32
        and header.e_ident_data == header.ELFDATA2LSB
        and header.e_machine == header.EM_386
    )
| 112 | + |
| 113 | + |
| 114 | +def _have_compatible_abi(arch: str) -> bool: |
| 115 | + if arch == "armv7l": |
| 116 | + return _is_linux_armhf() |
| 117 | + if arch == "i686": |
| 118 | + return _is_linux_i686() |
| 119 | + return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} |
| 120 | + |
| 121 | + |
# If glibc ever changes its major version, we need to know what the last
# minor version was, so we can build the complete list of all versions.
# The real value is not known yet, so for now every major version falls
# back to a guessed last minor version of 50 (which is also what testing
# assumes). Once a major version bump actually happens, update the
# dictionary with the actual value.
_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)
| 128 | + |
| 129 | + |
class _GLibCVersion(NamedTuple):
    """A glibc version as a ``(major, minor)`` pair of integers."""

    major: int
    minor: int
| 133 | + |
| 134 | + |
| 135 | +def _glibc_version_string_confstr() -> Optional[str]: |
| 136 | + """ |
| 137 | + Primary implementation of glibc_version_string using os.confstr. |
| 138 | + """ |
| 139 | + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely |
| 140 | + # to be broken or missing. This strategy is used in the standard library |
| 141 | + # platform module. |
| 142 | + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 |
| 143 | + try: |
| 144 | + # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". |
| 145 | + version_string = os.confstr("CS_GNU_LIBC_VERSION") |
| 146 | + assert version_string is not None |
| 147 | + _, version = version_string.split() |
| 148 | + except (AssertionError, AttributeError, OSError, ValueError): |
| 149 | + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... |
| 150 | + return None |
| 151 | + return version |
| 152 | + |
| 153 | + |
| 154 | +def _glibc_version_string_ctypes() -> Optional[str]: |
| 155 | + """ |
| 156 | + Fallback implementation of glibc_version_string using ctypes. |
| 157 | + """ |
| 158 | + try: |
| 159 | + import ctypes |
| 160 | + except ImportError: |
| 161 | + return None |
| 162 | + |
| 163 | + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen |
| 164 | + # manpage says, "If filename is NULL, then the returned handle is for the |
| 165 | + # main program". This way we can let the linker do the work to figure out |
| 166 | + # which libc our process is actually using. |
| 167 | + # |
| 168 | + # We must also handle the special case where the executable is not a |
| 169 | + # dynamically linked executable. This can occur when using musl libc, |
| 170 | + # for example. In this situation, dlopen() will error, leading to an |
| 171 | + # OSError. Interestingly, at least in the case of musl, there is no |
| 172 | + # errno set on the OSError. The single string argument used to construct |
| 173 | + # OSError comes from libc itself and is therefore not portable to |
| 174 | + # hard code here. In any case, failure to call dlopen() means we |
| 175 | + # can proceed, so we bail on our attempt. |
| 176 | + try: |
| 177 | + process_namespace = ctypes.CDLL(None) |
| 178 | + except OSError: |
| 179 | + return None |
| 180 | + |
| 181 | + try: |
| 182 | + gnu_get_libc_version = process_namespace.gnu_get_libc_version |
| 183 | + except AttributeError: |
| 184 | + # Symbol doesn't exist -> therefore, we are not linked to |
| 185 | + # glibc. |
| 186 | + return None |
| 187 | + |
| 188 | + # Call gnu_get_libc_version, which returns a string like "2.5" |
| 189 | + gnu_get_libc_version.restype = ctypes.c_char_p |
| 190 | + version_str: str = gnu_get_libc_version() |
| 191 | + # py2 / py3 compatibility: |
| 192 | + if not isinstance(version_str, str): |
| 193 | + version_str = version_str.decode("ascii") |
| 194 | + |
| 195 | + return version_str |
| 196 | + |
| 197 | + |
def _glibc_version_string() -> Optional[str]:
    """Returns glibc version string, or None if not using glibc."""
    # Prefer os.confstr; only fall back to ctypes when it yields nothing.
    version = _glibc_version_string_confstr()
    if version:
        return version
    return _glibc_version_string_ctypes()
| 201 | + |
| 202 | + |
| 203 | +def _parse_glibc_version(version_str: str) -> Tuple[int, int]: |
| 204 | + """Parse glibc version. |
| 205 | +
|
| 206 | + We use a regexp instead of str.split because we want to discard any |
| 207 | + random junk that might come after the minor version -- this might happen |
| 208 | + in patched/forked versions of glibc (e.g. Linaro's version of glibc |
| 209 | + uses version strings like "2.20-2014.11"). See gh-3588. |
| 210 | + """ |
| 211 | + m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str) |
| 212 | + if not m: |
| 213 | + warnings.warn( |
| 214 | + "Expected glibc version with 2 components major.minor," |
| 215 | + " got: %s" % version_str, |
| 216 | + RuntimeWarning, |
| 217 | + ) |
| 218 | + return -1, -1 |
| 219 | + return int(m.group("major")), int(m.group("minor")) |
| 220 | + |
| 221 | + |
@functools.lru_cache()
def _get_glibc_version() -> Tuple[int, int]:
    """Return the glibc version in use as ``(major, minor)``, cached.

    ``(-1, -1)`` is returned when no glibc version string is available.
    """
    version_str = _glibc_version_string()
    return (-1, -1) if version_str is None else _parse_glibc_version(version_str)
| 228 | + |
| 229 | + |
# From PEP 513, PEP 600
def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
    """Decide whether wheels built for glibc ``version`` may be used.

    The system glibc must be at least ``version``. If a ``_manylinux``
    module can be imported, it gets the final say: its
    ``manylinux_compatible(major, minor, arch)`` function when present
    (a ``None`` result counts as compatible), otherwise the legacy
    ``manylinux1_compatible`` / ``manylinux2010_compatible`` /
    ``manylinux2014_compatible`` attribute matching ``version``. With no
    applicable override the answer is True. ``name`` is not consulted.
    """
    if _get_glibc_version() < version:
        return False
    # Check for presence of _manylinux module.
    try:
        import _manylinux  # noqa
    except ImportError:
        return True
    if hasattr(_manylinux, "manylinux_compatible"):
        verdict = _manylinux.manylinux_compatible(version[0], version[1], arch)
        return True if verdict is None else bool(verdict)
    # Legacy per-tag attributes, each tied to one specific glibc version.
    legacy_flags = (
        ((2, 5), "manylinux1_compatible"),
        ((2, 12), "manylinux2010_compatible"),
        ((2, 17), "manylinux2014_compatible"),
    )
    for legacy_version, flag_name in legacy_flags:
        if version == legacy_version and hasattr(_manylinux, flag_name):
            return bool(getattr(_manylinux, flag_name))
    return True
| 255 | + |
| 256 | + |
# Maps a glibc (major, minor) version to the legacy manylinux tag name that
# is emitted for it in addition to the "manylinux_<major>_<minor>" tag.
_LEGACY_MANYLINUX_MAP = {
    # CentOS 7 w/ glibc 2.17 (PEP 599)
    (2, 17): "manylinux2014",
    # CentOS 6 w/ glibc 2.12 (PEP 571)
    (2, 12): "manylinux2010",
    # CentOS 5 w/ glibc 2.5 (PEP 513)
    (2, 5): "manylinux1",
}
| 265 | + |
| 266 | + |
def platform_tags(linux: str, arch: str) -> Iterator[str]:
    """Yield the manylinux platform tags usable on the running system.

    ``linux`` is the plain linux platform tag; each yielded tag is that
    string with "linux" replaced by a manylinux name. Tags are produced
    from the current glibc version downwards, and a legacy tag
    (manylinux2014 / manylinux2010 / manylinux1) follows the
    ``manylinux_X_Y`` tag of the glibc version it corresponds to. Nothing
    is yielded when the interpreter's ABI does not match ``arch``.
    """
    if not _have_compatible_abi(arch):
        return
    if arch in {"x86_64", "i686"}:
        # On x86/i686 also oldest glibc to be supported is (2, 5).
        too_old = _GLibCVersion(2, 4)
    else:
        # Oldest glibc to be supported regardless of architecture is (2, 17).
        too_old = _GLibCVersion(2, 16)
    current = _GLibCVersion(*_get_glibc_version())
    # We can assume compatibility across glibc major versions.
    # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
    #
    # Build a list of maximum glibc versions so that we can
    # output the canonical list of all glibc from current
    # down to too_old, including all intermediary versions.
    newest_per_major = [current]
    newest_per_major.extend(
        _GLibCVersion(major, _LAST_GLIBC_MINOR[major])
        for major in range(current.major - 1, 1, -1)
    )
    for newest in newest_per_major:
        # Exclusive lower bound for the minor version: the "too old" minor
        # within its own major series; for other glibc major versions the
        # oldest supported is (x, 0).
        stop_minor = too_old.minor if newest.major == too_old.major else -1
        for minor in range(newest.minor, stop_minor, -1):
            candidate = _GLibCVersion(newest.major, minor)
            modern_tag = "manylinux_{}_{}".format(*candidate)
            if _is_compatible(modern_tag, arch, candidate):
                yield linux.replace("linux", modern_tag)
            # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
            legacy_tag = _LEGACY_MANYLINUX_MAP.get(candidate)
            if legacy_tag is not None and _is_compatible(
                legacy_tag, arch, candidate
            ):
                yield linux.replace("linux", legacy_tag)
0 commit comments