diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index c32bf4fd63cc8f..0ef1dcbd7d0565 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -31,6 +31,7 @@ concurrency: env: FORCE_COLOR: 1 + LLVM_VERSION: 20 jobs: interpreter: @@ -67,8 +68,6 @@ jobs: debug: - true - false - llvm: - - 19 include: - target: i686-pc-windows-msvc/msvc architecture: Win32 @@ -110,7 +109,7 @@ jobs: if: runner.os == 'macOS' run: | brew update - brew install llvm@${{ matrix.llvm }} + brew install llvm@${{ env.LLVM_VERSION }} export SDKROOT="$(xcrun --show-sdk-path)" # Set MACOSX_DEPLOYMENT_TARGET and -Werror=unguarded-availability to # make sure we don't break downstream distributors (like uv): @@ -123,8 +122,8 @@ jobs: - name: Linux if: runner.os == 'Linux' run: | - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} - export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ env.LLVM_VERSION }} + export PATH="$(llvm-config-${{ env.LLVM_VERSION }} --bindir):$PATH" ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '' }} make all --jobs 4 ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 @@ -134,11 +133,6 @@ jobs: needs: interpreter runs-on: ubuntu-24.04 timeout-minutes: 90 - strategy: - fail-fast: false - matrix: - llvm: - - 19 steps: - uses: actions/checkout@v4 with: @@ -148,8 +142,8 @@ jobs: python-version: '3.11' - name: Build with JIT enabled and GIL disabled run: | - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} - export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ env.LLVM_VERSION }} + export PATH="$(llvm-config-${{ env.LLVM_VERSION }} --bindir):$PATH" ./configure --enable-experimental-jit --with-pydebug --disable-gil make all --jobs 4 - name: Run tests @@ -162,11 +156,6 @@ jobs: needs: interpreter runs-on: ubuntu-24.04 timeout-minutes: 90 - strategy: - fail-fast: false - matrix: - llvm: - - 19 steps: - uses: actions/checkout@v4 with: @@ -176,8 +165,8 @@ jobs: python-version: '3.11' - name: Build with JIT run: | - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} - export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ env.LLVM_VERSION }} + export PATH="$(llvm-config-${{ env.LLVM_VERSION }} --bindir):$PATH" ./configure --enable-experimental-jit --with-pydebug make all --jobs 4 - name: Run tests without optimizations diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst new file mode 100644 index 00000000000000..fffc264a8650e0 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-19-10-32-28.gh-issue-136895.HfsEh0.rst @@ -0,0 +1 @@ +Update JIT compilation to use LLVM 20 at build time. diff --git a/PCbuild/get_external.py b/PCbuild/get_external.py index a78aa6a23041ad..ea7287d0110ae9 100755 --- a/PCbuild/get_external.py +++ b/PCbuild/get_external.py @@ -3,6 +3,7 @@ import argparse import os import pathlib +import shutil import sys import time import urllib.error @@ -10,27 +11,33 @@ import zipfile -def retrieve_with_retries(download_location, output_path, reporthook, - max_retries=7): - """Download a file with exponential backoff retry and save to disk.""" +def request_with_retry(request_func, *args, max_retries=7, + err_msg='Request failed.', **kwargs): + """Make a request using request_func with exponential backoff""" for attempt in range(max_retries + 1): try: - resp = urllib.request.urlretrieve( - download_location, - output_path, - reporthook=reporthook, - ) + resp = request_func(*args, **kwargs) except (urllib.error.URLError, ConnectionError) as ex: if attempt == max_retries: - msg = f"Download from {download_location} failed." - raise OSError(msg) from ex + raise OSError(err_msg) from ex time.sleep(2.25**attempt) else: return resp -def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): - repo = f'cpython-{"bin" if binary else "source"}-deps' +def retrieve_with_retries(download_location, output_path, reporthook): + """Download a file with retries.""" + return request_with_retry( + urllib.request.urlretrieve, + download_location, + output_path, + reporthook, + err_msg=f'Download from {download_location} failed.', + ) + + +def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose=False): + repo = 'cpython-bin-deps' if binary else 'cpython-source-deps' url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip' reporthook = None if verbose: @@ -44,6 +51,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): return filename +def fetch_release(tag, tarball_dir, *, org='python', verbose=False): + url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz' + reporthook = None + if verbose: + reporthook = print + tarball_dir.mkdir(parents=True, exist_ok=True) + output_path = tarball_dir / f'{tag}.tar.xz' + retrieve_with_retries(url, output_path, reporthook) + return output_path + + +def extract_tarball(externals_dir, tarball_path, tag): + output_path = externals_dir / tag + shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path)) + return output_path + + def extract_zip(externals_dir, zip_path): with zipfile.ZipFile(os.fspath(zip_path)) as zf: zf.extractall(os.fspath(externals_dir)) @@ -55,6 +79,8 @@ def parse_args(): p.add_argument('-v', '--verbose', action='store_true') p.add_argument('-b', '--binary', action='store_true', help='Is the dependency in the binary repo?') + p.add_argument('-r', '--release', action='store_true', + help='Download from GitHub release assets instead of branch') p.add_argument('-O', '--organization', help='Organization owning the deps repos', default='python') p.add_argument('-e', '--externals-dir', type=pathlib.Path, @@ -67,15 +93,36 @@ def parse_args(): def main(): args = parse_args() - zip_path = fetch_zip( - args.tag, - args.externals_dir / 'zips', - org=args.organization, - binary=args.binary, - verbose=args.verbose, - ) final_name = args.externals_dir / args.tag - extracted = extract_zip(args.externals_dir, zip_path) + + # Check if the dependency already exists in externals/ directory + # (either already downloaded/extracted, or checked into the git tree) + if final_name.exists(): + if args.verbose: + print(f'{args.tag} already exists at {final_name}, skipping download.') + return + + # Determine download method: release artifacts for large deps (like LLVM), + # otherwise zip download from GitHub branches + if args.release: + tarball_path = fetch_release( + args.tag, + args.externals_dir / 'tarballs', + org=args.organization, + verbose=args.verbose, + ) + extracted = extract_tarball(args.externals_dir, tarball_path, args.tag) + else: + # Use zip download from GitHub branches + # (cpython-bin-deps if --binary, cpython-source-deps otherwise) + zip_path = fetch_zip( + args.tag, + args.externals_dir / 'zips', + org=args.organization, + binary=args.binary, + verbose=args.verbose, + ) + extracted = extract_zip(args.externals_dir, zip_path) for wait in [1, 2, 3, 5, 8, 0]: try: extracted.replace(final_name) diff --git a/PCbuild/get_externals.bat b/PCbuild/get_externals.bat index 50a227b563a7c0..c72b27f3b88dc3 100644 --- a/PCbuild/get_externals.bat +++ b/PCbuild/get_externals.bat @@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4 if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18 if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0 if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06 -if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0 +if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0 for %%b in (%binaries%) do ( if exist "%EXTERNALS_DIR%\%%b" ( @@ -92,7 +92,11 @@ for %%b in (%binaries%) do ( git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b" ) else ( echo.Fetching %%b... - %PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b + if "%%b"=="llvm-20.1.8.0" ( + %PYTHON% -E "%PCBUILD%\get_external.py" -r -O %ORG% -e "%EXTERNALS_DIR%" %%b + ) else ( + %PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b + ) ) ) diff --git a/Python/jit.c b/Python/jit.c index c3f3d686013fe4..f557904a582f98 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -444,12 +444,18 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value) } void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state); +void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state); #include "jit_stencils.h" #if defined(__aarch64__) || defined(_M_ARM64) #define TRAMPOLINE_SIZE 16 #define DATA_ALIGN 8 +#elif defined(__x86_64__) && defined(__APPLE__) + // LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative + // range. + #define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment + #define DATA_ALIGN 8 #else #define TRAMPOLINE_SIZE 0 #define DATA_ALIGN 1 @@ -501,6 +507,48 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state) patch_aarch64_26r(location, (uintptr_t)p); } +// Generate and patch x86_64 trampolines. +void +patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state) +{ + uint64_t value = (uintptr_t)symbols_map[ordinal]; + int64_t range = (int64_t)value - 4 - (int64_t)location; + + // If we are in range of 32 signed bits, we can patch directly + if (range >= -(1LL << 31) && range < (1LL << 31)) { + patch_32r(location, value - 4); + return; + } + + // Out of range - need a trampoline + const uint32_t symbol_mask = 1 << (ordinal % 32); + const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32]; + assert(symbol_mask & trampoline_mask); + + // Count the number of set bits in the trampoline mask lower than ordinal + int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1)); + for (int i = 0; i < ordinal / 32; i++) { + index += _Py_popcount32(state->trampolines.mask[i]); + } + + unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE; + assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size); + + /* Generate the trampoline (14 bytes, padded to 16): + 0: ff 25 00 00 00 00 jmp *(%rip) + 6: XX XX XX XX XX XX XX XX (64-bit target address) + + Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64) + */ + trampoline[0] = 0xFF; + trampoline[1] = 0x25; + *(uint32_t *)(trampoline + 2) = 0; + *(uint64_t *)(trampoline + 6) = value; + + // Patch the call site to call the trampoline instead + patch_32r(location, (uintptr_t)trampoline - 4); +} + static void combine_symbol_mask(const symbol_mask src, symbol_mask dest) { diff --git a/Tools/jit/README.md b/Tools/jit/README.md index 35c7ffd7a283f8..c4b29877967b8e 100644 --- a/Tools/jit/README.md +++ b/Tools/jit/README.md @@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT. The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon). -LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. +LLVM version 20 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. It's easy to install all of the required tools: ### Linux -Install LLVM 19 on Ubuntu/Debian: +Install LLVM 20 on Ubuntu/Debian: ```sh wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh -sudo ./llvm.sh 19 +sudo ./llvm.sh 20 ``` -Install LLVM 19 on Fedora Linux 40 or newer: +Install LLVM 20 on Fedora Linux 40 or newer: ```sh -sudo dnf install 'clang(major) = 19' 'llvm(major) = 19' +sudo dnf install 'clang(major) = 20' 'llvm(major) = 20' ``` ### macOS -Install LLVM 19 with [Homebrew](https://brew.sh): +Install LLVM 20 with [Homebrew](https://brew.sh): ```sh -brew install llvm@19 +brew install llvm@20 ``` Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them. @@ -43,12 +43,12 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`. -Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** +Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** Alternatively, you can use [chocolatey](https://chocolatey.org): ```sh -choco install llvm --version=19.1.0 +choco install llvm --version=20.1.8 ``` ### Dev Containers diff --git a/Tools/jit/_llvm.py b/Tools/jit/_llvm.py index bc3b50ffe61634..54c2bf86a36ed6 100644 --- a/Tools/jit/_llvm.py +++ b/Tools/jit/_llvm.py @@ -11,8 +11,8 @@ import _targets -_LLVM_VERSION = "19" -_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0" +_LLVM_VERSION = "20" +_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0" _P = typing.ParamSpec("_P") _R = typing.TypeVar("_R") diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 16bc1ea4e17e6b..44ba4503819bf1 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -241,6 +241,23 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None: self._trampolines.add(ordinal) hole.addend = ordinal hole.symbol = None + # x86_64 Darwin trampolines for external symbols + elif ( + hole.kind == "X86_64_RELOC_BRANCH" + and hole.value is HoleValue.ZERO + and hole.symbol not in self.symbols + ): + hole.func = "patch_x86_64_trampoline" + hole.need_state = True + assert hole.symbol is not None + if hole.symbol in known_symbols: + ordinal = known_symbols[hole.symbol] + else: + ordinal = len(known_symbols) + known_symbols[hole.symbol] = ordinal + self._trampolines.add(ordinal) + hole.addend = ordinal + hole.symbol = None self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 7ff7c4fba49652..7dbe2cca767b1d 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -166,10 +166,6 @@ async def _compile( "-fno-asynchronous-unwind-tables", # Don't call built-in functions that we can't find or patch: "-fno-builtin", - # Emit relaxable 64-bit calls/jumps, so we don't have to worry about - # about emitting in-range trampolines for out-of-range targets. - # We can probably remove this and emit trampolines in the future: - "-fno-plt", # Don't call stack-smashing canaries that we can't find or patch: "-fno-stack-protector", "-std=c11", @@ -570,14 +566,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: elif re.fullmatch(r"aarch64-pc-windows-msvc", host): host = "aarch64-pc-windows-msvc" condition = "defined(_M_ARM64)" - args = ["-fms-runtime-lib=dll", "-fplt"] + args = ["-fms-runtime-lib=dll"] optimizer = _optimizers.OptimizerAArch64 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): host = "aarch64-unknown-linux-gnu" condition = "defined(__aarch64__) && defined(__linux__)" # -mno-outline-atomics: Keep intrinsics from being emitted. - args = ["-fpic", "-mno-outline-atomics"] + args = ["-fpic", "-mno-outline-atomics", "-fno-plt"] optimizer = _optimizers.OptimizerAArch64 target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): @@ -601,7 +597,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: elif re.fullmatch(r"x86_64-.*-linux-gnu", host): host = "x86_64-unknown-linux-gnu" condition = "defined(__x86_64__) && defined(__linux__)" - args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] + args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"] optimizer = _optimizers.OptimizerX86 target = _ELF(host, condition, args=args, optimizer=optimizer) else: