Skip to content

Commit 4e2ff4a

Browse files
pythonGH-136895: Update JIT builds to use LLVM 20 (python#140329)
Co-authored-by: Emma Harper Smith <[email protected]>
1 parent b373d34 commit 4e2ff4a

File tree

9 files changed

+151
-50
lines changed

9 files changed

+151
-50
lines changed

.github/workflows/jit.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ jobs:
6868
- true
6969
- false
7070
llvm:
71-
- 19
71+
- 20
7272
include:
7373
- target: i686-pc-windows-msvc/msvc
7474
architecture: Win32
@@ -138,7 +138,7 @@ jobs:
138138
fail-fast: false
139139
matrix:
140140
llvm:
141-
- 19
141+
- 20
142142
steps:
143143
- uses: actions/checkout@v4
144144
with:
@@ -166,7 +166,7 @@ jobs:
166166
fail-fast: false
167167
matrix:
168168
llvm:
169-
- 19
169+
- 20
170170
steps:
171171
- uses: actions/checkout@v4
172172
with:
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Update JIT compilation to use LLVM 20 at build time.

PCbuild/get_external.py

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import argparse
44
import os
55
import pathlib
6+
import shutil
67
import sys
78
import time
89
import urllib.error
@@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook,
2223
)
2324
except (urllib.error.URLError, ConnectionError) as ex:
2425
if attempt == max_retries:
25-
msg = f"Download from {download_location} failed."
26-
raise OSError(msg) from ex
26+
raise OSError(f'Download from {download_location} failed.') from ex
2727
time.sleep(2.25**attempt)
2828
else:
2929
return resp
3030

31-
3231
def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
33-
repo = f'cpython-{"bin" if binary else "source"}-deps'
32+
repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
3433
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
3534
reporthook = None
3635
if verbose:
@@ -44,6 +43,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
4443
return filename
4544

4645

46+
def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
    """Download the ``{tag}.tar.xz`` release asset of cpython-bin-deps.

    The asset is fetched from the GitHub release named *tag* in the
    ``{org}/cpython-bin-deps`` repository and saved inside *tarball_dir*,
    which is created (including parents) if it does not exist yet.

    When *verbose* is true, ``print`` is handed to
    ``retrieve_with_retries`` as its report hook; otherwise no hook is used.

    Returns the path of the downloaded tarball. Any exception raised by
    ``retrieve_with_retries`` propagates to the caller.
    """
    source_url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz'
    progress_hook = print if verbose else None

    tarball_dir.mkdir(parents=True, exist_ok=True)
    destination = tarball_dir / f'{tag}.tar.xz'

    retrieve_with_retries(source_url, destination, progress_hook)
    return destination
55+
56+
57+
def extract_tarball(externals_dir, tarball_path, tag):
    """Unpack the tar archive *tarball_path* into ``externals_dir / tag``.

    Members are extracted with the tarfile ``'data'`` filter whenever the
    running interpreter supports extraction filters. That filter refuses
    members which would land outside the destination directory, and it
    avoids the DeprecationWarning that Python 3.12/3.13 emit for an
    unfiltered extraction. Interpreters without filter support fall back
    to the plain call.

    Returns the destination directory, ``externals_dir / tag``.
    """
    # Local import: only needed to detect whether extraction filters exist.
    import tarfile

    output_path = externals_dir / tag
    unpack_options = {}
    if hasattr(tarfile, 'data_filter'):
        # Extraction filters were added to tarfile and to
        # shutil.unpack_archive() together, so this probe covers both.
        unpack_options['filter'] = 'data'
    shutil.unpack_archive(
        os.fspath(tarball_path),
        os.fspath(output_path),
        **unpack_options,
    )
    return output_path
61+
62+
4763
def extract_zip(externals_dir, zip_path):
4864
with zipfile.ZipFile(os.fspath(zip_path)) as zf:
4965
zf.extractall(os.fspath(externals_dir))
@@ -55,6 +71,8 @@ def parse_args():
5571
p.add_argument('-v', '--verbose', action='store_true')
5672
p.add_argument('-b', '--binary', action='store_true',
5773
help='Is the dependency in the binary repo?')
74+
p.add_argument('-r', '--release', action='store_true',
75+
help='Download from GitHub release assets instead of branch')
5876
p.add_argument('-O', '--organization',
5977
help='Organization owning the deps repos', default='python')
6078
p.add_argument('-e', '--externals-dir', type=pathlib.Path,
@@ -67,15 +85,36 @@ def parse_args():
6785

6886
def main():
6987
args = parse_args()
70-
zip_path = fetch_zip(
71-
args.tag,
72-
args.externals_dir / 'zips',
73-
org=args.organization,
74-
binary=args.binary,
75-
verbose=args.verbose,
76-
)
7788
final_name = args.externals_dir / args.tag
78-
extracted = extract_zip(args.externals_dir, zip_path)
89+
90+
# Check if the dependency already exists in externals/ directory
91+
# (either already downloaded/extracted, or checked into the git tree)
92+
if final_name.exists():
93+
if args.verbose:
94+
print(f'{args.tag} already exists at {final_name}, skipping download.')
95+
return
96+
97+
# Determine download method: release artifacts for large deps (like LLVM),
98+
# otherwise zip download from GitHub branches
99+
if args.release:
100+
tarball_path = fetch_release(
101+
args.tag,
102+
args.externals_dir / 'tarballs',
103+
org=args.organization,
104+
verbose=args.verbose,
105+
)
106+
extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
107+
else:
108+
# Use zip download from GitHub branches
109+
# (cpython-bin-deps if --binary, cpython-source-deps otherwise)
110+
zip_path = fetch_zip(
111+
args.tag,
112+
args.externals_dir / 'zips',
113+
org=args.organization,
114+
binary=args.binary,
115+
verbose=args.verbose,
116+
)
117+
extracted = extract_zip(args.externals_dir, zip_path)
79118
for wait in [1, 2, 3, 5, 8, 0]:
80119
try:
81120
extracted.replace(final_name)

PCbuild/get_externals.bat

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4
8282
if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18
8383
if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
8484
if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06
85-
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0
85+
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0
8686

8787
for %%b in (%binaries%) do (
8888
if exist "%EXTERNALS_DIR%\%%b" (
@@ -92,7 +92,11 @@ for %%b in (%binaries%) do (
9292
git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
9393
) else (
9494
echo.Fetching %%b...
95-
%PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b
95+
if "%%b"=="llvm-20.1.8.0" (
96+
%PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
97+
) else (
98+
%PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
99+
)
96100
)
97101
)
98102

Python/jit.c

Lines changed: 58 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
444444
}
445445

446446
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
447+
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
447448

448449
#include "jit_stencils.h"
449450

450451
#if defined(__aarch64__) || defined(_M_ARM64)
451452
#define TRAMPOLINE_SIZE 16
452453
#define DATA_ALIGN 8
454+
#elif defined(__x86_64__) && defined(__APPLE__)
455+
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
456+
// range.
457+
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
458+
#define DATA_ALIGN 8
453459
#else
454460
#define TRAMPOLINE_SIZE 0
455461
#define DATA_ALIGN 1
456462
#endif
457463

464+
// Get the trampoline memory location for a given symbol ordinal.
465+
static unsigned char *
466+
get_trampoline_slot(int ordinal, jit_state *state)
467+
{
468+
const uint32_t symbol_mask = 1 << (ordinal % 32);
469+
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
470+
assert(symbol_mask & trampoline_mask);
471+
472+
// Count the number of set bits in the trampoline mask lower than ordinal
473+
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
474+
for (int i = 0; i < ordinal / 32; i++) {
475+
index += _Py_popcount32(state->trampolines.mask[i]);
476+
}
477+
478+
unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
479+
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
480+
return trampoline;
481+
}
482+
458483
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
459484
// in the jit_stencils.h in the symbols_map.
460485
void
@@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
471496
return;
472497
}
473498

474-
// Masking is done modulo 32 as the mask is stored as an array of uint32_t
475-
const uint32_t symbol_mask = 1 << (ordinal % 32);
476-
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
477-
assert(symbol_mask & trampoline_mask);
478-
479-
// Count the number of set bits in the trampoline mask lower than ordinal,
480-
// this gives the index into the array of trampolines.
481-
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
482-
for (int i = 0; i < ordinal / 32; i++) {
483-
index += _Py_popcount32(state->trampolines.mask[i]);
484-
}
485-
486-
uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
487-
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
499+
// Out of range - need a trampoline
500+
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
488501

489502

490503
/* Generate the trampoline
@@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
501514
patch_aarch64_26r(location, (uintptr_t)p);
502515
}
503516

517+
// Generate and patch x86_64 trampolines.
518+
void
519+
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
520+
{
521+
uint64_t value = (uintptr_t)symbols_map[ordinal];
522+
int64_t range = (int64_t)value - 4 - (int64_t)location;
523+
524+
// If we are in range of 32 signed bits, we can patch directly
525+
if (range >= -(1LL << 31) && range < (1LL << 31)) {
526+
patch_32r(location, value - 4);
527+
return;
528+
}
529+
530+
// Out of range - need a trampoline
531+
unsigned char *trampoline = get_trampoline_slot(ordinal, state);
532+
533+
/* Generate the trampoline (14 bytes, padded to 16):
534+
0: ff 25 00 00 00 00 jmp *(%rip)
535+
6: XX XX XX XX XX XX XX XX (64-bit target address)
536+
537+
Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
538+
*/
539+
trampoline[0] = 0xFF;
540+
trampoline[1] = 0x25;
541+
memset(trampoline + 2, 0, 4);
542+
memcpy(trampoline + 6, &value, 8);
543+
544+
// Patch the call site to call the trampoline instead
545+
patch_32r(location, (uintptr_t)trampoline - 4);
546+
}
547+
504548
static void
505549
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
506550
{

Tools/jit/README.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT.
99

1010
The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
1111

12-
LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
12+
LLVM version 20 is the officially supported version. If needed, you can override it with the `LLVM_VERSION` environment variable during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-20`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
1313

1414
It's easy to install all of the required tools:
1515

1616
### Linux
1717

18-
Install LLVM 19 on Ubuntu/Debian:
18+
Install LLVM 20 on Ubuntu/Debian:
1919

2020
```sh
2121
wget https://apt.llvm.org/llvm.sh
2222
chmod +x llvm.sh
23-
sudo ./llvm.sh 19
23+
sudo ./llvm.sh 20
2424
```
2525

26-
Install LLVM 19 on Fedora Linux 40 or newer:
26+
Install LLVM 20 on Fedora Linux 40 or newer:
2727

2828
```sh
29-
sudo dnf install 'clang(major) = 19' 'llvm(major) = 19'
29+
sudo dnf install 'clang(major) = 20' 'llvm(major) = 20'
3030
```
3131

3232
### macOS
3333

34-
Install LLVM 19 with [Homebrew](https://brew.sh):
34+
Install LLVM 20 with [Homebrew](https://brew.sh):
3535

3636
```sh
37-
brew install llvm@19
37+
brew install llvm@20
3838
```
3939

4040
Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
@@ -43,18 +43,18 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri
4343

4444
LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.
4545

46-
Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
46+
Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
4747

4848
Alternatively, you can use [chocolatey](https://chocolatey.org):
4949

5050
```sh
51-
choco install llvm --version=19.1.0
51+
choco install llvm --version=20.1.8
5252
```
5353

5454
### Dev Containers
5555

5656
If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no
57-
need to install LLVM as the Fedora 41 base image includes LLVM 19 out of the box.
57+
need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box.
5858

5959
## Building
6060

Tools/jit/_llvm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
import _targets
1212

1313

14-
_LLVM_VERSION = "19"
15-
_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0"
14+
_LLVM_VERSION = "20"
15+
_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0"
1616

1717
_P = typing.ParamSpec("_P")
1818
_R = typing.TypeVar("_R")

Tools/jit/_stencils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,23 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None:
253253
self._trampolines.add(ordinal)
254254
hole.addend = ordinal
255255
hole.symbol = None
256+
# x86_64 Darwin trampolines for external symbols
257+
elif (
258+
hole.kind == "X86_64_RELOC_BRANCH"
259+
and hole.value is HoleValue.ZERO
260+
and hole.symbol not in self.symbols
261+
):
262+
hole.func = "patch_x86_64_trampoline"
263+
hole.need_state = True
264+
assert hole.symbol is not None
265+
if hole.symbol in known_symbols:
266+
ordinal = known_symbols[hole.symbol]
267+
else:
268+
ordinal = len(known_symbols)
269+
known_symbols[hole.symbol] = ordinal
270+
self._trampolines.add(ordinal)
271+
hole.addend = ordinal
272+
hole.symbol = None
256273
self.data.pad(8)
257274
for stencil in [self.code, self.data]:
258275
for hole in stencil.holes:

Tools/jit/_targets.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,6 @@ async def _compile(
166166
"-fno-asynchronous-unwind-tables",
167167
# Don't call built-in functions that we can't find or patch:
168168
"-fno-builtin",
169-
# Emit relaxable 64-bit calls/jumps, so we don't have to worry about
170-
# about emitting in-range trampolines for out-of-range targets.
171-
# We can probably remove this and emit trampolines in the future:
172-
"-fno-plt",
173169
# Don't call stack-smashing canaries that we can't find or patch:
174170
"-fno-stack-protector",
175171
"-std=c11",
@@ -571,14 +567,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
571567
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
572568
host = "aarch64-pc-windows-msvc"
573569
condition = "defined(_M_ARM64)"
574-
args = ["-fms-runtime-lib=dll", "-fplt"]
570+
args = ["-fms-runtime-lib=dll"]
575571
optimizer = _optimizers.OptimizerAArch64
576572
target = _COFF64(host, condition, args=args, optimizer=optimizer)
577573
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
578574
host = "aarch64-unknown-linux-gnu"
579575
condition = "defined(__aarch64__) && defined(__linux__)"
580576
# -mno-outline-atomics: Keep intrinsics from being emitted.
581-
args = ["-fpic", "-mno-outline-atomics"]
577+
args = ["-fpic", "-mno-outline-atomics", "-fno-plt"]
582578
optimizer = _optimizers.OptimizerAArch64
583579
target = _ELF(host, condition, args=args, optimizer=optimizer)
584580
elif re.fullmatch(r"i686-pc-windows-msvc", host):
@@ -602,7 +598,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
602598
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
603599
host = "x86_64-unknown-linux-gnu"
604600
condition = "defined(__x86_64__) && defined(__linux__)"
605-
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
601+
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"]
606602
optimizer = _optimizers.OptimizerX86
607603
target = _ELF(host, condition, args=args, optimizer=optimizer)
608604
else:

0 commit comments

Comments
 (0)