Skip to content

Commit 47da6da

Browse files
authored
cibuildwheel-related work (#369)
1 parent 9701b0e commit 47da6da

File tree

17 files changed

+537
-84
lines changed

17 files changed

+537
-84
lines changed

.github/workflows/cibuildwheel.yaml

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# SPDX-FileCopyrightText: 2025 geisserml <[email protected]>
2+
# SPDX-FileCopyrightText: 2025 wojiushixiaobai <[email protected]>
3+
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4+
5+
# NOTE: This workflow is currently written with a dynamic matrix.
6+
# Another option would be to extract a reusable "build one" workflow and declare an individual job for each target here.
7+
8+
name: Build with cibuildwheel
9+
on:
10+
workflow_dispatch:
11+
inputs:
12+
cibw_py_ver:
13+
default: 'cp38'
14+
type: string
15+
linux_main:
16+
default: true
17+
type: boolean
18+
linux_ibm:
19+
default: true
20+
type: boolean
21+
linux_emulated:
22+
default: false
23+
type: boolean
24+
linux_musl:
25+
default: true
26+
type: boolean
27+
28+
permissions: {}
29+
30+
jobs:
31+
32+
prepare_matrix:
33+
name: Determine build matrix
34+
runs-on: ubuntu-latest
35+
outputs:
36+
matrix: ${{ steps.set-matrix.outputs.matrix }}
37+
38+
steps:
39+
- name: Run python script that outputs the build matrix
40+
id: set-matrix
41+
shell: python
42+
env:
43+
LINUX_MAIN: ${{ inputs.linux_main && 1 || 0 }}
44+
LINUX_IBM: ${{ inputs.linux_ibm && 1 || 0 }}
45+
LINUX_EMULATED: ${{ inputs.linux_emulated && 1 || 0 }}
46+
LINUX_MUSL: ${{ inputs.linux_musl && 1 || 0 }}
47+
run: |
48+
import os, sys, json
49+
50+
LINUX_MAIN = bool(int( os.environ["LINUX_MAIN"] ))
51+
LINUX_IBM = bool(int( os.environ["LINUX_IBM"] ))
52+
LINUX_EMULATED = bool(int( os.environ["LINUX_EMULATED"] ))
53+
LINUX_MUSL = bool(int( os.environ["LINUX_MUSL"] ))
54+
55+
matrix = []
56+
images = ["manylinux"]
57+
if LINUX_MUSL:
58+
images.append("musllinux")
59+
60+
def job(image, os, arch, emulated=False):
61+
matrix.append(dict(
62+
image=image, os=os, arch=arch, emulated=emulated
63+
))
64+
65+
def linux_job(os, arch, emulated=False, images=images):
66+
for image in images:
67+
job(os, arch, image, emulated)
68+
69+
if LINUX_MAIN:
70+
linux_job("ubuntu-24.04", "x86_64")
71+
linux_job("ubuntu-24.04-arm", "aarch64")
72+
if LINUX_IBM:
73+
# XXX will become native as soon as we get access to IBM's self-hosted runners
74+
linux_job("ubuntu-24.04", "ppc64le", True) # False
75+
linux_job("ubuntu-24.04", "s390x", True) # False
76+
if LINUX_EMULATED:
77+
linux_job("ubuntu-24.04", "loongarch64", True)
78+
linux_job("ubuntu-24.04", "riscv64", True)
79+
if LINUX_MUSL:
80+
# pdfium-binaries don't currently build armv7l for musl (but they do for glibc)
81+
linux_job("ubuntu-24.04", "armv7l", True, images=("musllinux", ))
82+
83+
matrix_json = json.dumps(matrix)
84+
print(matrix_json, file=sys.stderr)
85+
with open(os.environ["GITHUB_OUTPUT"], 'a') as output_fh:
86+
print(f"matrix={matrix_json}", file=output_fh)
87+
88+
build_wheels:
89+
name: Build ${{ matrix.arch }} ${{ matrix.image }} on ${{ matrix.os }}
90+
runs-on: ${{ matrix.os }}
91+
needs: prepare_matrix
92+
93+
strategy:
94+
fail-fast: false
95+
matrix:
96+
include: ${{ fromJSON(needs.prepare_matrix.outputs.matrix) }}
97+
98+
steps:
99+
100+
- name: Check out the repo
101+
uses: actions/checkout@v4
102+
with:
103+
fetch-depth: 0
104+
105+
- name: Set up QEMU
106+
if: ${{ matrix.emulated }}
107+
uses: docker/setup-qemu-action@v3
108+
109+
# Reminder: most configuration is in pyproject.toml so we can use TOML overrides
110+
- name: Build wheels
111+
uses: pypdfium2-team/[email protected]
112+
env:
113+
# Will be tagged as not python specific by our setup.py. inputs.cibw_py_ver only controls the version used at build time. Could also use `*`, then cibuildwheel would build with the oldest supported version, and walk through the others but skip because a compatible wheel is around already.
114+
CIBW_BUILD: "${{ inputs.cibw_py_ver }}-${{ matrix.image }}_${{ matrix.arch }}"
115+
CIBW_ARCHS: ${{ matrix.arch }}
116+
with:
117+
output-dir: wheelhouse
118+
119+
- name: Upload artifact
120+
uses: actions/upload-artifact@v4
121+
with:
122+
path: ./wheelhouse/*.whl
123+
name: cibw-${{ matrix.image }}-${{ matrix.arch }}

.github/workflows/conda.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ on:
1515
default: 'latest'
1616
type: string
1717
new_only:
18-
# only with package == "raw", ignored otherwise (actually the default should be false in that case, but I don't know if GH supports dynamic defaults depending on other inputs)
18+
# only with package == "raw", ignored otherwise (actually the default should be false in that case, but don't know if GH supports dynamic defaults depending on other inputs)
1919
default: true
2020
type: boolean
2121
test:
@@ -95,6 +95,7 @@ jobs:
9595
fail-fast: false
9696
matrix:
9797
# NOTE On GH actions, macOS <=13 is Intel, whereas macOS >=14 will be ARM64
98+
# Can't test 'windows-11-arm' because setup-miniconda doesn't support it AOTW
9899
os: ['ubuntu-latest', 'ubuntu-24.04-arm', 'macos-13', 'macos-latest', 'windows-latest']
99100
py: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
100101

.github/workflows/main.yaml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,16 @@ jobs:
118118
fail-fast: false
119119
matrix:
120120
# NOTE On GH actions, macOS <=13 is Intel, whereas macOS >=14 will be ARM64
121-
os: ['ubuntu-latest', 'ubuntu-24.04-arm', 'macos-13', 'macos-latest', 'windows-latest']
121+
os: ['ubuntu-latest', 'ubuntu-24.04-arm', 'macos-13', 'macos-latest', 'windows-latest', 'windows-11-arm']
122122
py: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
123+
exclude:
124+
# not supported by setup-python action
125+
- os: windows-11-arm
126+
py: '3.8'
127+
- os: windows-11-arm
128+
py: '3.9'
129+
- os: windows-11-arm
130+
py: '3.10'
123131
include:
124132
- os: ubuntu-latest
125133
wheel: dist/*manylinux_*_x86_64*.whl
@@ -131,13 +139,13 @@ jobs:
131139
wheel: dist/*macosx_*_arm64*.whl
132140
- os: windows-latest
133141
wheel: dist/*win_amd64.whl
142+
- os: windows-11-arm
143+
wheel: dist/*win_arm64.whl
134144

135145
runs-on: ${{ matrix.os }}
136146

137147
steps:
138148

139-
- uses: extractions/setup-just@v3
140-
141149
- name: Set up Python
142150
uses: actions/setup-python@v5
143151
with:
@@ -172,7 +180,7 @@ jobs:
172180
WHEEL: ${{ matrix.wheel }}
173181

174182
- name: Run Test Suite
175-
run: just test
183+
run: python3 -m pytest tests/
176184

177185

178186
publish:

.github/workflows/test_release.yaml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,21 @@ jobs:
2121
fail-fast: false
2222
matrix:
2323
# NOTE On GH actions, macOS <=13 is Intel, whereas macOS >=14 will be ARM64
24-
os: ['ubuntu-latest', 'ubuntu-24.04-arm', 'macos-13', 'macos-latest', 'windows-latest']
24+
os: ['ubuntu-latest', 'ubuntu-24.04-arm', 'macos-13', 'macos-latest', 'windows-latest', 'windows-11-arm']
2525
py: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
26+
exclude:
27+
# not supported by setup-python action
28+
- os: windows-11-arm
29+
py: '3.8'
30+
- os: windows-11-arm
31+
py: '3.9'
32+
- os: windows-11-arm
33+
py: '3.10'
2634

2735
runs-on: ${{ matrix.os }}
2836

2937
steps:
3038

31-
- uses: extractions/setup-just@v3
32-
3339
- name: Set up Python
3440
uses: actions/setup-python@v5
3541
with:
@@ -54,4 +60,4 @@ jobs:
5460
run: python3 -m pip install -U -r req/converters.txt -r req/test.txt
5561

5662
- name: Run tests
57-
run: just test
63+
run: python3 -m pytest tests/

.github/workflows/test_setup.yaml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,19 @@ jobs:
1818
fail-fast: false
1919
matrix:
2020
# NOTE On GH actions, macOS <=13 is Intel, whereas macOS >=14 will be ARM64
21-
os: ['ubuntu-latest', 'ubuntu-24.04-arm', 'macos-13', 'macos-latest', 'windows-latest']
21+
os: ['ubuntu-latest', 'ubuntu-24.04-arm', 'macos-13', 'macos-latest', 'windows-latest', 'windows-11-arm']
2222
py: ['3.9', '3.10', '3.11', '3.12', '3.13']
23+
exclude:
24+
# not supported by setup-python action
25+
- os: windows-11-arm
26+
py: '3.9'
27+
- os: windows-11-arm
28+
py: '3.10'
2329

2430
runs-on: ${{ matrix.os }}
2531

2632
steps:
2733

28-
- uses: extractions/setup-just@v3
29-
3034
# AOTW, the slsa-verifier GH action does not support anything but Ubuntu x86_64.
3135
- name: slsa-verifier
3236
if: ${{ startsWith(matrix.os, 'ubuntu') && !endsWith(matrix.os, '-arm') }}
@@ -61,7 +65,7 @@ jobs:
6165
run: python3 -m pip install -v --no-build-isolation -e .
6266

6367
- name: Build docs
64-
run: just docs-build
68+
run: python3 -m sphinx -b html docs/source docs/build/html
6569

6670
- name: Run test suite
67-
run: just test
71+
run: python3 -m pytest tests/

.github/workflows/test_sourcebuild.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@ jobs:
1919
fail-fast: false
2020
matrix:
2121
# On GH actions, macOS <=13 is Intel, whereas macOS >=14 will be ARM64
22-
# Google's toolchain doesn't seem to run on Linux arm64 natively. The toolchain-free build (or cross-compilation from x86_64) should work, though.
23-
os: ['ubuntu-latest', 'macos-13', 'macos-latest', 'windows-latest'] # 'ubuntu-24.04-arm'
22+
# Google's toolchain doesn't seem to run on Linux/Windows arm64 natively. The toolchain-free build (or cross-compilation from x86_64) should work, though.
23+
# 'ubuntu-24.04-arm', 'windows-11-arm'
24+
os: ['ubuntu-latest', 'macos-13', 'macos-latest', 'windows-latest']
2425
build_mode: ['toolchained']
2526
include:
2627
- os: 'ubuntu-latest'

README.md

Lines changed: 66 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ This project comes with two scripts to automate the build process: `build_toolch
142142
- `build_toolchained` is based on the build instructions in pdfium's Readme, and uses Google's toolchain (this means foreign binaries and sysroots). This results in a heavy checkout process that may take a lot of time and space. By default, this script will use vendored libraries, but you can also pass `--use-syslibs` to try to use system libraries. An advantage of the toolchain is its powerful cross-compilation support (including symbol reversioning).
143143
- `build_native` is an attempt to address some shortcomings of the toolchained build (mainly a bloated checkout process, and lack of portability). It is tailored towards native compilation, and uses system tools and libraries (including the system's GCC compiler), which must be installed by the caller beforehand. This script should theoretically work on arbitrary Linux architectures. As a drawback, this process is not supported or even documented upstream, so it might be hard to maintain.
144144

145-
You can also set `PDFIUM_PLATFORM` to `sourcebuild-native` or `sourcebuild-toolchained` to trigger either build script through setup.
145+
You can also set `PDFIUM_PLATFORM` to `sourcebuild-native` or `sourcebuild-toolchained` to trigger either build script through setup, and pass command-line flags with `$BUILD_PARAMS`.
146146
However, for simplicity, both scripts/subtargets share just `sourcebuild` as staging directory.
147147

148148
Dependencies:
@@ -160,7 +160,7 @@ PDFIUM_PLATFORM="sourcebuild" python -m pip install -v .
160160
Or for the native build, on Ubuntu 24.04, you could do e.g.:
161161
```bash
162162
# Install dependencies
163-
sudo apt-get install generate-ninja ninja-build libfreetype-dev liblcms2-dev libjpeg-dev libopenjp2-7-dev libpng-dev zlib1g-dev libicu-dev libtiff-dev libglib2.0-dev
163+
sudo apt-get install generate-ninja ninja-build libfreetype-dev liblcms2-dev libjpeg-dev libopenjp2-7-dev libpng-dev libtiff-dev zlib1g-dev libicu-dev libglib2.0-dev
164164
```
165165
```bash
166166
# Build with GCC
@@ -180,9 +180,50 @@ python ./setupsrc/pypdfium2_setup/build_native.py --compiler clang
180180
PDFIUM_PLATFORM="sourcebuild" python -m pip install -v .
181181
```
182182

183+
Note, on *some* platforms, you might also need symlinks for GCC, e.g.:
184+
```bash
185+
PREFIX=$(python ./utils/get_gcc_prefix.py) # in pypdfium2 dir
186+
GCC_DIR="/usr" # or e.g. /opt/rh/gcc-toolset-14/root
187+
sudo ln -s $GCC_DIR/bin/gcc $GCC_DIR/bin/$PREFIX-gcc
188+
sudo ln -s $GCC_DIR/bin/g++ $GCC_DIR/bin/$PREFIX-g++
189+
sudo ln -s $GCC_DIR/bin/nm $GCC_DIR/bin/$PREFIX-nm
190+
sudo ln -s $GCC_DIR/bin/readelf $GCC_DIR/bin/$PREFIX-readelf
191+
```
192+
183193
> [!TIP]
184194
> By default, the build scripts will create separate DLLs for vendored dependency libraries (e.g. `abseil`). However, if you want to bundle everything into a single DLL, pass `--single-lib`.
185195
196+
> [!NOTE]
197+
> The native sourcebuild currently supports Linux (or similar).
198+
> macOS and Windows are not handled, as we do not have access to these systems, and working over CI did not turn out to be feasible – use the toolchain-based build for now.
199+
> Community help / pull requests to extend platform support would be welcome.
200+
201+
##### cibuildwheel
202+
203+
The native sourcebuild can be run through cibuildwheel. For targets configured in our [`pyproject.toml`](./pyproject.toml), the basic invocation is as simple as, for example:
204+
```bash
205+
CIBW_BUILD="cp311-manylinux_x86_64" cibuildwheel
206+
```
207+
208+
See also our [cibuildwheel workflow](.github/workflows/cibuildwheel.yaml).
209+
For more options, see the [upstream documentation](https://cibuildwheel.pypa.io/en/stable/options).
210+
211+
Note that, for Linux, cibuildwheel requires Docker. On the author's version of Fedora, it can be installed as follows:
212+
```bash
213+
sudo dnf in moby-engine # this provides the docker command
214+
sudo systemctl start docker
215+
sudo systemctl enable docker
216+
sudo usermod -aG docker $USER
217+
# then reboot (re-login might also suffice)
218+
```
219+
For other ways of installing Docker, refer to the cibuildwheel docs ([Setup](https://cibuildwheel.pypa.io/en/stable/setup/), [Platforms](https://cibuildwheel.pypa.io/en/stable/platforms/)) and the links therein.
220+
221+
> [!WARNING]
222+
> cibuildwheel copies the project directory into a container, not taking `.gitignore` rules into account.
223+
> Thus, it is advisable to make a fresh checkout of pypdfium2 before running cibuildwheel.
224+
> In particular, a toolchained checkout of pdfium within pypdfium2 is problematic, and will cause a halt on the `Copying project into container...` step.
225+
> For development, make sure the fresh checkout is in sync with the working copy.
226+
186227
##### Android (Termux)
187228

188229
The native build may also work on Android with Termux in principle.
@@ -310,7 +351,7 @@ Disclaimer: As it is hard to keep up with constantly evolving setup code, it is
310351
+ If unset or `auto`, the host platform is detected and a corresponding binary will be selected.
311352
+ If an explicit platform identifier (e.g. `linux_x64`, `darwin_arm64`, ...), binaries for the requested platform will be used.[^platform_ids]
312353
+ If `system-search`, look for and bind against system-provided pdfium instead of embedding a binary. If just `system`, consume existing bindings from `data/system/`.
313-
+ If `sourcebuild`, binary and bindings will be taken from `data/sourcebuild/`, assuming a prior run of the native or toolchained build scripts. `sourcebuild-native` or `sourcebuild-toolchained` can also be used to trigger either build through setup. However, triggering on the caller side is preferred as this allows to pass custom options.
354+
+ If `sourcebuild`, binary and bindings will be taken from `data/sourcebuild/`, assuming a prior run of the native or toolchained build scripts. `sourcebuild-native` or `sourcebuild-toolchained` can also be used to trigger either build through setup (use `$BUILD_PARAMS` to pass custom options).
314355
+ If `sdist`, no platform-specific files will be included, so as to create a source distribution.
315356

316357
* `$PYPDFIUM_MODULES=[raw,helpers]` defines the modules to include. Metadata adapts dynamically.
@@ -954,7 +995,6 @@ Additionally, one doc build can also be hosted on [GitHub Pages](https://pypdfiu
954995
It is implemented with a CI workflow, which is supposed to be triggered automatically on release.
955996
This provides us with full control over build env and used commands, whereas RTD may be less liberal in this regard.
956997

957-
958998
### Testing
959999

9601000
pypdfium2 contains a small test suite to verify the library's functionality. It is written with [pytest](https://github.com/pytest-dev/pytest/):
@@ -984,10 +1024,28 @@ find . -name '*.pdf' -exec bash -c "echo \"{}\" && pypdfium2 toc \"{}\"" \;
9841024

9851025
[^testing_corpora]: For instance, one could use the testing corpora of open-source PDF libraries (pdfium, pikepdf/ocrmypdf, mupdf/ghostscript, tika/pdfbox, pdfjs, ...)
9861026

1027+
### Adding a new workflow
1028+
1029+
When writing a new workflow, it is usually desirable to test in a branch first before merging into main.
1030+
However, new workflows from branches cannot be dispatched from the GitHub Actions panel yet. That's why you'll want to use the [`gh`](https://cli.github.com/) command-line tool, as follows:
1031+
```bash
1032+
gh workflow run $WORKFLOW_NAME.yaml --ref $MY_BRANCH
1033+
```
1034+
If inputs are needed, they can be passed as JSON:
1035+
```bash
1036+
echo '{"my_json_info":1, "my_var":"hello"}' | gh workflow run $WORKFLOW_NAME.yaml --ref $MY_BRANCH --json
1037+
# real-world example
1038+
echo '{"cibw_py_ver":"cp38", "linux_main":"true", "linux_ibm":"false", "linux_emulated":"false", "linux_musl":"true"}' | gh workflow run cibuildwheel.yaml --ref cibuildwheel --json
1039+
```
1040+
You should pass the complete set of fields here, as defaults might not be recognized with this form of dispatch.
1041+
1042+
> [!IMPORTANT]
1043+
> You need to be in the pypdfium2 directory for this to work. Otherwise, the request will be silently ignored.
1044+
9871045
### Release workflow
9881046

9891047
The release process is fully automated using Python scripts and scheduled release workflows.
990-
You may also trigger the workflow manually using the GitHub Actions panel or the [`gh`](https://cli.github.com/) command-line tool.
1048+
You may also trigger the workflow manually from the GitHub Actions panel or similar.
9911049

9921050
Python release scripts are located in the folder `setupsrc/pypdfium2_setup`, along with custom setup code:
9931051
* `update.py` downloads binaries.
@@ -1038,10 +1096,10 @@ If something went wrong with commit or tag, you can still revert the changes:
10381096
# perform an interactive rebase to change history (substitute $N_COMMITS with the number of commits to drop or modify)
10391097
git rebase -i HEAD~$N_COMMITS
10401098
git push --force
1041-
# delete local tag (substitute $TAGNAME accordingly)
1042-
git tag -d $TAGNAME
1043-
# delete remote tag
1099+
# delete remote tag (substitute $TAGNAME accordingly)
10441100
git push --delete origin $TAGNAME
1101+
# delete local tag
1102+
git tag -d $TAGNAME
10451103
```
10461104
Faulty PyPI releases may be yanked using the web interface.
10471105

0 commit comments

Comments
 (0)