Skip to content

Commit 68230fe

Browse files
rmdg88 and dolfim-ibm authored
ci: split workflow to speedup CI runtime (#2313)
* split workflow
  Signed-off-by: rmdg88 <[email protected]>
* split workflow
  Signed-off-by: rmdg88 <[email protected]>
* enable test_e2e_pdfs_conversions
  Co-authored-by: Michele Dolfi <[email protected]>
  Signed-off-by: Rui Dias Gomes <[email protected]>
* split workflow
  Signed-off-by: rmdg88 <[email protected]>
* split workflow
  Signed-off-by: rmdg88 <[email protected]>
* split workflow
  Signed-off-by: rmdg88 <[email protected]>
* split workflow
  Signed-off-by: rmdg88 <[email protected]>
* split workflow
  Signed-off-by: rmdg88 <[email protected]>
* fix conflict files
  Signed-off-by: rmdg88 <[email protected]>
---------
Signed-off-by: rmdg88 <[email protected]>
Signed-off-by: Rui Dias Gomes <[email protected]>
Signed-off-by: Michele Dolfi <[email protected]>
Co-authored-by: Michele Dolfi <[email protected]>
Co-authored-by: Michele Dolfi <[email protected]>
1 parent ee73ffa commit 68230fe

File tree

4 files changed

+293
-50
lines changed

4 files changed

+293
-50
lines changed

.github/workflows/checks.yml

Lines changed: 277 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,91 +2,314 @@ on:
22
workflow_call:
33
inputs:
44
push_coverage:
5-
type: boolean
6-
description: "If true, the coverage results are pushed to codecov.io."
7-
default: true
5+
type: boolean
6+
description: "If true, the coverage results are pushed to codecov.io."
7+
default: true
88
secrets:
99
CODECOV_TOKEN:
10-
required: false
10+
required: false
1111

1212
env:
13-
HF_HUB_DOWNLOAD_TIMEOUT: "60"
14-
HF_HUB_ETAG_TIMEOUT: "60"
13+
HF_HUB_DOWNLOAD_TIMEOUT: "90"
14+
HF_HUB_ETAG_TIMEOUT: "90"
1515
UV_FROZEN: "1"
16+
PYTEST_ML: |-
17+
tests/test_e2e_conversion.py
18+
tests/test_e2e_ocr_conversion.py
19+
tests/test_backend_webp.py
20+
tests/test_asr_pipeline.py
21+
tests/test_threaded_pipeline.py
22+
PYTEST_TO_SKIP: |-
23+
EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping)\.py$'
1624

1725
jobs:
18-
run-checks:
26+
lint:
1927
runs-on: ubuntu-latest
2028
strategy:
29+
fail-fast: false
2130
matrix:
22-
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
31+
python-version: ['3.12']
2332
steps:
24-
- uses: actions/checkout@v4
25-
- name: Install tesseract and ffmpeg
26-
run: sudo apt-get update && sudo apt-get install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev pkg-config
27-
- name: Set TESSDATA_PREFIX
28-
run: |
29-
echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
30-
- name: Cache Hugging Face models
31-
uses: actions/cache@v4
32-
with:
33-
path: ~/.cache/huggingface
34-
key: huggingface-cache-py${{ matrix.python-version }}
33+
- uses: actions/checkout@v5
34+
3535
- name: Install uv and set the python version
36-
uses: astral-sh/setup-uv@v5
36+
uses: astral-sh/setup-uv@v6
3737
with:
3838
python-version: ${{ matrix.python-version }}
3939
enable-cache: true
40-
- name: pre-commit cache key
40+
41+
- name: Set pre-commit cache key
4142
run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"
42-
- uses: actions/cache@v4
43+
44+
- name: Cache pre-commit environments
45+
uses: actions/cache@v4
4346
with:
4447
path: ~/.cache/pre-commit
4548
key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
46-
- name: Install dependencies
49+
restore-keys: |
50+
pre-commit|${{ env.PY }}|
51+
52+
- name: Install Python Dependencies
4753
run: uv sync --frozen --all-extras
48-
- name: Check style and run tests
49-
run: pre-commit run --all-files
50-
- name: Testing
51-
run: |
52-
uv run --no-sync pytest -v --cov=docling --cov-report=xml tests
53-
- name: Upload coverage to Codecov
54-
if: inputs.push_coverage
55-
uses: codecov/codecov-action@v5
56-
with:
57-
token: ${{ secrets.CODECOV_TOKEN }}
58-
files: ./coverage.xml
59-
- name: Run examples
54+
55+
- name: Check style
6056
run: |
61-
for file in docs/examples/*.py; do
62-
# Skip batch_convert.py
63-
if [[ "$(basename "$file")" =~ ^(batch_convert|granitedocling_repetition_stopping|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model).py ]]; then
64-
echo "Skipping $file"
65-
continue
57+
echo "--- Running pre-commit style checks ---"
58+
uv run pre-commit run --all-files
59+
60+
run-tests-1:
61+
runs-on: ubuntu-latest
62+
strategy:
63+
fail-fast: false
64+
matrix:
65+
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
66+
steps:
67+
- uses: actions/checkout@v5
68+
69+
- name: Grant permissions to APT cache directory # allows restore
70+
run: sudo chown -R $USER:$USER /var/cache/apt/archives
71+
72+
- name: Cache APT packages
73+
id: apt-cache
74+
uses: actions/cache@v4
75+
with:
76+
path: /var/cache/apt/archives
77+
key: apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
78+
restore-keys: |
79+
apt-packages-${{ runner.os }}-
80+
81+
- name: Install System Dependencies
82+
run: |
83+
if [[ "${{ steps.apt-cache.outputs.cache-hit }}" != "true" ]]; then
84+
sudo apt-get -qq update
6685
fi
86+
sudo apt-get -qq install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev pkg-config
87+
88+
- name: Set TESSDATA_PREFIX
89+
run: echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
90+
91+
- name: Install uv and set the python version
92+
uses: astral-sh/setup-uv@v6
93+
with:
94+
python-version: ${{ matrix.python-version }}
95+
96+
- name: Install Python Dependencies
97+
run: uv sync --frozen --all-extras
98+
99+
- name: Cache Models
100+
uses: actions/cache@v4
101+
with:
102+
path: |
103+
~/.cache/huggingface
104+
~/.cache/modelscope
105+
~/.EasyOCR/
106+
key: models-cache
107+
108+
- name: Pre-download Models
109+
run: uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"
110+
111+
- name: Run tests for GROUP1
112+
run: |
113+
echo "--- Running tests ---"
114+
GROUP1=$(echo "$PYTEST_ML" | sed -e 's/^/--ignore=/' | tr '\n' ' ')
115+
echo "Running tests for GROUP1"
116+
uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test $GROUP1
117+
118+
- name: Upload coverage to Codecov
119+
if: inputs.push_coverage
120+
uses: codecov/codecov-action@v5
121+
with:
122+
token: ${{ secrets.CODECOV_TOKEN }}
123+
files: ./coverage.xml
124+
flags: run-tests-1
125+
126+
- name: Grant permissions to APT cache directory # allows backup
127+
run: sudo chown -R $USER:$USER /var/cache/apt/archives
128+
129+
run-tests-2:
130+
runs-on: ubuntu-latest
131+
strategy:
132+
fail-fast: false
133+
matrix:
134+
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
135+
steps:
136+
- uses: actions/checkout@v5
137+
138+
- name: Grant permissions to APT cache directory # allows restore
139+
run: sudo chown -R $USER:$USER /var/cache/apt/archives
140+
141+
- name: Cache APT packages
142+
id: apt-cache
143+
uses: actions/cache@v4
144+
with:
145+
path: /var/cache/apt/archives
146+
key: apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
147+
restore-keys: |
148+
apt-packages-${{ runner.os }}-
149+
150+
- name: Install System Dependencies
151+
run: |
152+
if [[ "${{ steps.apt-cache.outputs.cache-hit }}" != "true" ]]; then
153+
sudo apt-get -qq update
154+
fi
155+
sudo apt-get -qq install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev pkg-config
156+
157+
- name: Set TESSDATA_PREFIX
158+
run: echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
67159

68-
echo "Running example $file"
69-
uv run --no-sync python "$file" || exit 1
70-
done
160+
- name: Install uv and set the python version
161+
uses: astral-sh/setup-uv@v6
162+
with:
163+
python-version: ${{ matrix.python-version }}
164+
165+
- name: Install Python Dependencies
166+
run: uv sync --frozen --all-extras
167+
168+
- name: Cache Models
169+
uses: actions/cache@v4
170+
with:
171+
path: |
172+
~/.cache/huggingface
173+
~/.cache/modelscope
174+
~/.EasyOCR/
175+
key: models-cache
176+
177+
- name: Pre-download Models
178+
run: uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"
179+
180+
- name: Run tests for GROUP2
181+
run: |
182+
echo "--- Running tests ---"
183+
GROUP2=$(echo "$PYTEST_ML" | tr '\n' ' ')
184+
echo "Running tests for GROUP2"
185+
DESELECT_OPT=""
186+
if [ -n "$PYTEST_TO_SKIP" ]; then
187+
DESELECT_OPT="--deselect $PYTEST_TO_SKIP"
188+
fi
189+
echo "Running tests for GROUP2"
190+
uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test $GROUP2 $DESELECT_OPT
191+
192+
- name: Upload coverage to Codecov
193+
if: inputs.push_coverage
194+
uses: codecov/codecov-action@v5
195+
with:
196+
token: ${{ secrets.CODECOV_TOKEN }}
197+
files: ./coverage.xml
198+
flags: run-tests-2
199+
200+
- name: Grant permissions to APT cache directory # allows backup
201+
run: sudo chown -R $USER:$USER /var/cache/apt/archives
202+
203+
run-examples:
204+
runs-on: ubuntu-latest
205+
strategy:
206+
fail-fast: false
207+
matrix:
208+
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
209+
steps:
210+
- uses: actions/checkout@v5
211+
212+
- name: Grant permissions to APT cache directory # allows restore
213+
run: sudo chown -R $USER:$USER /var/cache/apt/archives
214+
215+
- name: Cache APT packages
216+
id: apt-cache
217+
uses: actions/cache@v4
218+
with:
219+
path: /var/cache/apt/archives
220+
key: apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
221+
restore-keys: |
222+
apt-packages-${{ runner.os }}-
223+
224+
- name: Install System Dependencies
225+
run: |
226+
if [[ "${{ steps.apt-cache.outputs.cache-hit }}" != "true" ]]; then
227+
sudo apt-get -qq update
228+
fi
229+
sudo apt-get -qq install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev pkg-config
230+
231+
- name: Set TESSDATA_PREFIX
232+
run: echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
233+
234+
- name: Install uv and set the python version
235+
uses: astral-sh/setup-uv@v6
236+
with:
237+
python-version: ${{ matrix.python-version }}
238+
239+
- name: Install Python Dependencies
240+
run: uv sync --frozen --all-extras
241+
242+
- name: Cache Models
243+
uses: actions/cache@v4
244+
with:
245+
path: |
246+
~/.cache/huggingface
247+
~/.cache/modelscope
248+
~/.EasyOCR/
249+
key: models-cache
250+
251+
- name: Pre-download Models
252+
run: uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"
253+
254+
- name: Run examples
255+
run: |
256+
echo "--- Creating output directory ---"
257+
mkdir -p scratch
258+
259+
echo "--- Running examples ---"
260+
261+
summary_file="runtime_summary.log"
262+
echo "--- Example Runtimes ---" > "$summary_file"
263+
264+
for file in docs/examples/*.py; do
265+
if [[ "$(basename "$file")" =~ ${EXAMPLES_TO_SKIP} ]]; then
266+
echo "Skipping example: $(basename "$file")"
267+
else
268+
echo "--- Running example $(basename "$file") ---"
269+
270+
start_time=$SECONDS
271+
272+
uv run --no-sync python "$file" || exit 1
273+
duration=$((SECONDS - start_time))
274+
echo "Finished in ${duration}s."
275+
276+
echo "$(basename "$file"): ${duration}s" >> "$summary_file"
277+
fi
278+
done
279+
280+
echo
281+
echo "==================================="
282+
echo " Final Runtime Summary "
283+
echo "==================================="
284+
cat "$summary_file"
285+
echo "==================================="
286+
287+
- name: Grant permissions to APT cache directory # allows backup
288+
run: sudo chown -R $USER:$USER /var/cache/apt/archives
71289

72290
build-package:
73291
runs-on: ubuntu-latest
74292
strategy:
75293
matrix:
76294
python-version: ['3.12']
77295
steps:
78-
- uses: actions/checkout@v4
296+
- uses: actions/checkout@v5
297+
79298
- name: Install uv and set the python version
80-
uses: astral-sh/setup-uv@v5
299+
uses: astral-sh/setup-uv@v6
81300
with:
82301
python-version: ${{ matrix.python-version }}
83302
enable-cache: true
303+
84304
- name: Install dependencies
85305
run: uv sync --all-extras
306+
86307
- name: Build package
87308
run: uv build
309+
88310
- name: Check content of wheel
89311
run: unzip -l dist/*.whl
312+
90313
- name: Store the distribution packages
91314
uses: actions/upload-artifact@v4
92315
with:
@@ -106,12 +329,17 @@ jobs:
106329
with:
107330
name: python-package-distributions
108331
path: dist/
332+
109333
- name: Install uv and set the python version
110-
uses: astral-sh/setup-uv@v5
334+
uses: astral-sh/setup-uv@v6
111335
with:
112336
python-version: ${{ matrix.python-version }}
113-
enable-cache: true
337+
activate-environment: true
338+
enable-cache: false
339+
114340
- name: Install package
115-
run: uv pip install dist/*.whl
341+
run: |
342+
uv pip install dist/*.whl
343+
116344
- name: Run docling
117-
run: docling --help
345+
run: uv run docling --help

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,6 @@ repos:
2222
language: system
2323
files: '\.py$'
2424
- repo: https://github.com/astral-sh/uv-pre-commit
25-
rev: 0.7.8
25+
rev: 0.8.3
2626
hooks:
2727
- id: uv-lock

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ dev = [
123123
"pytest~=8.3",
124124
"pytest-cov>=6.1.1",
125125
"pytest-dependency~=0.6",
126+
"pytest-durations~=1.6.1",
126127
"pytest-xdist~=3.3",
127128
"ipykernel~=6.29",
128129
"ipywidgets~=8.1",

0 commit comments

Comments
 (0)