# Reusable-workflow trigger. Callers can toggle the Codecov upload through
# `push_coverage` and may optionally hand over the upload token.
workflow_call:
  inputs:
    push_coverage:
      type: boolean
      description: "If true, the coverage results are pushed to codecov.io."
      default: true
  secrets:
    # Optional: the upload steps pass it straight to codecov/codecov-action.
    CODECOV_TOKEN:
      required: false
1111
# Workflow-level environment, visible to every job.
env:
  # Presumably read by the Hugging Face hub client as timeouts in seconds
  # (confirm against the huggingface_hub documentation).
  HF_HUB_DOWNLOAD_TIMEOUT: "90"
  HF_HUB_ETAG_TIMEOUT: "90"
  UV_FROZEN: "1"
  # Newline-separated test files. run-tests-1 turns each line into an
  # `--ignore=` option; run-tests-2 passes the same lines as test paths.
  PYTEST_ML: |-
    tests/test_e2e_conversion.py
    tests/test_e2e_ocr_conversion.py
    tests/test_backend_webp.py
    tests/test_asr_pipeline.py
    tests/test_threaded_pipeline.py
  # Test node id handed to `pytest --deselect` by run-tests-2.
  # Empty means nothing is deselected.
  PYTEST_TO_SKIP: ""
  # Bash extended regex matched against the basename of each file in
  # docs/examples by run-examples; matching examples are not executed.
  # The pattern must start at `^` (no leading whitespace) to be matchable.
  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping)\.py$'
1624
1725jobs :
18- run-checks :
# Style gate: runs the repository's pre-commit hooks against all files on a
# single Python version.
lint:
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      # Quoted, and without padding, so the value is exactly the version string.
      python-version: ['3.12']
  steps:
    - uses: actions/checkout@v5

    - name: Install uv and set the python version
      uses: astral-sh/setup-uv@v6
      with:
        python-version: ${{ matrix.python-version }}
        enable-cache: true

    # Exports PY = sha256 of the `python -VV` banner; it is part of the cache
    # key below, so a different interpreter build gets a different cache.
    - name: Set pre-commit cache key
      run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"

    - name: Cache pre-commit environments
      uses: actions/cache@v4
      with:
        path: ~/.cache/pre-commit
        key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
        # Fall back to a cache for the same interpreter when only the hook
        # configuration changed.
        restore-keys: |
          pre-commit|${{ env.PY }}|

    - name: Install Python Dependencies
      run: uv sync --frozen --all-extras

    - name: Check style
      run: |
        echo "--- Running pre-commit style checks ---"
        uv run pre-commit run --all-files
59+
# Test group 1: the whole pytest suite except the files listed in PYTEST_ML,
# run once per supported Python version.
run-tests-1:
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      python-version:
        - '3.9'
        - '3.10'
        - '3.11'
        - '3.12'
        - '3.13'
  steps:
    - uses: actions/checkout@v5

    - name: Grant permissions to APT cache directory  # allows restore
      run: sudo chown -R $USER:$USER /var/cache/apt/archives

    - name: Cache APT packages
      id: apt-cache
      uses: actions/cache@v4
      with:
        path: /var/cache/apt/archives
        key: apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
        restore-keys: |
          apt-packages-${{ runner.os }}-

    - name: Install System Dependencies
      run: |
        # Refresh the package lists unless the APT cache was an exact key hit.
        [[ "${{ steps.apt-cache.outputs.cache-hit }}" == "true" ]] || sudo apt-get -qq update
        sudo apt-get -qq install -y \
          ffmpeg \
          tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu \
          tesseract-ocr-spa tesseract-ocr-script-latn \
          libleptonica-dev libtesseract-dev pkg-config

    # Exports the tessdata directory shipped by tesseract-ocr-eng.
    - name: Set TESSDATA_PREFIX
      run: |
        echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"

    - name: Install uv and set the python version
      uses: astral-sh/setup-uv@v6
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install Python Dependencies
      run: uv sync --frozen --all-extras

    - name: Cache Models
      uses: actions/cache@v4
      with:
        key: models-cache
        path: |
          ~/.cache/huggingface
          ~/.cache/modelscope
          ~/.EasyOCR/

    # Constructing the reader up front; presumably this is what fetches the
    # EasyOCR model files into ~/.EasyOCR (confirm).
    - name: Pre-download Models
      run: uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"

    - name: Run tests for GROUP1
      run: |
        echo "--- Running tests ---"
        # One `--ignore=<file>` option per line of PYTEST_ML, joined by spaces.
        ignore_opts=$(printf '%s\n' "$PYTEST_ML" | sed -e 's/^/--ignore=/' | tr '\n' ' ')
        echo "Running tests for GROUP1"
        # $ignore_opts is intentionally unquoted so it splits into separate options.
        uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test $ignore_opts

    - name: Upload coverage to Codecov
      if: inputs.push_coverage
      uses: codecov/codecov-action@v5
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./coverage.xml
        flags: run-tests-1

    - name: Grant permissions to APT cache directory  # allows backup
      run: sudo chown -R $USER:$USER /var/cache/apt/archives
128+
# Test group 2: only the test files listed in PYTEST_ML, run once per
# supported Python version. Node ids listed in PYTEST_TO_SKIP are deselected.
run-tests-2:
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
  steps:
    - uses: actions/checkout@v5

    - name: Grant permissions to APT cache directory  # allows restore
      run: sudo chown -R $USER:$USER /var/cache/apt/archives

    - name: Cache APT packages
      id: apt-cache
      uses: actions/cache@v4
      with:
        path: /var/cache/apt/archives
        key: apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
        restore-keys: |
          apt-packages-${{ runner.os }}-

    - name: Install System Dependencies
      run: |
        # Package lists are refreshed only when the APT cache was not an exact hit.
        if [[ "${{ steps.apt-cache.outputs.cache-hit }}" != "true" ]]; then
          sudo apt-get -qq update
        fi
        sudo apt-get -qq install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev pkg-config

    - name: Set TESSDATA_PREFIX
      run: echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"

    - name: Install uv and set the python version
      uses: astral-sh/setup-uv@v6
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install Python Dependencies
      run: uv sync --frozen --all-extras

    - name: Cache Models
      uses: actions/cache@v4
      with:
        path: |
          ~/.cache/huggingface
          ~/.cache/modelscope
          ~/.EasyOCR/
        key: models-cache

    - name: Pre-download Models
      run: uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"

    - name: Run tests for GROUP2
      run: |
        echo "--- Running tests ---"
        # PYTEST_ML lines become space-separated test paths.
        GROUP2=$(echo "$PYTEST_ML" | tr '\n' ' ')
        # Build one `--deselect=<node id>` per non-blank line of PYTEST_TO_SKIP.
        # A single `--deselect` in front of the whole list would only cover the
        # first entry; the remaining entries would be taken as paths to run.
        # Node ids containing whitespace are not supported (word splitting).
        DESELECT_OPT=""
        if [ -n "$PYTEST_TO_SKIP" ]; then
          DESELECT_OPT=$(echo "$PYTEST_TO_SKIP" | sed -e '/^[[:space:]]*$/d' -e 's/^/--deselect=/' | tr '\n' ' ')
        fi
        echo "Running tests for GROUP2"
        # Both variables are intentionally unquoted so they split into arguments.
        uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test $GROUP2 $DESELECT_OPT

    - name: Upload coverage to Codecov
      if: inputs.push_coverage
      uses: codecov/codecov-action@v5
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./coverage.xml
        flags: run-tests-2

    - name: Grant permissions to APT cache directory  # allows backup
      run: sudo chown -R $USER:$USER /var/cache/apt/archives
202+
# Executes every script in docs/examples (minus those matching
# EXAMPLES_TO_SKIP) on each supported Python version and prints a per-example
# runtime summary. The first failing example aborts the step.
run-examples:
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      python-version:
        - '3.9'
        - '3.10'
        - '3.11'
        - '3.12'
        - '3.13'
  steps:
    - uses: actions/checkout@v5

    - name: Grant permissions to APT cache directory  # allows restore
      run: sudo chown -R $USER:$USER /var/cache/apt/archives

    - name: Cache APT packages
      id: apt-cache
      uses: actions/cache@v4
      with:
        path: /var/cache/apt/archives
        key: apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
        restore-keys: |
          apt-packages-${{ runner.os }}-

    - name: Install System Dependencies
      run: |
        # Refresh the package lists unless the APT cache was an exact key hit.
        [[ "${{ steps.apt-cache.outputs.cache-hit }}" == "true" ]] || sudo apt-get -qq update
        sudo apt-get -qq install -y \
          ffmpeg \
          tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu \
          tesseract-ocr-spa tesseract-ocr-script-latn \
          libleptonica-dev libtesseract-dev pkg-config

    - name: Set TESSDATA_PREFIX
      run: |
        echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"

    - name: Install uv and set the python version
      uses: astral-sh/setup-uv@v6
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install Python Dependencies
      run: uv sync --frozen --all-extras

    - name: Cache Models
      uses: actions/cache@v4
      with:
        key: models-cache
        path: |
          ~/.cache/huggingface
          ~/.cache/modelscope
          ~/.EasyOCR/

    - name: Pre-download Models
      run: uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"

    - name: Run examples
      run: |
        echo "--- Creating output directory ---"
        mkdir -p scratch

        echo "--- Running examples ---"

        # Per-example durations are collected here and printed at the end.
        report="runtime_summary.log"
        echo "--- Example Runtimes ---" > "$report"

        for example in docs/examples/*.py; do
          name="$(basename "$example")"

          # Guard clause: examples matching the skip regex are not executed.
          if [[ "$name" =~ ${EXAMPLES_TO_SKIP} ]]; then
            echo "Skipping example: ${name}"
            continue
          fi

          echo "--- Running example ${name} ---"

          began=$SECONDS

          uv run --no-sync python "$example" || exit 1
          elapsed=$((SECONDS - began))
          echo "Finished in ${elapsed}s."

          echo "${name}: ${elapsed}s" >> "$report"
        done

        rule="==================================="
        echo
        echo "$rule"
        echo " Final Runtime Summary "
        echo "$rule"
        cat "$report"
        echo "$rule"

    - name: Grant permissions to APT cache directory  # allows backup
      run: sudo chown -R $USER:$USER /var/cache/apt/archives
71289
72290 build-package :
73291 runs-on : ubuntu-latest
74292 strategy :
75293 matrix :
76294 python-version : ['3.12']
77295 steps :
78- - uses : actions/checkout@v4
296+ - uses : actions/checkout@v5
297+
79298 - name : Install uv and set the python version
80- uses : astral-sh/setup-uv@v5
299+ uses : astral-sh/setup-uv@v6
81300 with :
82301 python-version : ${{ matrix.python-version }}
83302 enable-cache : true
303+
84304 - name : Install dependencies
85305 run : uv sync --all-extras
306+
86307 - name : Build package
87308 run : uv build
309+
88310 - name : Check content of wheel
89311 run : unzip -l dist/*.whl
312+
90313 - name : Store the distribution packages
91314 uses : actions/upload-artifact@v4
92315 with :
@@ -106,12 +329,17 @@ jobs:
106329 with :
107330 name : python-package-distributions
108331 path : dist/
332+
109333 - name : Install uv and set the python version
110- uses : astral-sh/setup-uv@v5
334+ uses : astral-sh/setup-uv@v6
111335 with :
112336 python-version : ${{ matrix.python-version }}
113- enable-cache : true
337+ activate-environment : true
338+ enable-cache : false
339+
114340 - name : Install package
115- run : uv pip install dist/*.whl
341+ run : |
342+ uv pip install dist/*.whl
343+
116344 - name : Run docling
117- run : docling --help
345+ run : uv run docling --help
0 commit comments