22 workflow_call :
33 inputs :
44 push_coverage :
5- type : boolean
6- description : " If true, the coverage results are pushed to codecov.io."
7- default : true
5+ type : boolean
6+ description : " If true, the coverage results are pushed to codecov.io."
7+ default : true
88 secrets :
99 CODECOV_TOKEN :
10- required : false
10+ required : false
1111
# Workflow-wide environment, visible to every job and step below.
env:
  # Hugging Face Hub client timeouts (seconds, per huggingface_hub docs — confirm).
  HF_HUB_DOWNLOAD_TIMEOUT: "90"
  HF_HUB_ETAG_TIMEOUT: "90"
  # Makes uv use the lockfile as-is; the install steps also pass --frozen.
  UV_FROZEN: "1"
  # ML-heavy test modules, one path per line. run-tests-1 turns every line
  # into --ignore=<path>; run-tests-2 runs exactly these paths.
  PYTEST_ML: |-
    tests/test_e2e_conversion.py
    tests/test_e2e_ocr_conversion.py
    tests/test_backend_webp.py
    tests/test_asr_pipeline.py
    tests/test_threaded_pipeline.py
  # Test node IDs that run-tests-2 passes to pytest --deselect.
  # Intentionally empty at the moment (evaluates to an empty string).
  PYTEST_TO_SKIP: |-
  # Bash regex matched against the basename of docs/examples/*.py in the
  # run-examples job; matching examples are skipped.
  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping)\.py$'
1624
jobs:
  # Style checks only: runs the pre-commit hooks once, on a single Python version.
  lint:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ['3.12']
    steps:
      - uses: actions/checkout@v5

      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true

      # Hash of the full interpreter version string; exported as PY so the
      # pre-commit cache key below changes whenever the interpreter changes.
      - name: Set pre-commit cache key
        run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"

      - name: Cache pre-commit environments
        uses: actions/cache@v4
        with:
          path: ~/.cache/pre-commit
          key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
          # Fall back to a cache for the same interpreter when only the
          # pre-commit config has changed.
          restore-keys: |
            pre-commit|${{ env.PY }}|

      - name: Install Python Dependencies
        run: uv sync --frozen --all-extras

      - name: Check style
        run: |
          echo "--- Running pre-commit style checks ---"
          uv run pre-commit run --all-files
60+ run-tests-1 :
61+ runs-on : ubuntu-latest
62+ strategy :
63+ fail-fast : false
64+ matrix :
65+ python-version : ['3.9', '3.10', '3.11', '3.12', '3.13']
66+ steps :
67+ - uses : actions/checkout@v5
68+
69+ - name : Grant permissions to APT cache directory # allows restore
70+ run : sudo chown -R $USER:$USER /var/cache/apt/archives
71+
72+ - name : Cache APT packages
73+ id : apt-cache
74+ uses : actions/cache@v4
75+ with :
76+ path : /var/cache/apt/archives
77+ key : apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
78+ restore-keys : |
79+ apt-packages-${{ runner.os }}-
80+
81+ - name : Install System Dependencies
82+ run : |
83+ sudo apt-get -qq update
84+ sudo apt-get -qq install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev libreoffice pkg-config
85+
86+ - name : Set TESSDATA_PREFIX
87+ run : echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
88+
89+ - name : Install uv and set the python version
90+ uses : astral-sh/setup-uv@v6
91+ with :
92+ python-version : ${{ matrix.python-version }}
93+
94+ - name : Install Python Dependencies
95+ run : uv sync --frozen --all-extras
96+
97+ - name : Cache Models
98+ uses : actions/cache@v4
99+ with :
100+ path : |
101+ ~/.cache/huggingface
102+ ~/.cache/modelscope
103+ ~/.EasyOCR/
104+ key : models-cache
105+
106+ - name : Pre-download Models
107+ run : uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"
108+
109+ - name : Run tests for GROUP1
110+ run : |
111+ echo "--- Running tests ---"
112+ GROUP1=$(echo "$PYTEST_ML" | sed -e 's/^/--ignore=/' | tr '\n' ' ')
113+ echo "Running tests for GROUP1"
114+ uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test $GROUP1
115+
116+ - name : Upload coverage to Codecov
117+ if : inputs.push_coverage
118+ uses : codecov/codecov-action@v5
119+ with :
120+ token : ${{ secrets.CODECOV_TOKEN }}
121+ files : ./coverage.xml
122+ flags : run-tests-1
123+
124+ - name : Grant permissions to APT cache directory # allows backup
125+ run : sudo chown -R $USER:$USER /var/cache/apt/archives
126+
127+ run-tests-2 :
128+ runs-on : ubuntu-latest
129+ strategy :
130+ fail-fast : false
131+ matrix :
132+ python-version : ['3.9', '3.10', '3.11', '3.12', '3.13']
133+ steps :
134+ - uses : actions/checkout@v5
135+
136+ - name : Grant permissions to APT cache directory # allows restore
137+ run : sudo chown -R $USER:$USER /var/cache/apt/archives
138+
139+ - name : Cache APT packages
140+ id : apt-cache
141+ uses : actions/cache@v4
142+ with :
143+ path : /var/cache/apt/archives
144+ key : apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
145+ restore-keys : |
146+ apt-packages-${{ runner.os }}-
147+
148+ - name : Install System Dependencies
149+ run : |
150+ sudo apt-get -qq update
151+ sudo apt-get -qq install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev libreoffice pkg-config
152+
153+ - name : Set TESSDATA_PREFIX
154+ run : echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
155+
156+ - name : Install uv and set the python version
157+ uses : astral-sh/setup-uv@v6
158+ with :
159+ python-version : ${{ matrix.python-version }}
160+
161+ - name : Install Python Dependencies
162+ run : uv sync --frozen --all-extras
163+
164+ - name : Cache Models
165+ uses : actions/cache@v4
166+ with :
167+ path : |
168+ ~/.cache/huggingface
169+ ~/.cache/modelscope
170+ ~/.EasyOCR/
171+ key : models-cache
172+
173+ - name : Pre-download Models
174+ run : uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"
175+
176+ - name : Run tests for GROUP2
177+ run : |
178+ echo "--- Running tests ---"
179+ GROUP2=$(echo "$PYTEST_ML" | tr '\n' ' ')
180+ echo "Running tests for GROUP2"
181+ DESELECT_OPT=""
182+ if [ -n "$PYTEST_TO_SKIP" ]; then
183+ DESELECT_OPT="--deselect $PYTEST_TO_SKIP"
66184 fi
185+ echo "Running tests for GROUP2"
186+ uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test $GROUP2 $DESELECT_OPT
187+
188+ - name : Upload coverage to Codecov
189+ if : inputs.push_coverage
190+ uses : codecov/codecov-action@v5
191+ with :
192+ token : ${{ secrets.CODECOV_TOKEN }}
193+ files : ./coverage.xml
194+ flags : run-tests-2
195+
196+ - name : Grant permissions to APT cache directory # allows backup
197+ run : sudo chown -R $USER:$USER /var/cache/apt/archives
198+
199+ run-examples :
200+ runs-on : ubuntu-latest
201+ strategy :
202+ fail-fast : false
203+ matrix :
204+ python-version : ['3.9', '3.10', '3.11', '3.12', '3.13']
205+ steps :
206+ - uses : actions/checkout@v5
207+
208+ - name : Grant permissions to APT cache directory # allows restore
209+ run : sudo chown -R $USER:$USER /var/cache/apt/archives
210+
211+ - name : Cache APT packages
212+ id : apt-cache
213+ uses : actions/cache@v4
214+ with :
215+ path : /var/cache/apt/archives
216+ key : apt-packages-${{ runner.os }}-${{ hashFiles('.github/workflows/checks.yml') }}
217+ restore-keys : |
218+ apt-packages-${{ runner.os }}-
219+
220+ - name : Install System Dependencies
221+ run : |
222+ sudo apt-get -qq update
223+ sudo apt-get -qq install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev libreoffice pkg-config
224+
225+ - name : Set TESSDATA_PREFIX
226+ run : echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
227+
228+ - name : Install uv and set the python version
229+ uses : astral-sh/setup-uv@v6
230+ with :
231+ python-version : ${{ matrix.python-version }}
232+
233+ - name : Install Python Dependencies
234+ run : uv sync --frozen --all-extras
67235
68- echo "Running example $file"
69- uv run --no-sync python "$file" || exit 1
70- done
236+ - name : Cache Models
237+ uses : actions/cache@v4
238+ with :
239+ path : |
240+ ~/.cache/huggingface
241+ ~/.cache/modelscope
242+ ~/.EasyOCR/
243+ key : models-cache
244+
245+ - name : Pre-download Models
246+ run : uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"
247+
248+ - name : Run examples
249+ run : |
250+ echo "--- Creating output directory ---"
251+ mkdir -p scratch
252+
253+ echo "--- Running examples ---"
254+
255+ summary_file="runtime_summary.log"
256+ echo "--- Example Runtimes ---" > "$summary_file"
257+
258+ for file in docs/examples/*.py; do
259+ if [[ "$(basename "$file")" =~ ${EXAMPLES_TO_SKIP} ]]; then
260+ echo "Skipping example: $(basename "$file")"
261+ else
262+ echo "--- Running example $(basename "$file") ---"
263+
264+ start_time=$SECONDS
265+
266+ uv run --no-sync python "$file" || exit 1
267+ duration=$((SECONDS - start_time))
268+ echo "Finished in ${duration}s."
269+
270+ echo "$(basename "$file"): ${duration}s" >> "$summary_file"
271+ fi
272+ done
273+
274+ echo
275+ echo "==================================="
276+ echo " Final Runtime Summary "
277+ echo "==================================="
278+ cat "$summary_file"
279+ echo "==================================="
280+
281+ - name : Grant permissions to APT cache directory # allows backup
282+ run : sudo chown -R $USER:$USER /var/cache/apt/archives
71283
72284 build-package :
73285 runs-on : ubuntu-latest
74286 strategy :
75287 matrix :
76288 python-version : ['3.12']
77289 steps :
78- - uses : actions/checkout@v4
290+ - uses : actions/checkout@v5
291+
79292 - name : Install uv and set the python version
80- uses : astral-sh/setup-uv@v5
293+ uses : astral-sh/setup-uv@v6
81294 with :
82295 python-version : ${{ matrix.python-version }}
83296 enable-cache : true
297+
84298 - name : Install dependencies
85299 run : uv sync --all-extras
300+
86301 - name : Build package
87302 run : uv build
303+
88304 - name : Check content of wheel
89305 run : unzip -l dist/*.whl
306+
90307 - name : Store the distribution packages
91308 uses : actions/upload-artifact@v4
92309 with :
@@ -106,12 +323,17 @@ jobs:
106323 with :
107324 name : python-package-distributions
108325 path : dist/
326+
109327 - name : Install uv and set the python version
110- uses : astral-sh/setup-uv@v5
328+ uses : astral-sh/setup-uv@v6
111329 with :
112330 python-version : ${{ matrix.python-version }}
113- enable-cache : true
331+ activate-environment : true
332+ enable-cache : false
333+
114334 - name : Install package
115- run : uv pip install dist/*.whl
335+ run : |
336+ uv pip install dist/*.whl
337+
116338 - name : Run docling
117- run : docling --help
339+ run : uv run docling --help
0 commit comments