feat: Support for Python 3.14 (#2530)

dolfim-ibm · web-flow · commit cdffb47b9a12 · 2025-10-28T14:32:15.000+01:00
* fix dependencies for py314

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add metadata and CI tests

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add back gliner

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update error message about python 3.14 availability

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* skip tests which cannot run on py 3.14

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix lint

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove vllm from py 3.14 deps

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* safe import for vllm

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update lock

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove torch.compile()

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update checkbox results after docling-core changes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* cannot run mlx example in CI

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add test for rapidocr backends and skip onnxruntime on py3.14

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix other occurrences of torch.compile()

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* allow torch.compile for Python <3.14; proper support will be introduced with new torch releases

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -20,7 +20,7 @@ env:
     tests/test_asr_pipeline.py
     tests/test_threaded_pipeline.py
   PYTEST_TO_SKIP: |-
-  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping)\.py$'
+  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping|mlx_whisper_example)\.py$'
 
 jobs:
   lint:
@@ -62,7 +62,7 @@ jobs:
       strategy:
         fail-fast: false
         matrix:
-          python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+          python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
       steps:
         - uses: actions/checkout@v5
 
@@ -129,7 +129,7 @@ jobs:
       strategy:
         fail-fast: false
         matrix:
-          python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+          python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
       steps:
         - uses: actions/checkout@v5
 
@@ -201,7 +201,7 @@ jobs:
       strategy:
         fail-fast: false
         matrix:
-          python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+          python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
       steps:
         - uses: actions/checkout@v5
 
diff --git a/docling/cli/main.py b/docling/cli/main.py
@@ -738,10 +738,15 @@ def convert(  # noqa: C901
 
                         pipeline_options.vlm_options = SMOLDOCLING_MLX
                     except ImportError:
-                        _log.warning(
-                            "To run SmolDocling faster, please install mlx-vlm:\n"
-                            "pip install mlx-vlm"
-                        )
+                        if sys.version_info < (3, 14):
+                            _log.warning(
+                                "To run SmolDocling faster, please install mlx-vlm:\n"
+                                "pip install mlx-vlm"
+                            )
+                        else:
+                            _log.warning(
+                                "You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                            )
 
             elif vlm_model == VlmModelType.GRANITEDOCLING:
                 pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
@@ -751,10 +756,16 @@ def convert(  # noqa: C901
 
                         pipeline_options.vlm_options = GRANITEDOCLING_MLX
                     except ImportError:
-                        _log.warning(
-                            "To run GraniteDocling faster, please install mlx-vlm:\n"
-                            "pip install mlx-vlm"
-                        )
+                        if sys.version_info < (3, 14):
+                            _log.warning(
+                                "To run GraniteDocling faster, please install mlx-vlm:\n"
+                                "pip install mlx-vlm"
+                            )
+                        else:
+                            _log.warning(
+                                "You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                            )
+
             elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
                 pipeline_options.vlm_options = SMOLDOCLING_VLLM
 
diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py
@@ -1,3 +1,4 @@
+import sys
 import threading
 from collections.abc import Iterable
 from pathlib import Path
@@ -75,7 +76,10 @@ def __init__(
                         else "sdpa"
                     ),
                 )
-                self.model = torch.compile(self.model)  # type: ignore
+                if sys.version_info < (3, 14):
+                    self.model = torch.compile(self.model)  # type: ignore
+                else:
+                    self.model.eval()
 
             self.provenance = f"{self.options.repo_id}"
 
diff --git a/docling/models/vlm_models_inline/hf_transformers_model.py b/docling/models/vlm_models_inline/hf_transformers_model.py
@@ -1,5 +1,6 @@
 import importlib.metadata
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -129,7 +130,10 @@ def __init__(
                 trust_remote_code=vlm_options.trust_remote_code,
                 revision=vlm_options.revision,
             )
-            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+            if sys.version_info < (3, 14):
+                self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+            else:
+                self.vlm_model.eval()
 
             # Load generation config
             self.generation_config = GenerationConfig.from_pretrained(
diff --git a/docling/models/vlm_models_inline/mlx_model.py b/docling/models/vlm_models_inline/mlx_model.py
@@ -50,9 +50,14 @@ def __init__(
                 from mlx_vlm.prompt_utils import apply_chat_template  # type: ignore
                 from mlx_vlm.utils import load_config  # type: ignore
             except ImportError:
-                raise ImportError(
-                    "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
-                )
+                if sys.version_info < (3, 14):
+                    raise ImportError(
+                        "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
+                    )
+                else:
+                    raise ImportError(
+                        "mlx-vlm is not installed. It is not yet available on Python 3.14."
+                    )
 
             repo_cache_folder = vlm_options.repo_id.replace("/", "--")
 
diff --git a/docling/models/vlm_models_inline/nuextract_transformers_model.py b/docling/models/vlm_models_inline/nuextract_transformers_model.py
@@ -1,4 +1,5 @@
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -153,7 +154,10 @@ def __init__(
                 ),
                 trust_remote_code=vlm_options.trust_remote_code,
             )
-            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+            if sys.version_info < (3, 14):
+                self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+            else:
+                self.vlm_model.eval()
 
             # Load generation config
             self.generation_config = GenerationConfig.from_pretrained(artifacts_path)
diff --git a/docling/models/vlm_models_inline/vllm_model.py b/docling/models/vlm_models_inline/vllm_model.py
@@ -1,4 +1,5 @@
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -100,7 +101,18 @@ def __init__(
             return
 
         from transformers import AutoProcessor
-        from vllm import LLM, SamplingParams
+
+        try:
+            from vllm import LLM, SamplingParams
+        except ImportError:
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "vllm is not installed. Please install it via `pip install vllm`."
+                )
+            else:
+                raise ImportError(
+                    "vllm is not installed. It is not yet available on Python 3.14."
+                )
 
         # Device selection
         self.device = decide_device(
diff --git a/docling/pipeline/asr_pipeline.py b/docling/pipeline/asr_pipeline.py
@@ -1,6 +1,7 @@
 import logging
 import os
 import re
+import sys
 import tempfile
 from io import BytesIO
 from pathlib import Path
@@ -117,9 +118,15 @@ def __init__(
             try:
                 import whisper  # type: ignore
             except ImportError:
-                raise ImportError(
-                    "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
-                )
+                if sys.version_info < (3, 14):
+                    raise ImportError(
+                        "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
+                    )
+                else:
+                    raise ImportError(
+                        "whisper is not installed. Unfortunately its dependencies are not yet available for Python 3.14."
+                    )
+
             self.asr_options = asr_options
             self.max_tokens = asr_options.max_new_tokens
             self.temperature = asr_options.temperature
diff --git a/pyproject.toml b/pyproject.toml
@@ -30,6 +30,7 @@ classifiers = [
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
+  "Programming Language :: Python :: 3.14",
 ]
 readme = "README.md"
 authors = [
@@ -63,7 +64,7 @@ dependencies = [
   'pandas (>=2.1.4,<3.0.0)',
   'marko (>=2.1.2,<3.0.0)',
   'openpyxl (>=3.1.5,<4.0.0)',
-  'lxml (>=4.0.0,<6.0.0)',
+  'lxml (>=4.0.0,<7.0.0)',
   'pillow (>=10.0.0,<12.0.0)',
   'tqdm (>=4.65.0,<5.0.0)',
   'pluggy (>=1.0.0,<2.0.0)',
@@ -95,19 +96,19 @@ ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
 vlm = [
   'transformers (>=4.46.0,<5.0.0)',
   'accelerate (>=1.2.1,<2.0.0)',
-  'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
-  'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
+  'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
+  'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64"',
   "qwen-vl-utils>=0.0.11",
 ]
 rapidocr = [
-  'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"',
-  'onnxruntime (>=1.7.0,<2.0.0)',
+  'rapidocr (>=3.3,<4.0.0)',
+  'onnxruntime (>=1.7.0,<2.0.0) ; python_version < "3.14"',
   # 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
   # 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]
 asr = [
-    'mlx-whisper>=0.4.3 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
-    "openai-whisper>=20250625",
+    'mlx-whisper>=0.4.3 ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
+    'openai-whisper>=20250625 ; python_version < "3.14"',
 ]
 
 [dependency-groups]
@@ -146,10 +147,10 @@ examples = [
   "langchain-milvus~=0.1",
   "langchain-text-splitters~=0.2",
   "modelscope>=1.29.0",
-  "gliner>=0.2.21",
+  'gliner>=0.2.21 ; python_version < "3.14"',  # gliner depends on onnxruntime which is not available on py3.14
 ]
 constraints = [
-  'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
+  'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10" and python_version < "3.14"',
   'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]
 
diff --git a/tests/data/groundtruth/docling_v2/right_to_left_03.md b/tests/data/groundtruth/docling_v2/right_to_left_03.md
@@ -16,17 +16,17 @@
 
 استاندارد اجباری است؟
 
-بلی
+- [ ] بلی
 
-خير
+- [x] خير
 
 مرجع صادرکننده استاندارد
 
 سازمان ملی استاندارد ايران
 
 آيا توليدکننده محصول، استاندارد مذکور را اخذ نموده است؟
 
-بلی        خير
+- [x] بلی        خير
 
 ## -3 پذيرش در بورس
 
diff --git a/tests/test_asr_pipeline.py b/tests/test_asr_pipeline.py
@@ -1,3 +1,4 @@
+import sys
 from pathlib import Path
 from unittest.mock import Mock, patch
 
@@ -10,6 +11,11 @@
 from docling.document_converter import AudioFormatOption, DocumentConverter
 from docling.pipeline.asr_pipeline import AsrPipeline
 
+pytestmark = pytest.mark.skipif(
+    sys.version_info >= (3, 14),
+    reason="Python 3.14 is not yet supported by whisper dependencies.",
+)
+
 
 @pytest.fixture
 def test_audio_path():
diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py
@@ -70,13 +70,19 @@ def test_e2e_conversions():
         (EasyOcrOptions(force_full_page_ocr=True), False),
     ]
 
-    # rapidocr is only available for Python >=3.6,<3.13
-    if sys.version_info < (3, 13):
-        engines.append((RapidOcrOptions(), False))
-        engines.append((RapidOcrOptions(force_full_page_ocr=True), False))
+    for rapidocr_backend in ["onnxruntime", "torch"]:
+        if sys.version_info >= (3, 14) and rapidocr_backend == "onnxruntime":
+            # skip onnxruntime backend on Python 3.14
+            continue
+
+        engines.append((RapidOcrOptions(backend=rapidocr_backend), False))
+        engines.append(
+            (RapidOcrOptions(backend=rapidocr_backend, force_full_page_ocr=True), False)
+        )
         engines.append(
             (
                 RapidOcrOptions(
+                    backend=rapidocr_backend,
                     force_full_page_ocr=True,
                     rec_font_path="test",
                     rapidocr_params={"Rec.font_path": None},  # overwrites rec_font_path
diff --git a/uv.lock b/uv.lock