Skip to content

Commit 7e223c7

Browse files
committed
Merge branch 'main' into fix/issue-2654-page-indexing
2 parents 4858d71 + 5c1f8f0 commit 7e223c7

30 files changed

+2822
-1728
lines changed

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
## [v2.67.0](https://github.com/docling-project/docling/releases/tag/v2.67.0) - 2026-01-09
2+
3+
### Feature
4+
5+
* Enrichment annotations in the new meta format ([#2859](https://github.com/docling-project/docling/issues/2859)) ([`aab3ff5`](https://github.com/docling-project/docling/commit/aab3ff5d82fc54864657c0c2ff8e0aa21461f23f))
6+
* Add XPU device support for Intel GPUs ([#2809](https://github.com/docling-project/docling/issues/2809)) ([`2b83fdd`](https://github.com/docling-project/docling/commit/2b83fdd0deeec0f1ad016cc78ea42d3144a86cad))
7+
* Add option to report timings details ([#2772](https://github.com/docling-project/docling/issues/2772)) ([`cbc6537`](https://github.com/docling-project/docling/commit/cbc6537ee856349cef9b1ed453310e269667534c))
8+
9+
### Fix
10+
11+
* Lock new deps and update python 3.14 warnings ([#2844](https://github.com/docling-project/docling/issues/2844)) ([`d9295df`](https://github.com/docling-project/docling/commit/d9295df30202e251c9e282a4d0ce61653c7268b6))
12+
* Correct type hint for table_structure_options usage ([#2823](https://github.com/docling-project/docling/issues/2823)) ([`a0530a2`](https://github.com/docling-project/docling/commit/a0530a271e5411bae96cfcb8a6ae23b3431a7462))
13+
* Transformers models lazy-loaded ([#2826](https://github.com/docling-project/docling/issues/2826)) ([`3ef4525`](https://github.com/docling-project/docling/commit/3ef45258b7de6f5efe13b1f5ba392d72e2854349))
14+
* Font download by passing font_path to RapidOcr ([#2822](https://github.com/docling-project/docling/issues/2822)) ([`ffafe58`](https://github.com/docling-project/docling/commit/ffafe58ad25e0ebdca44a41f8df9f691c9a393bf))
15+
* **cli:** Add Layout and Table models to --show-external-plugins ([#2832](https://github.com/docling-project/docling/issues/2832)) ([`ed57089`](https://github.com/docling-project/docling/commit/ed5708907fcd29e56b114ce7062c4c2693c67b6b))
16+
117
## [v2.66.0](https://github.com/docling-project/docling/releases/tag/v2.66.0) - 2025-12-24
218

319
### Feature

docling/backend/msword_backend.py

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,9 +1515,9 @@ def _is_rich_table_cell(self, cell: _Cell) -> bool:
15151515
def _handle_pictures(
15161516
self, drawing_blip: Any, doc: DoclingDocument
15171517
) -> list[RefItem]:
1518-
def get_docx_image(drawing_blip: Any) -> Optional[bytes]:
1518+
def get_docx_image(image: Any) -> Optional[bytes]:
15191519
image_data: Optional[bytes] = None
1520-
rId = drawing_blip[0].get(
1520+
rId = image.get(
15211521
"{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
15221522
)
15231523
if rId in self.docx_obj.part.rels:
@@ -1527,36 +1527,47 @@ def get_docx_image(drawing_blip: Any) -> Optional[bytes]:
15271527
return image_data
15281528

15291529
elem_ref: list[RefItem] = []
1530-
level = self._get_level()
1531-
# Open the BytesIO object with PIL to create an Image
1532-
image_data: Optional[bytes] = get_docx_image(drawing_blip)
1533-
if image_data is None:
1534-
_log.warning("Warning: image cannot be found")
1535-
p1 = doc.add_picture(
1536-
parent=self.parents[level - 1],
1537-
caption=None,
1538-
content_layer=self.content_layer,
1539-
)
1540-
elem_ref.append(p1.get_ref())
1541-
else:
1542-
try:
1543-
image_bytes = BytesIO(image_data)
1544-
pil_image = Image.open(image_bytes)
1545-
p2 = doc.add_picture(
1546-
parent=self.parents[level - 1],
1547-
image=ImageRef.from_pil(image=pil_image, dpi=72),
1548-
caption=None,
1549-
content_layer=self.content_layer,
1550-
)
1551-
elem_ref.append(p2.get_ref())
1552-
except (UnidentifiedImageError, OSError):
1553-
_log.warning("Warning: image cannot be loaded by Pillow")
1554-
p3 = doc.add_picture(
1530+
if drawing_blip:
1531+
level = self._get_level()
1532+
# Open the BytesIO object with PIL to create an Image
1533+
parent: Optional[NodeItem] = (
1534+
self.parents[level - 1]
1535+
if len(drawing_blip) == 1
1536+
else doc.add_group(
1537+
label=GroupLabel.PICTURE_AREA,
15551538
parent=self.parents[level - 1],
1556-
caption=None,
15571539
content_layer=self.content_layer,
15581540
)
1559-
elem_ref.append(p3.get_ref())
1541+
)
1542+
for image in drawing_blip:
1543+
image_data: Optional[bytes] = get_docx_image(image)
1544+
if image_data is None:
1545+
_log.warning("Warning: image cannot be found")
1546+
p1 = doc.add_picture(
1547+
parent=parent,
1548+
caption=None,
1549+
content_layer=self.content_layer,
1550+
)
1551+
elem_ref.append(p1.get_ref())
1552+
else:
1553+
try:
1554+
image_bytes = BytesIO(image_data)
1555+
pil_image = Image.open(image_bytes)
1556+
p2 = doc.add_picture(
1557+
parent=parent,
1558+
image=ImageRef.from_pil(image=pil_image, dpi=72),
1559+
caption=None,
1560+
content_layer=self.content_layer,
1561+
)
1562+
elem_ref.append(p2.get_ref())
1563+
except (UnidentifiedImageError, OSError):
1564+
_log.warning("Warning: image cannot be loaded by Pillow")
1565+
p3 = doc.add_picture(
1566+
parent=parent,
1567+
caption=None,
1568+
content_layer=self.content_layer,
1569+
)
1570+
elem_ref.append(p3.get_ref())
15601571
return elem_ref
15611572

15621573
def _handle_drawingml(self, doc: DoclingDocument, drawingml_els: Any):

docling/cli/main.py

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -833,15 +833,10 @@ def convert( # noqa: C901
833833

834834
pipeline_options.vlm_options = SMOLDOCLING_MLX
835835
except ImportError:
836-
if sys.version_info < (3, 14):
837-
_log.warning(
838-
"To run SmolDocling faster, please install mlx-vlm:\n"
839-
"pip install mlx-vlm"
840-
)
841-
else:
842-
_log.warning(
843-
"You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
844-
)
836+
_log.warning(
837+
"To run SmolDocling faster, please install mlx-vlm:\n"
838+
"pip install mlx-vlm"
839+
)
845840

846841
elif vlm_model == VlmModelType.GRANITEDOCLING:
847842
pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
@@ -851,15 +846,10 @@ def convert( # noqa: C901
851846

852847
pipeline_options.vlm_options = GRANITEDOCLING_MLX
853848
except ImportError:
854-
if sys.version_info < (3, 14):
855-
_log.warning(
856-
"To run GraniteDocling faster, please install mlx-vlm:\n"
857-
"pip install mlx-vlm"
858-
)
859-
else:
860-
_log.warning(
861-
"You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
862-
)
849+
_log.warning(
850+
"To run GraniteDocling faster, please install mlx-vlm:\n"
851+
"pip install mlx-vlm"
852+
)
863853

864854
elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
865855
pipeline_options.vlm_options = SMOLDOCLING_VLLM

docling/models/document_picture_classifier.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
NodeItem,
99
PictureClassificationClass,
1010
PictureClassificationData,
11+
PictureClassificationMetaField,
1112
PictureItem,
13+
PictureMeta,
1214
)
15+
from docling_core.types.doc.document import PictureClassificationPrediction
1316
from PIL import Image
1417
from pydantic import BaseModel
1518

@@ -169,17 +172,38 @@ def __call__(
169172
outputs = self.document_picture_classifier.predict(images)
170173

171174
for item, output in zip(elements, outputs):
175+
predicted_classes = [
176+
PictureClassificationClass(
177+
class_name=pred[0],
178+
confidence=pred[1],
179+
)
180+
for pred in output
181+
]
182+
183+
# FIXME: annotations is deprecated, remove once all consumers use meta.classification
172184
item.annotations.append(
173185
PictureClassificationData(
174186
provenance="DocumentPictureClassifier",
175-
predicted_classes=[
176-
PictureClassificationClass(
177-
class_name=pred[0],
178-
confidence=pred[1],
179-
)
180-
for pred in output
181-
],
187+
predicted_classes=predicted_classes,
182188
)
183189
)
184190

191+
# Store classification in the new meta field
192+
predictions = [
193+
PictureClassificationPrediction(
194+
class_name=pred.class_name,
195+
confidence=pred.confidence,
196+
created_by="DocumentPictureClassifier",
197+
)
198+
for pred in predicted_classes
199+
]
200+
classification_data = PictureClassificationMetaField(
201+
predictions=predictions,
202+
)
203+
204+
if item.meta is not None:
205+
item.meta.classification = classification_data
206+
else:
207+
item.meta = PictureMeta(classification=classification_data)
208+
185209
yield item

docling/models/picture_description_base_model.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
from typing import List, Optional, Type, Union
55

66
from docling_core.types.doc import (
7+
DescriptionMetaField,
78
DoclingDocument,
89
NodeItem,
910
PictureItem,
11+
PictureMeta,
1012
)
1113
from docling_core.types.doc.document import ( # TODO: move import to docling_core.types.doc
1214
PictureDescriptionData,
@@ -80,9 +82,19 @@ def __call__(
8082
outputs = self._annotate_images(images)
8183

8284
for item, output in zip(elements, outputs):
85+
# FIXME: annotations is deprecated, remove once all consumers use meta.description
8386
item.annotations.append(
8487
PictureDescriptionData(text=output, provenance=self.provenance)
8588
)
89+
90+
# Store description in the new meta field
91+
if item.meta is None:
92+
item.meta = PictureMeta()
93+
item.meta.description = DescriptionMetaField(
94+
text=output,
95+
created_by=self.provenance,
96+
)
97+
8698
yield item
8799

88100
@classmethod

docling/models/vlm_models_inline/mlx_model.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,9 @@ def __init__(
5555
from mlx_vlm.prompt_utils import apply_chat_template # type: ignore
5656
from mlx_vlm.utils import load_config # type: ignore
5757
except ImportError:
58-
if sys.version_info < (3, 14):
59-
raise ImportError(
60-
"mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
61-
)
62-
else:
63-
raise ImportError(
64-
"mlx-vlm is not installed. It is not yet available on Python 3.14."
65-
)
58+
raise ImportError(
59+
"mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
60+
)
6661

6762
repo_cache_folder = vlm_options.repo_id.replace("/", "--")
6863

docs/examples/advanced_chunking_and_serialization.ipynb

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@
419419
},
420420
{
421421
"cell_type": "code",
422-
"execution_count": 8,
422+
"execution_count": null,
423423
"metadata": {},
424424
"outputs": [],
425425
"source": [
@@ -451,19 +451,18 @@
451451
" **kwargs: Any,\n",
452452
" ) -> SerializationResult:\n",
453453
" text_parts: list[str] = []\n",
454-
" for annotation in item.annotations:\n",
455-
" if isinstance(annotation, PictureClassificationData):\n",
456-
" predicted_class = (\n",
457-
" annotation.predicted_classes[0].class_name\n",
458-
" if annotation.predicted_classes\n",
459-
" else None\n",
460-
" )\n",
461-
" if predicted_class is not None:\n",
462-
" text_parts.append(f\"Picture type: {predicted_class}\")\n",
463-
" elif isinstance(annotation, PictureMoleculeData):\n",
464-
" text_parts.append(f\"SMILES: {annotation.smi}\")\n",
465-
" elif isinstance(annotation, PictureDescriptionData):\n",
466-
" text_parts.append(f\"Picture description: {annotation.text}\")\n",
454+
"\n",
455+
" if item.meta is not None:\n",
456+
" if item.meta.classification is not None:\n",
457+
" main_pred = item.meta.classification.get_main_prediction()\n",
458+
" if main_pred is not None:\n",
459+
" text_parts.append(f\"Picture type: {main_pred.class_name}\")\n",
460+
"\n",
461+
" if item.meta.molecule is not None:\n",
462+
" text_parts.append(f\"SMILES: {item.meta.molecule.smi}\")\n",
463+
"\n",
464+
" if item.meta.description is not None:\n",
465+
" text_parts.append(f\"Picture description: {item.meta.description.text}\")\n",
467466
"\n",
468467
" text_res = \"\\n\".join(text_parts)\n",
469468
" text_res = doc_serializer.post_process(text=text_res)\n",

docs/examples/custom_convert.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from docling.datamodel.base_models import InputFormat
4444
from docling.datamodel.pipeline_options import (
4545
PdfPipelineOptions,
46+
TableStructureOptions,
4647
)
4748
from docling.document_converter import DocumentConverter, PdfFormatOption
4849

@@ -65,7 +66,7 @@ def main():
6566
# pipeline_options = PdfPipelineOptions()
6667
# pipeline_options.do_ocr = False
6768
# pipeline_options.do_table_structure = True
68-
# pipeline_options.table_structure_options.do_cell_matching = False
69+
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=False)
6970

7071
# doc_converter = DocumentConverter(
7172
# format_options={
@@ -80,7 +81,7 @@ def main():
8081
# pipeline_options = PdfPipelineOptions()
8182
# pipeline_options.do_ocr = True
8283
# pipeline_options.do_table_structure = True
83-
# pipeline_options.table_structure_options.do_cell_matching = True
84+
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=True)
8485

8586
# doc_converter = DocumentConverter(
8687
# format_options={
@@ -95,7 +96,7 @@ def main():
9596
# pipeline_options = PdfPipelineOptions()
9697
# pipeline_options.do_ocr = False
9798
# pipeline_options.do_table_structure = True
98-
# pipeline_options.table_structure_options.do_cell_matching = True
99+
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=True)
99100

100101
# doc_converter = DocumentConverter(
101102
# format_options={
@@ -110,7 +111,9 @@ def main():
110111
pipeline_options = PdfPipelineOptions()
111112
pipeline_options.do_ocr = True
112113
pipeline_options.do_table_structure = True
113-
pipeline_options.table_structure_options.do_cell_matching = True
114+
pipeline_options.table_structure_options = TableStructureOptions(
115+
do_cell_matching=True
116+
)
114117
pipeline_options.ocr_options.lang = ["es"]
115118
pipeline_options.accelerator_options = AcceleratorOptions(
116119
num_threads=4, device=AcceleratorDevice.AUTO
@@ -128,7 +131,7 @@ def main():
128131
# pipeline_options.do_ocr = True
129132
# pipeline_options.ocr_options.use_gpu = False # <-- set this.
130133
# pipeline_options.do_table_structure = True
131-
# pipeline_options.table_structure_options.do_cell_matching = True
134+
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=True)
132135

133136
# doc_converter = DocumentConverter(
134137
# format_options={
@@ -141,7 +144,7 @@ def main():
141144
# pipeline_options = PdfPipelineOptions()
142145
# pipeline_options.do_ocr = True
143146
# pipeline_options.do_table_structure = True
144-
# pipeline_options.table_structure_options.do_cell_matching = True
147+
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=True)
145148
# pipeline_options.ocr_options = TesseractOcrOptions()
146149

147150
# doc_converter = DocumentConverter(
@@ -155,7 +158,7 @@ def main():
155158
# pipeline_options = PdfPipelineOptions()
156159
# pipeline_options.do_ocr = True
157160
# pipeline_options.do_table_structure = True
158-
# pipeline_options.table_structure_options.do_cell_matching = True
161+
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=True)
159162
# pipeline_options.ocr_options = TesseractCliOcrOptions()
160163

161164
# doc_converter = DocumentConverter(
@@ -169,7 +172,7 @@ def main():
169172
# pipeline_options = PdfPipelineOptions()
170173
# pipeline_options.do_ocr = True
171174
# pipeline_options.do_table_structure = True
172-
# pipeline_options.table_structure_options.do_cell_matching = True
175+
# pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=True)
173176
# pipeline_options.ocr_options = OcrMacOptions()
174177

175178
# doc_converter = DocumentConverter(

0 commit comments

Comments
 (0)