Skip to content

Commit 4a174b5

Browse files
authored
chore: fix deprecation warnings (#303)
* chore: fix deprecation warnings
  Signed-off-by: Cesar Berrospi Ramis <[email protected]>
* chore: disregard deprecated captions from hierarchical chunker in hybrid chunker
  Signed-off-by: Cesar Berrospi Ramis <[email protected]>
* chore: update poetry lock file
  Signed-off-by: Cesar Berrospi Ramis <[email protected]>
---------
Signed-off-by: Cesar Berrospi Ramis <[email protected]>
1 parent b021374 commit 4a174b5

File tree

7 files changed

+967
-909
lines changed

7 files changed

+967
-909
lines changed

docling_core/transforms/chunker/hybrid_chunker.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,6 @@ def _make_chunk_from_doc_items(
156156
meta = DocMeta(
157157
doc_items=doc_items,
158158
headings=doc_chunk.meta.headings,
159-
captions=doc_chunk.meta.captions,
160159
origin=doc_chunk.meta.origin,
161160
)
162161
window_text = (
@@ -235,7 +234,9 @@ def _split_using_plain_text(
235234
)
236235
if available_length <= 0:
237236
warnings.warn(
238-
f"Headers and captions for this chunk are longer than the total amount of size for the chunk, chunk will be ignored: {doc_chunk.text=}" # noqa
237+
"Headers and captions for this chunk are longer than the total "
238+
"amount of size for the chunk, chunk will be ignored: "
239+
f"{doc_chunk.text=}"
239240
)
240241
return []
241242
text = doc_chunk.text
@@ -250,10 +251,10 @@ def _merge_chunks_with_matching_metadata(self, chunks: list[DocChunk]):
250251
num_chunks = len(chunks)
251252
while window_end < num_chunks:
252253
chunk = chunks[window_end]
253-
headings_and_captions = (chunk.meta.headings, chunk.meta.captions)
254+
headings = chunk.meta.headings
254255
ready_to_append = False
255256
if window_start == window_end:
256-
current_headings_and_captions = headings_and_captions
257+
current_headings = headings
257258
window_end += 1
258259
first_chunk_of_window = chunk
259260
else:
@@ -264,13 +265,12 @@ def _merge_chunks_with_matching_metadata(self, chunks: list[DocChunk]):
264265
text=self.delim.join([chk.text for chk in chks]),
265266
meta=DocMeta(
266267
doc_items=doc_items,
267-
headings=current_headings_and_captions[0],
268-
captions=current_headings_and_captions[1],
268+
headings=current_headings,
269269
origin=chunk.meta.origin,
270270
),
271271
)
272272
if (
273-
headings_and_captions == current_headings_and_captions
273+
headings == current_headings
274274
and self._count_chunk_tokens(doc_chunk=candidate) <= self.max_tokens
275275
):
276276
# there is room to include the new chunk so add it to the window and

docling_core/transforms/serializer/common.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pathlib import Path
1212
from typing import Any, Iterable, Optional, Tuple, Union
1313

14-
from pydantic import AnyUrl, BaseModel, NonNegativeInt, computed_field
14+
from pydantic import AnyUrl, BaseModel, ConfigDict, NonNegativeInt, computed_field
1515
from typing_extensions import Self, override
1616

1717
from docling_core.transforms.serializer.base import (
@@ -176,11 +176,7 @@ def merge_with_patch(self, patch: dict[str, Any]) -> Self:
176176
class DocSerializer(BaseModel, BaseDocSerializer):
177177
"""Class for document serializers."""
178178

179-
class Config:
180-
"""Pydantic config."""
181-
182-
arbitrary_types_allowed = True
183-
extra = "forbid"
179+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid")
184180

185181
doc: DoclingDocument
186182

docling_core/types/doc/document.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import re
1212
import sys
1313
import typing
14+
import warnings
1415
from enum import Enum
1516
from io import BytesIO
1617
from pathlib import Path
@@ -4109,7 +4110,10 @@ def check_version_is_compatible(cls, v: str) -> str:
41094110
@classmethod
41104111
def validate_document(cls, d: "DoclingDocument"):
41114112
"""validate_document."""
4112-
if not d.validate_tree(d.body) or not d.validate_tree(d.furniture):
4113-
raise ValueError("Document hierachy is inconsistent.")
4113+
with warnings.catch_warnings():
4114+
# ignore warning from deprecated furniture
4115+
warnings.filterwarnings("ignore", category=DeprecationWarning)
4116+
if not d.validate_tree(d.body) or not d.validate_tree(d.furniture):
4117+
raise ValueError("Document hierachy is inconsistent.")
41144118

41154119
return d

docs/DoclingDocument.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,7 @@
14751475
"type": "string"
14761476
},
14771477
"content": {
1478+
"additionalProperties": true,
14781479
"title": "Content",
14791480
"type": "object"
14801481
}

0 commit comments

Comments
 (0)