Skip to content

Commit c30ada6

Browse files
authored
chore: Types fixes (#267)
* docs: Recommend installing all groups for mypy to work properly

  Signed-off-by: Eugene <[email protected]>

* chore: Fix some incomplete type defs

  Signed-off-by: Eugene <[email protected]>

---------

Signed-off-by: Eugene <[email protected]>
1 parent 763e136 commit c30ada6

File tree

12 files changed

+121
-85
lines changed

12 files changed

+121
-85
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ poetry shell
 To spawn a shell with the Virtual Environment activated. If the Virtual Environment doesn't exist, Poetry will create one for you. Then, to install dependencies, run:
 
 ```bash
-poetry install
+poetry install --all-extras
 ```
 
 **(Advanced) Use a Specific Python Version**

docling_core/experimental/serializer/base.py

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"""Define base classes for serialization."""
77
from abc import ABC, abstractmethod
88
from pathlib import Path
9-
from typing import Optional, Union
9+
from typing import Any, Optional, Union
1010

1111
from pydantic import AnyUrl, BaseModel
1212

@@ -51,7 +51,7 @@ def serialize(
5151
item: TextItem,
5252
doc_serializer: "BaseDocSerializer",
5353
doc: DoclingDocument,
54-
**kwargs,
54+
**kwargs: Any,
5555
) -> SerializationResult:
5656
"""Serializes the passed item."""
5757
...
@@ -67,7 +67,7 @@ def serialize(
6767
item: TableItem,
6868
doc_serializer: "BaseDocSerializer",
6969
doc: DoclingDocument,
70-
**kwargs,
70+
**kwargs: Any,
7171
) -> SerializationResult:
7272
"""Serializes the passed item."""
7373
...
@@ -83,7 +83,7 @@ def serialize(
8383
item: PictureItem,
8484
doc_serializer: "BaseDocSerializer",
8585
doc: DoclingDocument,
86-
**kwargs,
86+
**kwargs: Any,
8787
) -> SerializationResult:
8888
"""Serializes the passed item."""
8989
...
@@ -99,7 +99,7 @@ def serialize(
9999
item: KeyValueItem,
100100
doc_serializer: "BaseDocSerializer",
101101
doc: DoclingDocument,
102-
**kwargs,
102+
**kwargs: Any,
103103
) -> SerializationResult:
104104
"""Serializes the passed item."""
105105
...
@@ -115,7 +115,7 @@ def serialize(
115115
item: FormItem,
116116
doc_serializer: "BaseDocSerializer",
117117
doc: DoclingDocument,
118-
**kwargs,
118+
**kwargs: Any,
119119
) -> SerializationResult:
120120
"""Serializes the passed item."""
121121
...
@@ -131,7 +131,7 @@ def serialize(
131131
item: Union[UnorderedList, OrderedList],
132132
doc_serializer: "BaseDocSerializer",
133133
doc: DoclingDocument,
134-
**kwargs,
134+
**kwargs: Any,
135135
) -> SerializationResult:
136136
"""Serializes the passed item."""
137137
...
@@ -147,7 +147,7 @@ def serialize(
147147
item: InlineGroup,
148148
doc_serializer: "BaseDocSerializer",
149149
doc: DoclingDocument,
150-
**kwargs,
150+
**kwargs: Any,
151151
) -> SerializationResult:
152152
"""Serializes the passed item."""
153153
...
@@ -163,7 +163,7 @@ def serialize(
163163
item: NodeItem,
164164
doc_serializer: "BaseDocSerializer",
165165
doc: DoclingDocument,
166-
**kwargs,
166+
**kwargs: Any,
167167
) -> SerializationResult:
168168
"""Serializes the passed item."""
169169
...
@@ -174,34 +174,40 @@ class BaseDocSerializer(ABC):
174174

175175
@abstractmethod
176176
def serialize(
177-
self, *, item: Optional[NodeItem] = None, **kwargs
177+
self,
178+
*,
179+
item: Optional[NodeItem] = None,
180+
**kwargs: Any,
178181
) -> SerializationResult:
179182
"""Run the serialization."""
180183
...
181184

182185
@abstractmethod
183-
def serialize_bold(self, text: str, **kwargs) -> str:
186+
def serialize_bold(self, text: str, **kwargs: Any) -> str:
184187
"""Hook for bold formatting serialization."""
185188
...
186189

187190
@abstractmethod
188-
def serialize_italic(self, text: str, **kwargs) -> str:
191+
def serialize_italic(self, text: str, **kwargs: Any) -> str:
189192
"""Hook for italic formatting serialization."""
190193
...
191194

192195
@abstractmethod
193-
def serialize_underline(self, text: str, **kwargs) -> str:
196+
def serialize_underline(self, text: str, **kwargs: Any) -> str:
194197
"""Hook for underline formatting serialization."""
195198
...
196199

197200
@abstractmethod
198-
def serialize_strikethrough(self, text: str, **kwargs) -> str:
201+
def serialize_strikethrough(self, text: str, **kwargs: Any) -> str:
199202
"""Hook for strikethrough formatting serialization."""
200203
...
201204

202205
@abstractmethod
203206
def serialize_hyperlink(
204-
self, text: str, hyperlink: Union[AnyUrl, Path], **kwargs
207+
self,
208+
text: str,
209+
hyperlink: Union[AnyUrl, Path],
210+
**kwargs: Any,
205211
) -> str:
206212
"""Hook for hyperlink serialization."""
207213
...
@@ -210,7 +216,7 @@ def serialize_hyperlink(
210216
def get_parts(
211217
self,
212218
item: Optional[NodeItem] = None,
213-
**kwargs,
219+
**kwargs: Any,
214220
) -> list[SerializationResult]:
215221
"""Get the components to be combined for serializing this node."""
216222
...
@@ -219,7 +225,7 @@ def get_parts(
219225
def post_process(
220226
self,
221227
text: str,
222-
**kwargs,
228+
**kwargs: Any,
223229
) -> str:
224230
"""Apply some text post-processing steps."""
225231
...
@@ -228,13 +234,13 @@ def post_process(
228234
def serialize_captions(
229235
self,
230236
item: FloatingItem,
231-
**kwargs,
237+
**kwargs: Any,
232238
) -> SerializationResult:
233239
"""Serialize the item's captions."""
234240
...
235241

236242
@abstractmethod
237-
def get_excluded_refs(self, **kwargs) -> set[str]:
243+
def get_excluded_refs(self, **kwargs: Any) -> set[str]:
238244
"""Get references to excluded items."""
239245
...
240246

docling_core/experimental/serializer/common.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def _captions_of_some_item(self) -> set[str]:
214214
return refs
215215

216216
@override
217-
def get_excluded_refs(self, **kwargs) -> set[str]:
217+
def get_excluded_refs(self, **kwargs: Any) -> set[str]:
218218
"""References to excluded items."""
219219
params = self.params.merge_with_patch(patch=kwargs)
220220
params_json = params.model_dump_json()
@@ -252,7 +252,10 @@ def get_excluded_refs(self, **kwargs) -> set[str]:
252252

253253
@abstractmethod
254254
def serialize_doc(
255-
self, *, parts: list[SerializationResult], **kwargs
255+
self,
256+
*,
257+
parts: list[SerializationResult],
258+
**kwargs: Any,
256259
) -> SerializationResult:
257260
"""Serialize a document out of its pages."""
258261
...
@@ -271,7 +274,7 @@ def serialize(
271274
list_level: int = 0,
272275
is_inline_scope: bool = False,
273276
visited: Optional[set[str]] = None, # refs of visited items
274-
**kwargs,
277+
**kwargs: Any,
275278
) -> SerializationResult:
276279
"""Serialize a given node."""
277280
my_visited: set[str] = visited if visited is not None else set()
@@ -380,7 +383,7 @@ def get_parts(
380383
list_level: int = 0,
381384
is_inline_scope: bool = False,
382385
visited: Optional[set[str]] = None, # refs of visited items
383-
**kwargs,
386+
**kwargs: Any,
384387
) -> list[SerializationResult]:
385388
"""Get the components to be combined for serializing this node."""
386389
parts: list[SerializationResult] = []
@@ -415,7 +418,7 @@ def post_process(
415418
*,
416419
formatting: Optional[Formatting] = None,
417420
hyperlink: Optional[Union[AnyUrl, Path]] = None,
418-
**kwargs,
421+
**kwargs: Any,
419422
) -> str:
420423
"""Apply some text post-processing steps."""
421424
params = self.params.merge_with_patch(patch=kwargs)
@@ -434,28 +437,31 @@ def post_process(
434437
return res
435438

436439
@override
437-
def serialize_bold(self, text: str, **kwargs) -> str:
440+
def serialize_bold(self, text: str, **kwargs: Any) -> str:
438441
"""Hook for bold formatting serialization."""
439442
return text
440443

441444
@override
442-
def serialize_italic(self, text: str, **kwargs) -> str:
445+
def serialize_italic(self, text: str, **kwargs: Any) -> str:
443446
"""Hook for italic formatting serialization."""
444447
return text
445448

446449
@override
447-
def serialize_underline(self, text: str, **kwargs) -> str:
450+
def serialize_underline(self, text: str, **kwargs: Any) -> str:
448451
"""Hook for underline formatting serialization."""
449452
return text
450453

451454
@override
452-
def serialize_strikethrough(self, text: str, **kwargs) -> str:
455+
def serialize_strikethrough(self, text: str, **kwargs: Any) -> str:
453456
"""Hook for strikethrough formatting serialization."""
454457
return text
455458

456459
@override
457460
def serialize_hyperlink(
458-
self, text: str, hyperlink: Union[AnyUrl, Path], **kwargs
461+
self,
462+
text: str,
463+
hyperlink: Union[AnyUrl, Path],
464+
**kwargs: Any,
459465
) -> str:
460466
"""Hook for hyperlink serialization."""
461467
return text
@@ -464,7 +470,7 @@ def serialize_hyperlink(
464470
def serialize_captions(
465471
self,
466472
item: FloatingItem,
467-
**kwargs,
473+
**kwargs: Any,
468474
) -> SerializationResult:
469475
"""Serialize the item's captions."""
470476
params = self.params.merge_with_patch(patch=kwargs)

docling_core/experimental/serializer/doctags.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Define classes for Doctags serialization."""
22

33
from enum import Enum
4-
from typing import Dict, List, Optional, Union
4+
from typing import Any, Dict, List, Optional, Union
55

66
from pydantic import BaseModel
77
from typing_extensions import override
@@ -91,7 +91,7 @@ def serialize(
9191
item: TextItem,
9292
doc_serializer: BaseDocSerializer,
9393
doc: DoclingDocument,
94-
**kwargs,
94+
**kwargs: Any,
9595
) -> SerializationResult:
9696
"""Serializes the passed item."""
9797
from docling_core.types.doc.document import SectionHeaderItem
@@ -154,7 +154,7 @@ def serialize(
154154
item: TableItem,
155155
doc_serializer: BaseDocSerializer,
156156
doc: DoclingDocument,
157-
**kwargs,
157+
**kwargs: Any,
158158
) -> SerializationResult:
159159
"""Serializes the passed item."""
160160
params = DocTagsParams(**kwargs)
@@ -201,7 +201,7 @@ def serialize(
201201
item: PictureItem,
202202
doc_serializer: BaseDocSerializer,
203203
doc: DoclingDocument,
204-
**kwargs,
204+
**kwargs: Any,
205205
) -> SerializationResult:
206206
"""Serializes the passed item."""
207207
params = DocTagsParams(**kwargs)
@@ -284,7 +284,7 @@ def serialize(
284284
item: KeyValueItem,
285285
doc_serializer: "BaseDocSerializer",
286286
doc: DoclingDocument,
287-
**kwargs,
287+
**kwargs: Any,
288288
) -> SerializationResult:
289289
"""Serializes the passed item."""
290290
params = DocTagsParams(**kwargs)
@@ -356,7 +356,7 @@ def serialize(
356356
item: FormItem,
357357
doc_serializer: "BaseDocSerializer",
358358
doc: DoclingDocument,
359-
**kwargs,
359+
**kwargs: Any,
360360
) -> SerializationResult:
361361
"""Serializes the passed item."""
362362
# TODO add actual implementation
@@ -378,7 +378,7 @@ def serialize(
378378
list_level: int = 0,
379379
is_inline_scope: bool = False,
380380
visited: Optional[set[str]] = None, # refs of visited items
381-
**kwargs,
381+
**kwargs: Any,
382382
) -> SerializationResult:
383383
"""Serializes the passed item."""
384384
my_visited = visited if visited is not None else set()
@@ -423,7 +423,7 @@ def serialize(
423423
doc: DoclingDocument,
424424
list_level: int = 0,
425425
visited: Optional[set[str]] = None, # refs of visited items
426-
**kwargs,
426+
**kwargs: Any,
427427
) -> SerializationResult:
428428
"""Serializes the passed item."""
429429
my_visited = visited if visited is not None else set()
@@ -454,7 +454,7 @@ def serialize(
454454
item: NodeItem,
455455
doc_serializer: "BaseDocSerializer",
456456
doc: DoclingDocument,
457-
**kwargs,
457+
**kwargs: Any,
458458
) -> SerializationResult:
459459
"""Serializes the passed item."""
460460
return create_ser_result()
@@ -477,7 +477,10 @@ class DocTagsDocSerializer(DocSerializer):
477477

478478
@override
479479
def serialize_doc(
480-
self, *, parts: list[SerializationResult], **kwargs
480+
self,
481+
*,
482+
parts: list[SerializationResult],
483+
**kwargs: Any,
481484
) -> SerializationResult:
482485
"""Serialize a document out of its pages."""
483486
delim = _get_delim(params=self.params)
@@ -496,7 +499,7 @@ def serialize_doc(
496499
def serialize_captions(
497500
self,
498501
item: FloatingItem,
499-
**kwargs,
502+
**kwargs: Any,
500503
) -> SerializationResult:
501504
"""Serialize the item's captions."""
502505
params = DocTagsParams(**kwargs)

0 commit comments

Comments
 (0)