Skip to content

Commit ec588df

Browse files
authored
feat: enable precision control in float serialization (#1914)
* chore: propagate precision control in float serialization
  Signed-off-by: Panos Vagenas <[email protected]>
* parametrize float serialization, propagate core updates
  Signed-off-by: Panos Vagenas <[email protected]>
* update test float precision
  Signed-off-by: Panos Vagenas <[email protected]>
* repin docling-core
  Signed-off-by: Panos Vagenas <[email protected]>
---------
Signed-off-by: Panos Vagenas <[email protected]>
1 parent 931eb55 commit ec588df

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+364,215
-368,380
lines changed

docling/datamodel/base_models.py

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,22 @@
1212
Size,
1313
TableCell,
1414
)
15+
from docling_core.types.doc.base import PydanticSerCtxKey, round_pydantic_float
1516
from docling_core.types.doc.page import SegmentedPdfPage, TextCell
1617
from docling_core.types.io import (
1718
DocumentStream,
1819
)
1920

2021
# DO NOT REMOVE; explicitly exposed from this location
2122
from PIL.Image import Image
22-
from pydantic import BaseModel, ConfigDict, Field, computed_field
23+
from pydantic import (
24+
BaseModel,
25+
ConfigDict,
26+
Field,
27+
FieldSerializationInfo,
28+
computed_field,
29+
field_serializer,
30+
)
2331

2432
if TYPE_CHECKING:
2533
from docling.backend.pdf_backend import PdfPageBackend
@@ -142,6 +150,10 @@ class Cluster(BaseModel):
142150
cells: List[TextCell] = []
143151
children: List["Cluster"] = [] # Add child cluster support
144152

153+
@field_serializer("confidence")
154+
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
155+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
156+
145157

146158
class BasePageElement(BaseModel):
147159
label: DocItemLabel
@@ -194,6 +206,16 @@ class FigureElement(BasePageElement):
194206
predicted_class: Optional[str] = None
195207
confidence: Optional[float] = None
196208

209+
@field_serializer("confidence")
210+
def _serialize(
211+
self, value: Optional[float], info: FieldSerializationInfo
212+
) -> Optional[float]:
213+
return (
214+
round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
215+
if value is not None
216+
else None
217+
)
218+
197219

198220
class FigureClassificationPrediction(BaseModel):
199221
figure_count: int = 0

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ authors = [
4444
requires-python = '>=3.9,<4.0'
4545
dependencies = [
4646
'pydantic (>=2.0.0,<3.0.0)',
47-
'docling-core[chunking] (>=2.40.0,<3.0.0)',
47+
'docling-core[chunking] (>=2.42.0,<3.0.0)',
4848
'docling-parse (>=4.0.0,<5.0.0)',
4949
'docling-ibm-models (>=3.6.0,<4)',
5050
'filetype (>=1.2.0,<2.0.0)',

0 commit comments

Comments
 (0)