Skip to content

Commit c339171

Browse files
authored
feat: extend and expose float serialization control (#353)
* feat: add confidence precision control to page text cells

  Signed-off-by: Panos Vagenas <[email protected]>

* make controls public

  Signed-off-by: Panos Vagenas <[email protected]>

---------

Signed-off-by: Panos Vagenas <[email protected]>
1 parent cb59fd3 commit c339171

File tree

3 files changed: 30 additions and 21 deletions (+30 -21 lines changed)

3 files changed

+30
-21
lines changed

docling_core/types/doc/base.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Models for the base data types."""
22

33
from enum import Enum
4-
from typing import List, Tuple
4+
from typing import Any, List, Tuple
55

66
from pydantic import BaseModel, FieldSerializationInfo, field_serializer
77

@@ -21,16 +21,23 @@ class CoordOrigin(str, Enum):
2121
BOTTOMLEFT = "BOTTOMLEFT"
2222

2323

24-
_CTX_COORD_PREC = "coord_prec"
24+
class PydanticSerCtxKey(str, Enum):
25+
"""Pydantic serialization context keys."""
2526

27+
COORD_PREC = "coord_prec" # key for coordinates precision
28+
CONFID_PREC = "confid_prec" # key for confidence values precision
2629

27-
def _serialize_precision(
28-
value: float, info: FieldSerializationInfo, ctx_key: str
30+
31+
def round_pydantic_float(
32+
val: float, ctx: Any, precision_ctx_key: PydanticSerCtxKey
2933
) -> float:
30-
precision = info.context.get(ctx_key) if info.context else None
31-
if isinstance(precision, int):
32-
return round(value, precision)
33-
return value
34+
"""Round float, provided the precision is available in the context."""
35+
precision = (
36+
ctx.get(precision_ctx_key.value)
37+
if isinstance(ctx, dict)
38+
else getattr(ctx, precision_ctx_key.value, None)
39+
)
40+
return round(val, precision) if isinstance(precision, int) else val
3441

3542

3643
class Size(BaseModel):
@@ -41,7 +48,7 @@ class Size(BaseModel):
4148

4249
@field_serializer("width", "height")
4350
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
44-
return _serialize_precision(value, info, _CTX_COORD_PREC)
51+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.COORD_PREC)
4552

4653
def as_tuple(self):
4754
"""as_tuple."""
@@ -70,7 +77,7 @@ def height(self):
7077

7178
@field_serializer("l", "t", "r", "b")
7279
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
73-
return _serialize_precision(value, info, _CTX_COORD_PREC)
80+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.COORD_PREC)
7481

7582
def resize_by_scale(self, x_scale: float, y_scale: float):
7683
"""resize_by_scale."""

docling_core/types/doc/document.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@
4141
from docling_core.types.base import _JSON_POINTER_REGEX
4242
from docling_core.types.doc import BoundingBox, Size
4343
from docling_core.types.doc.base import (
44-
_CTX_COORD_PREC,
4544
CoordOrigin,
4645
ImageRefMode,
47-
_serialize_precision,
46+
PydanticSerCtxKey,
47+
round_pydantic_float,
4848
)
4949
from docling_core.types.doc.labels import (
5050
CodeLanguageLabel,
@@ -92,8 +92,6 @@
9292
]
9393
)
9494

95-
_CTX_CONFID_PREC = "confid_prec"
96-
9795

9896
class BaseAnnotation(BaseModel):
9997
"""Base class for all annotation types."""
@@ -109,7 +107,7 @@ class PictureClassificationClass(BaseModel):
109107

110108
@field_serializer("confidence")
111109
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
112-
return _serialize_precision(value, info, _CTX_CONFID_PREC)
110+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
113111

114112

115113
class PictureClassificationData(BaseAnnotation):
@@ -140,7 +138,7 @@ class PictureMoleculeData(BaseAnnotation):
140138

141139
@field_serializer("confidence")
142140
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
143-
return _serialize_precision(value, info, _CTX_CONFID_PREC)
141+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
144142

145143

146144
class MiscAnnotation(BaseAnnotation):
@@ -4292,9 +4290,9 @@ def export_to_dict(
42924290
"""Export to dict."""
42934291
context = {}
42944292
if coord_precision is not None:
4295-
context[_CTX_COORD_PREC] = coord_precision
4293+
context[PydanticSerCtxKey.COORD_PREC.value] = coord_precision
42964294
if confid_precision is not None:
4297-
context[_CTX_CONFID_PREC] = confid_precision
4295+
context[PydanticSerCtxKey.CONFID_PREC.value] = confid_precision
42984296
out = self.model_dump(
42994297
mode=mode, by_alias=by_alias, exclude_none=exclude_none, context=context
43004298
)

docling_core/types/doc/page.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@
3535
)
3636

3737
from docling_core.types.doc.base import (
38-
_CTX_COORD_PREC,
3938
BoundingBox,
4039
CoordOrigin,
41-
_serialize_precision,
40+
PydanticSerCtxKey,
41+
round_pydantic_float,
4242
)
4343
from docling_core.types.doc.document import ImageRef
4444

@@ -119,7 +119,7 @@ class BoundingRectangle(BaseModel):
119119

120120
@field_serializer("r_x0", "r_y0", "r_x1", "r_y1", "r_x2", "r_y2", "r_x3", "r_y3")
121121
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
122-
return _serialize_precision(value, info, _CTX_COORD_PREC)
122+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.COORD_PREC)
123123

124124
@property
125125
def width(self) -> float:
@@ -290,6 +290,10 @@ class TextCell(ColorMixin, OrderedElement):
290290
confidence: float = 1.0
291291
from_ocr: bool
292292

293+
@field_serializer("confidence")
294+
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
295+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
296+
293297
def to_bounding_box(self) -> BoundingBox:
294298
"""Convert the cell rectangle to a BoundingBox."""
295299
return self.rect.to_bounding_box()

0 commit comments

Comments
 (0)