Skip to content

Commit baa2cc3

Browse files
authored
feat: enable precision control in float serialization (#352)
Signed-off-by: Panos Vagenas <[email protected]>
1 parent bf88e9d commit baa2cc3

File tree

6 files changed

+587
-12
lines changed

6 files changed

+587
-12
lines changed

docling_core/types/doc/base.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from enum import Enum
44
from typing import List, Tuple
55

6-
from pydantic import BaseModel
6+
from pydantic import BaseModel, FieldSerializationInfo, field_serializer
77

88

99
class ImageRefMode(str, Enum):
@@ -21,12 +21,28 @@ class CoordOrigin(str, Enum):
2121
BOTTOMLEFT = "BOTTOMLEFT"
2222

2323

24+
# Serialization-context key under which callers supply the number of decimal
# places used when rounding coordinate fields during serialization.
_CTX_COORD_PREC = "coord_prec"
25+
26+
27+
def _serialize_precision(
    value: float, info: FieldSerializationInfo, ctx_key: str
) -> float:
    """Round ``value`` to the precision found in the serialization context.

    Args:
        value: The field value being serialized.
        info: Serialization info whose ``context`` may carry a precision.
        ctx_key: Key under which the precision is looked up in ``info.context``.

    Returns:
        ``value`` rounded to the requested number of decimal places, or
        ``value`` unchanged when the context holds no integer precision
        under ``ctx_key``.
    """
    context = info.context
    if not context:
        # No context (or an empty one) means no rounding was requested.
        return value
    precision = context.get(ctx_key)
    # ``bool`` is a subclass of ``int``; reject it so that a stray ``True`` is
    # not silently interpreted as "round to one decimal place".
    if isinstance(precision, int) and not isinstance(precision, bool):
        return round(value, precision)
    return value
34+
35+
2436
class Size(BaseModel):
2537
"""Size."""
2638

2739
width: float = 0.0
2840
height: float = 0.0
2941

42+
@field_serializer("width", "height")
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
    """Serialize a dimension, rounding it per the coordinate precision key."""
    rounded = _serialize_precision(value, info, ctx_key=_CTX_COORD_PREC)
    return rounded
45+
3046
def as_tuple(self):
    """Return the size as a ``(width, height)`` tuple."""
    width, height = self.width, self.height
    return width, height
@@ -52,6 +68,10 @@ def height(self):
5268
"""height."""
5369
return abs(self.t - self.b)
5470

71+
@field_serializer("l", "t", "r", "b")
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
    """Serialize a box edge, rounding it per the coordinate precision key."""
    rounded = _serialize_precision(value, info, ctx_key=_CTX_COORD_PREC)
    return rounded
74+
5575
def resize_by_scale(self, x_scale: float, y_scale: float):
5676
"""resize_by_scale."""
5777
return BoundingBox(

docling_core/types/doc/document.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
BaseModel,
2727
ConfigDict,
2828
Field,
29+
FieldSerializationInfo,
2930
StringConstraints,
3031
computed_field,
32+
field_serializer,
3133
field_validator,
3234
model_validator,
3335
validate_call,
@@ -38,7 +40,12 @@
3840
from docling_core.search.package import VERSION_PATTERN
3941
from docling_core.types.base import _JSON_POINTER_REGEX
4042
from docling_core.types.doc import BoundingBox, Size
41-
from docling_core.types.doc.base import CoordOrigin, ImageRefMode
43+
from docling_core.types.doc.base import (
44+
_CTX_COORD_PREC,
45+
CoordOrigin,
46+
ImageRefMode,
47+
_serialize_precision,
48+
)
4249
from docling_core.types.doc.labels import (
4350
CodeLanguageLabel,
4451
DocItemLabel,
@@ -85,6 +92,8 @@
8592
]
8693
)
8794

95+
# Serialization-context key under which callers supply the number of decimal
# places used when rounding confidence fields during serialization.
_CTX_CONFID_PREC = "confid_prec"
96+
8897

8998
class BaseAnnotation(BaseModel):
9099
"""Base class for all annotation types."""
@@ -98,6 +107,10 @@ class PictureClassificationClass(BaseModel):
98107
class_name: str
99108
confidence: float
100109

110+
@field_serializer("confidence")
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
    """Serialize the confidence, rounding it per the confidence precision key."""
    rounded = _serialize_precision(value, info, ctx_key=_CTX_CONFID_PREC)
    return rounded
113+
101114

102115
class PictureClassificationData(BaseAnnotation):
103116
"""PictureClassificationData."""
@@ -125,6 +138,10 @@ class PictureMoleculeData(BaseAnnotation):
125138
segmentation: List[Tuple[float, float]]
126139
provenance: str
127140

141+
@field_serializer("confidence")
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
    """Serialize the confidence, rounding it per the confidence precision key."""
    rounded = _serialize_precision(value, info, ctx_key=_CTX_CONFID_PREC)
    return rounded
144+
128145

129146
class MiscAnnotation(BaseAnnotation):
130147
"""MiscAnnotation."""
@@ -3048,6 +3065,8 @@ def save_as_json(
30483065
artifacts_dir: Optional[Path] = None,
30493066
image_mode: ImageRefMode = ImageRefMode.EMBEDDED,
30503067
indent: int = 2,
3068+
coord_precision: Optional[int] = None,
3069+
confid_precision: Optional[int] = None,
30513070
):
30523071
"""Save as json."""
30533072
if isinstance(filename, str):
@@ -3061,7 +3080,9 @@ def save_as_json(
30613080
artifacts_dir, image_mode, reference_path=reference_path
30623081
)
30633082

3064-
out = new_doc.export_to_dict()
3083+
out = new_doc.export_to_dict(
3084+
coord_precision=coord_precision, confid_precision=confid_precision
3085+
)
30653086
with open(filename, "w", encoding="utf-8") as fw:
30663087
json.dump(out, fw, indent=indent)
30673088

@@ -3087,6 +3108,8 @@ def save_as_yaml(
30873108
artifacts_dir: Optional[Path] = None,
30883109
image_mode: ImageRefMode = ImageRefMode.EMBEDDED,
30893110
default_flow_style: bool = False,
3111+
coord_precision: Optional[int] = None,
3112+
confid_precision: Optional[int] = None,
30903113
):
30913114
"""Save as yaml."""
30923115
if isinstance(filename, str):
@@ -3100,7 +3123,9 @@ def save_as_yaml(
31003123
artifacts_dir, image_mode, reference_path=reference_path
31013124
)
31023125

3103-
out = new_doc.export_to_dict()
3126+
out = new_doc.export_to_dict(
3127+
coord_precision=coord_precision, confid_precision=confid_precision
3128+
)
31043129
with open(filename, "w", encoding="utf-8") as fw:
31053130
yaml.dump(out, fw, default_flow_style=default_flow_style)
31063131

@@ -3125,9 +3150,18 @@ def export_to_dict(
31253150
mode: str = "json",
31263151
by_alias: bool = True,
31273152
exclude_none: bool = True,
3153+
coord_precision: Optional[int] = None,
3154+
confid_precision: Optional[int] = None,
31283155
) -> Dict[str, Any]:
31293156
"""Export to dict."""
3130-
out = self.model_dump(mode=mode, by_alias=by_alias, exclude_none=exclude_none)
3157+
context = {}
3158+
if coord_precision is not None:
3159+
context[_CTX_COORD_PREC] = coord_precision
3160+
if confid_precision is not None:
3161+
context[_CTX_CONFID_PREC] = confid_precision
3162+
out = self.model_dump(
3163+
mode=mode, by_alias=by_alias, exclude_none=exclude_none, context=context
3164+
)
31313165

31323166
return out
31333167

docling_core/types/doc/page.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,21 @@
2525
from PIL import Image as PILImage
2626
from PIL import ImageColor, ImageDraw, ImageFont
2727
from PIL.ImageFont import FreeTypeFont
28-
from pydantic import AnyUrl, BaseModel, Field, model_validator
28+
from pydantic import (
29+
AnyUrl,
30+
BaseModel,
31+
Field,
32+
FieldSerializationInfo,
33+
field_serializer,
34+
model_validator,
35+
)
2936

30-
from docling_core.types.doc.base import BoundingBox, CoordOrigin
37+
from docling_core.types.doc.base import (
38+
_CTX_COORD_PREC,
39+
BoundingBox,
40+
CoordOrigin,
41+
_serialize_precision,
42+
)
3143
from docling_core.types.doc.document import ImageRef
3244

3345
_logger = logging.getLogger(__name__)
@@ -105,6 +117,10 @@ class BoundingRectangle(BaseModel):
105117

106118
coord_origin: CoordOrigin = CoordOrigin.BOTTOMLEFT
107119

120+
@field_serializer("r_x0", "r_y0", "r_x1", "r_y1", "r_x2", "r_y2", "r_x3", "r_y3")
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
    """Serialize a corner coordinate, rounding it per the coordinate precision key."""
    rounded = _serialize_precision(value, info, ctx_key=_CTX_COORD_PREC)
    return rounded
123+
108124
@property
109125
def width(self) -> float:
110126
"""Calculate the width of the rectangle."""

0 commit comments

Comments
 (0)