Skip to content

Commit e8f7755

Browse files
authored
Fix: solved embedded image serialization/deserialization with Pydantic. (#95)
Signed-off-by: Eric Musa <[email protected]>
1 parent fd7529f commit e8f7755

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

docling_core/types/doc/document.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -445,7 +445,7 @@ class ImageRef(BaseModel):
445445
mimetype: str
446446
dpi: int
447447
size: Size
448-
uri: Union[AnyUrl, Path]
448+
uri: Union[AnyUrl, Path] = Field(union_mode="left_to_right")
449449
_pil: Optional[PILImage.Image] = None
450450

451451
@property
@@ -1864,7 +1864,7 @@ def load_from_json(cls, filename: Path) -> "DoclingDocument":
18641864
18651865
"""
18661866
with open(filename, "r") as f:
1867-
return cls.model_validate(json.loads(f.read()))
1867+
return cls.model_validate_json(f.read())
18681868

18691869
def save_as_yaml(
18701870
self,

test/test_docling_doc.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -693,6 +693,18 @@ def _verify_saved_output(filename: str, paths: List[Path]):
693693
assert pred == gt, f"pred!=gt for {filename}"
694694

695695

696+
def _verify_loaded_output(filename: str, pred=None):
697+
gt = DoclingDocument.load_from_json(Path(str(filename) + ".gt"))
698+
699+
pred = pred or DoclingDocument.load_from_json(Path(filename))
700+
assert isinstance(pred, DoclingDocument)
701+
702+
assert (
703+
pred.export_to_dict() == gt.export_to_dict()
704+
), f"pred.export_to_dict() != gt.export_to_dict() for {filename}"
705+
assert pred == gt, f"pred!=gt for {filename}"
706+
707+
696708
def test_save_to_disk():
697709

698710
doc: DoclingDocument = _construct_doc()
@@ -761,12 +773,18 @@ def test_save_to_disk():
761773
)
762774
_verify_saved_output(filename=filename, paths=paths)
763775

776+
doc_emb_loaded = DoclingDocument.load_from_json(filename)
777+
_verify_loaded_output(filename=filename, pred=doc_emb_loaded)
778+
764779
filename = Path("test/data/doc/constructed_doc.referenced.json")
765780
doc.save_as_json(
766781
filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.REFERENCED
767782
)
768783
_verify_saved_output(filename=filename, paths=paths)
769784

785+
doc_ref_loaded = DoclingDocument.load_from_json(filename)
786+
_verify_loaded_output(filename=filename, pred=doc_ref_loaded)
787+
770788
### YAML
771789

772790
filename = Path("test/data/doc/constructed_doc.embedded.yaml")

0 commit comments

Comments
 (0)