Skip to content

Commit fc1cfb0

Browse files
Eric-Musa and cau-git authored
feat: simple method to load DoclingDocument from .json files (#71)
Signed-off-by: Eric Musa <[email protected]> Signed-off-by: Christoph Auer <[email protected]> Co-authored-by: Christoph Auer <[email protected]>
1 parent 3bd83bc commit fc1cfb0

File tree

1 file changed

+34
-18
lines changed

1 file changed

+34
-18
lines changed

docling_core/types/doc/document.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,6 +1760,20 @@ def save_as_json(
17601760
with open(filename, "w") as fw:
17611761
json.dump(out, fw, indent=indent)
17621762

1763+
@classmethod
def load_from_json(cls, filename: Path) -> "DoclingDocument":
    """Load a DoclingDocument from a .json file.

    The file content is parsed as JSON and the resulting object is passed
    to ``cls.model_validate``. Errors raised while opening the file,
    decoding the JSON, or validating the data are not caught here and
    propagate to the caller.

    :param filename: The filename to load a saved DoclingDocument from a .json.
    :type filename: Path

    :returns: The loaded DoclingDocument.
    :rtype: DoclingDocument

    """
    # Decode explicitly as UTF-8 (the standard JSON interchange encoding)
    # instead of relying on the platform's locale encoding, which is not
    # UTF-8 everywhere and would break on non-ASCII document text.
    with open(filename, "r", encoding="utf-8") as f:
        return cls.model_validate(json.load(f))
1776+
17631777
def save_as_yaml(
17641778
self,
17651779
filename: Path,
@@ -1852,26 +1866,28 @@ def export_to_markdown( # noqa: C901
18521866
from_element and to_element; defaulting to the whole document.
18531867
18541868
:param delim: Delimiter to use when concatenating the various
1855-
Markdown parts. Defaults to "\n\n".
1856-
:type delim: str
1869+
Markdown parts. (Default value = "\n").
1870+
:type delim: str = "\n"
18571871
:param from_element: Body slicing start index (inclusive).
1858-
Defaults to 0.
1859-
:type from_element: int
1872+
(Default value = 0).
1873+
:type from_element: int = 0
18601874
:param to_element: Body slicing stop index
1861-
(exclusive). Defaults to 0maxint.
1862-
:type to_element: int
1863-
:param delim: str: (Default value = "\n\n")
1864-
:param labels: set[DocItemLabel]
1865-
:param "subtitle-level-1":
1866-
:param "paragraph":
1867-
:param "caption":
1868-
:param "table":
1869-
:param "Text":
1870-
:param "text":
1871-
:param strict_text: bool: (Default value = False)
1872-
:param image_placeholder str: (Default value = "<!-- image -->")
1873-
the placeholder to include to position images in the markdown.
1874-
:param indent: int (default=4): indent of the nested lists
1875+
(exclusive). (Default value = maxint).
1876+
:type to_element: int = sys.maxsize
1877+
:param labels: The set of document labels to include in the export.
1878+
:type labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS
1879+
:param strict_text: bool: Whether to only include the text content
1880+
of the document. (Default value = False).
1881+
:type strict_text: bool = False
1882+
:param image_placeholder: The placeholder to include to position
1883+
images in the markdown. (Default value = "\<!-- image --\>").
1884+
:type image_placeholder: str = "<!-- image -->"
1885+
:param image_mode: The mode to use for including images in the
1886+
markdown. (Default value = ImageRefMode.PLACEHOLDER).
1887+
:type image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER
1888+
:param indent: The indent in spaces of the nested lists.
1889+
(Default value = 4).
1890+
:type indent: int = 4
18751891
:returns: The exported Markdown representation.
18761892
:rtype: str
18771893
"""

0 commit comments

Comments
 (0)