Skip to content

Commit 4227fcc

Browse files
authored
fix(markdown): set the correct discriminator in md backend options (#2501)
Signed-off-by: Cesar Berrospi Ramis <[email protected]>
1 parent a30e6a7 commit 4227fcc

File tree

2 files changed, with 33 additions and 4 deletions.

docling/backend/md_backend.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -536,6 +536,11 @@ def supports_pagination(cls) -> bool:
536536
def supported_formats(cls) -> set[InputFormat]:
537537
return {InputFormat.MD}
538538

539+
@classmethod
540+
@override
541+
def get_default_options(cls) -> MarkdownBackendOptions:
542+
return MarkdownBackendOptions()
543+
539544
def convert(self) -> DoclingDocument:
540545
_log.debug("converting Markdown...")
541546

@@ -587,17 +592,24 @@ def _restore_original_html(txt, regex):
587592
self._html_blocks = 0
588593
# delegate to HTML backend
589594
stream = BytesIO(bytes(html_str, encoding="utf-8"))
595+
md_options = cast(MarkdownBackendOptions, self.options)
596+
html_options = HTMLBackendOptions(
597+
enable_local_fetch=md_options.enable_local_fetch,
598+
enable_remote_fetch=md_options.enable_remote_fetch,
599+
fetch_images=md_options.fetch_images,
600+
source_uri=md_options.source_uri,
601+
)
590602
in_doc = InputDocument(
591603
path_or_stream=stream,
592604
format=InputFormat.HTML,
593605
backend=html_backend_cls,
594606
filename=self.file.name,
595-
backend_options=self.options,
607+
backend_options=html_options,
596608
)
597609
html_backend_obj = html_backend_cls(
598610
in_doc=in_doc,
599611
path_or_stream=stream,
600-
options=cast(HTMLBackendOptions, self.options),
612+
options=html_options,
601613
)
602614
doc = html_backend_obj.convert()
603615
else:

docling/datamodel/backend_options.py

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,10 +44,27 @@ class HTMLBackendOptions(BaseBackendOptions):
4444
)
4545

4646

47-
class MarkdownBackendOptions(HTMLBackendOptions):
47+
class MarkdownBackendOptions(BaseBackendOptions):
4848
"""Options specific to the Markdown backend."""
4949

50+
kind: Literal["md"] = Field("md", exclude=True, repr=False)
51+
fetch_images: bool = Field(
52+
False,
53+
description=(
54+
"Whether the backend should access remote or local resources to parse "
55+
"images in the markdown document."
56+
),
57+
)
58+
source_uri: Optional[Union[AnyUrl, PurePath]] = Field(
59+
None,
60+
description=(
61+
"The URI that originates the markdown document. If provided, the backend "
62+
"will use it to resolve relative paths in the markdown document."
63+
),
64+
)
65+
5066

5167
BackendOptions = Annotated[
52-
Union[DeclarativeBackendOptions, HTMLBackendOptions], Field(discriminator="kind")
68+
Union[DeclarativeBackendOptions, HTMLBackendOptions, MarkdownBackendOptions],
69+
Field(discriminator="kind"),
5370
]

0 commit comments

Comments
 (0)