3131from docling .backend .webvtt_backend import WebVTTDocumentBackend
3232from docling .backend .xml .jats_backend import JatsDocumentBackend
3333from docling .backend .xml .uspto_backend import PatentUsptoDocumentBackend
34- from docling .datamodel .backend_options import BackendOptions , HTMLBackendOptions
34+ from docling .datamodel .backend_options import (
35+ BackendOptions ,
36+ HTMLBackendOptions ,
37+ MarkdownBackendOptions ,
38+ PdfBackendOptions ,
39+ )
3540from docling .datamodel .base_models import (
3641 BaseFormatOption ,
3742 ConversionStatus ,
@@ -98,7 +103,7 @@ class PowerpointFormatOption(FormatOption):
class MarkdownFormatOption(FormatOption):
    """Format option for Markdown input.

    Pairs ``SimplePipeline`` with ``MarkdownDocumentBackend``.
    """

    pipeline_cls: Type = SimplePipeline
    backend: Type[AbstractDocumentBackend] = MarkdownDocumentBackend
    # Markdown-specific backend options; unset (None) by default.
    # NOTE(review): presumably None means "use the backend's own defaults" --
    # confirm against the code that consumes backend_options.
    backend_options: Optional[MarkdownBackendOptions] = None
102107
103108
104109class AsciiDocFormatOption (FormatOption ):
@@ -109,7 +114,7 @@ class AsciiDocFormatOption(FormatOption):
class HTMLFormatOption(FormatOption):
    """Format option for HTML input.

    Pairs ``SimplePipeline`` with ``HTMLDocumentBackend``.
    """

    pipeline_cls: Type = SimplePipeline
    backend: Type[AbstractDocumentBackend] = HTMLDocumentBackend
    # HTML-specific backend options; unset (None) by default rather than an
    # eagerly constructed HTMLBackendOptions() instance.
    # NOTE(review): presumably None means "use the backend's own defaults" --
    # confirm against the code that consumes backend_options.
    backend_options: Optional[HTMLBackendOptions] = None
113118
114119
115120class PatentUsptoFormatOption (FormatOption ):
@@ -130,6 +135,7 @@ class ImageFormatOption(FormatOption):
class PdfFormatOption(FormatOption):
    """Format option for PDF input.

    Pairs ``StandardPdfPipeline`` with ``DoclingParseV4DocumentBackend``.
    """

    pipeline_cls: Type = StandardPdfPipeline
    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
    # PDF-specific backend options; unset (None) by default.
    # NOTE(review): presumably None means "use the backend's own defaults" --
    # confirm against the code that consumes backend_options.
    backend_options: Optional[PdfBackendOptions] = None
133139
134140
135141class AudioFormatOption (FormatOption ):
@@ -139,48 +145,24 @@ class AudioFormatOption(FormatOption):
139145
140146def _get_default_option (format : InputFormat ) -> FormatOption :
141147 format_to_default_options = {
142- InputFormat .CSV : FormatOption (
143- pipeline_cls = SimplePipeline , backend = CsvDocumentBackend
144- ),
145- InputFormat .XLSX : FormatOption (
146- pipeline_cls = SimplePipeline , backend = MsExcelDocumentBackend
147- ),
148- InputFormat .DOCX : FormatOption (
149- pipeline_cls = SimplePipeline , backend = MsWordDocumentBackend
150- ),
151- InputFormat .PPTX : FormatOption (
152- pipeline_cls = SimplePipeline , backend = MsPowerpointDocumentBackend
153- ),
154- InputFormat .MD : FormatOption (
155- pipeline_cls = SimplePipeline , backend = MarkdownDocumentBackend
156- ),
157- InputFormat .ASCIIDOC : FormatOption (
158- pipeline_cls = SimplePipeline , backend = AsciiDocBackend
159- ),
160- InputFormat .HTML : FormatOption (
161- pipeline_cls = SimplePipeline ,
162- backend = HTMLDocumentBackend ,
163- backend_options = HTMLBackendOptions (),
164- ),
165- InputFormat .XML_USPTO : FormatOption (
166- pipeline_cls = SimplePipeline , backend = PatentUsptoDocumentBackend
167- ),
168- InputFormat .XML_JATS : FormatOption (
169- pipeline_cls = SimplePipeline , backend = JatsDocumentBackend
170- ),
148+ InputFormat .CSV : CsvFormatOption (),
149+ InputFormat .XLSX : ExcelFormatOption (),
150+ InputFormat .DOCX : WordFormatOption (),
151+ InputFormat .PPTX : PowerpointFormatOption (),
152+ InputFormat .MD : MarkdownFormatOption (),
153+ InputFormat .ASCIIDOC : AsciiDocFormatOption (),
154+ InputFormat .HTML : HTMLFormatOption (),
155+ InputFormat .XML_USPTO : PatentUsptoFormatOption (),
156+ InputFormat .XML_JATS : XMLJatsFormatOption (),
171157 InputFormat .METS_GBS : FormatOption (
172158 pipeline_cls = StandardPdfPipeline , backend = MetsGbsDocumentBackend
173159 ),
174- InputFormat .IMAGE : FormatOption (
175- pipeline_cls = StandardPdfPipeline , backend = DoclingParseV4DocumentBackend
176- ),
177- InputFormat .PDF : FormatOption (
178- pipeline_cls = StandardPdfPipeline , backend = DoclingParseV4DocumentBackend
179- ),
160+ InputFormat .IMAGE : ImageFormatOption (),
161+ InputFormat .PDF : PdfFormatOption (),
180162 InputFormat .JSON_DOCLING : FormatOption (
181163 pipeline_cls = SimplePipeline , backend = DoclingJSONBackend
182164 ),
183- InputFormat .AUDIO : FormatOption ( pipeline_cls = AsrPipeline , backend = NoOpBackend ),
165+ InputFormat .AUDIO : AudioFormatOption ( ),
184166 InputFormat .VTT : FormatOption (
185167 pipeline_cls = SimplePipeline , backend = WebVTTDocumentBackend
186168 ),
0 commit comments