Skip to content

Commit b49d1ad

Browse files
feat: updating default parameters to get better performance with docling-parse (#2208)
* updated the code

Signed-off-by: Peter Staar <[email protected]>

* updated the parameters

Signed-off-by: Peter Staar <[email protected]>

---------

Signed-off-by: Peter Staar <[email protected]>
1 parent a9f41b0 commit b49d1ad

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

docling/backend/docling_parse_v4_backend.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,21 @@ def __init__(
3030
page_no: int,
3131
create_words: bool = True,
3232
create_textlines: bool = True,
33+
keep_chars: bool = False,
34+
keep_lines: bool = False,
35+
keep_images: bool = True,
3336
):
3437
self._ppage = page_obj
3538
self._dp_doc = dp_doc
3639
self._page_no = page_no
40+
3741
self._create_words = create_words
3842
self._create_textlines = create_textlines
3943

44+
self._keep_chars = keep_chars
45+
self._keep_lines = keep_lines
46+
self._keep_images = keep_images
47+
4048
self._dpage: Optional[SegmentedPdfPage] = None
4149
self._unloaded = False
4250
self.valid = (self._ppage is not None) and (self._dp_doc is not None)
@@ -47,9 +55,9 @@ def _ensure_parsed(self) -> None:
4755

4856
seg_page = self._dp_doc.get_page(
4957
self._page_no + 1,
50-
keep_chars=True,
51-
keep_lines=True,
52-
keep_bitmaps=True,
58+
keep_chars=self._keep_chars,
59+
keep_lines=self._keep_lines,
60+
keep_bitmaps=self._keep_images,
5361
create_words=self._create_words,
5462
create_textlines=self._create_textlines,
5563
enforce_same_font=True,

0 commit comments

Comments
 (0)