Skip to content

Commit 1a5146a

Browse files
authored
fix(ocr): use PSM integer values directly instead of constructor (#2578)
* fix(ocr): use PSM integer values directly instead of constructor

  - Use integer psm value directly instead of calling tesserocr.PSM()
  - Fixed in both main_psm and script_readers initialization
  - tesserocr.PSM is a class with integer constants, not an enum

  Fixes #2576

* DCO Remediation Commit for mulgyeol <[email protected]>

  I, mulgyeol <[email protected]>, hereby add my Signed-off-by to this commit: da63a17

  Signed-off-by: mulgyeol <[email protected]>

---------

Signed-off-by: mulgyeol <[email protected]>
1 parent 32a5aed commit 1a5146a

File tree

2 files changed

+3
-4
lines changed

2 files changed

+3
-4
lines changed

docling/models/tesseract_ocr_model.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ def __init__(
9797

9898
# Set main OCR reader with configurable PSM
9999
main_psm = (
100-
tesserocr.PSM(self.options.psm)
101-
if self.options.psm is not None
102-
else tesserocr.PSM.AUTO
100+
self.options.psm if self.options.psm is not None else tesserocr.PSM.AUTO
103101
)
104102
if lang == "auto":
105103
self.reader = tesserocr.PyTessBaseAPI(psm=main_psm, **tesserocr_kwargs)
@@ -195,7 +193,7 @@ def __call__(
195193
tesserocr.PyTessBaseAPI(
196194
path=self.reader.GetDatapath(),
197195
lang=lang,
198-
psm=tesserocr.PSM(self.options.psm)
196+
psm=self.options.psm
199197
if self.options.psm is not None
200198
else tesserocr.PSM.AUTO,
201199
init=True,

tests/test_e2e_ocr_conversion.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def test_e2e_conversions():
6363
(TesseractOcrOptions(), True),
6464
(TesseractCliOcrOptions(), True),
6565
(EasyOcrOptions(), False),
66+
(TesseractOcrOptions(psm=3), True),
6667
(TesseractOcrOptions(force_full_page_ocr=True), True),
6768
(TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True),
6869
(TesseractCliOcrOptions(force_full_page_ocr=True), True),

0 commit comments

Comments
 (0)