Skip to content

Commit 8ccb5cd

Browse files
committed
feat: allow extra config with multiple output command
1 parent 91385fa commit 8ccb5cd

File tree

3 files changed: +22 -3 lines changed

tests/pytesseract_test.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,24 @@ def test_run_and_get_multiple_output(test_file, function_mapping, extensions):
267267
assert result == function_mapping[extension](test_file)
268268

269269

270+
def test_run_and_get_multiple_output_with_extra_config(
271+
test_file,
272+
function_mapping,
273+
):
274+
compound_results = run_and_get_multiple_output(
275+
test_file,
276+
extensions=['hocr', 'txt'],
277+
extra_config='hocr_char_boxes=1',
278+
)
279+
assert (
280+
compound_results[0][:1000]
281+
== function_mapping['hocr'](test_file, config='-c hocr_char_boxes=1')[
282+
:1000
283+
]
284+
)
285+
assert compound_results[1] == function_mapping['txt'](test_file)
286+
287+
270288
@pytest.mark.skipif(
271289
TESSERACT_VERSION[:2] < (4, 1),
272290
reason='requires tesseract >= 4.1',

unstructured_pytesseract/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,4 +16,4 @@
1616
from .pytesseract import TSVNotSupported
1717

1818

19-
__version__ = '0.3.13'
19+
__version__ = '0.3.14'

unstructured_pytesseract/pytesseract.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -297,15 +297,16 @@ def run_and_get_multiple_output(
297297
lang: Optional[str] = None,
298298
nice: int = 0,
299299
timeout: int = 0,
300+
extra_config: str = '',
300301
return_bytes: bool = False,
301302
):
302303
config = ' '.join(
303304
EXTENTION_TO_CONFIG.get(extension, '') for extension in extensions
304305
).strip()
305306
if config:
306-
config = f'-c {config}'
307+
config = f'-c {config} {extra_config}'
307308
else:
308-
config = ''
309+
config = extra_config
309310

310311
with save(image) as (temp_name, input_filename):
311312
kwargs = {

0 commit comments

Comments
 (0)