Skip to content

Commit ea597d4

Browse files
authored
feat: allow extra config with multiple output command (#9)
* feat: allow extra config with multiple output command
* upgrade runner to 22.04
1 parent 91385fa commit ea597d4

File tree

4 files changed

+27
-8
lines changed

4 files changed

+27
-8
lines changed

.github/workflows/ci.yaml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,11 @@ jobs:
1919
fail-fast: true
2020
matrix:
2121
include:
22-
- {name: '3.12', python: '3.12', os: ubuntu-20.04, tox: py312}
23-
- {name: '3.11', python: '3.11', os: ubuntu-20.04, tox: py311}
24-
- {name: '3.10', python: '3.10', os: ubuntu-20.04, tox: py310}
25-
- {name: '3.9', python: '3.9', os: ubuntu-20.04, tox: py39}
26-
- {name: '3.8', python: '3.8', os: ubuntu-20.04, tox: py38}
22+
- {name: '3.12', python: '3.12', os: ubuntu-22.04, tox: py312}
23+
- {name: '3.11', python: '3.11', os: ubuntu-22.04, tox: py311}
24+
- {name: '3.10', python: '3.10', os: ubuntu-22.04, tox: py310}
25+
- {name: '3.9', python: '3.9', os: ubuntu-22.04, tox: py39}
26+
- {name: '3.8', python: '3.8', os: ubuntu-22.04, tox: py38}
2727
steps:
2828
- uses: actions/checkout@v3
2929

tests/pytesseract_test.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,24 @@ def test_run_and_get_multiple_output(test_file, function_mapping, extensions):
267267
assert result == function_mapping[extension](test_file)
268268

269269

270+
def test_run_and_get_multiple_output_with_extra_config(
271+
test_file,
272+
function_mapping,
273+
):
274+
compound_results = run_and_get_multiple_output(
275+
test_file,
276+
extensions=['hocr', 'txt'],
277+
extra_config='hocr_char_boxes=1',
278+
)
279+
assert (
280+
compound_results[0][:1000]
281+
== function_mapping['hocr'](test_file, config='-c hocr_char_boxes=1')[
282+
:1000
283+
]
284+
)
285+
assert compound_results[1] == function_mapping['txt'](test_file)
286+
287+
270288
@pytest.mark.skipif(
271289
TESSERACT_VERSION[:2] < (4, 1),
272290
reason='requires tesseract >= 4.1',

unstructured_pytesseract/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,4 +16,4 @@
1616
from .pytesseract import TSVNotSupported
1717

1818

19-
__version__ = '0.3.13'
19+
__version__ = '0.3.14'

unstructured_pytesseract/pytesseract.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -297,15 +297,16 @@ def run_and_get_multiple_output(
297297
lang: Optional[str] = None,
298298
nice: int = 0,
299299
timeout: int = 0,
300+
extra_config: str = '',
300301
return_bytes: bool = False,
301302
):
302303
config = ' '.join(
303304
EXTENTION_TO_CONFIG.get(extension, '') for extension in extensions
304305
).strip()
305306
if config:
306-
config = f'-c {config}'
307+
config = f'-c {config} {extra_config}'
307308
else:
308-
config = ''
309+
config = extra_config
309310

310311
with save(image) as (temp_name, input_filename):
311312
kwargs = {

0 commit comments

Comments
 (0)