Skip to content

Commit 0c6ad3f

Browse files
committed
Merge remote-tracking branch 'upstream/main' into pprados/fix_password
2 parents 014f2fe + 27cd53b commit 0c6ad3f

File tree

6 files changed

+1073
-897
lines changed

6 files changed

+1073
-897
lines changed

.github/workflows/ingest-test-fixtures-update-pr.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ jobs:
109109
sudo apt-get install -y tesseract-ocr-kor
110110
sudo apt-get install diffstat
111111
tesseract --version
112+
python -m nltk.downloader punkt_tab averaged_perceptron_tagger_eng
112113
./test_unstructured_ingest/test-ingest-src.sh
113114
114115
- name: Save branch name to environment file

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
## 0.16.14-dev0
2+
3+
### Enhancements
4+
5+
### Features
6+
7+
### Fixes
8+
- **Fix an issue with multiple values for `infer_table_structure`**. When partitioning an email with image attachments, the kwargs passed into `partition` to partition the image already contain `infer_table_structure`. Now the `partition` function checks whether the kwargs already contain `infer_table_structure`.
9+
110
## 0.16.13
211

312
### Enhancements

test_unstructured/partition/test_auto.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,33 @@ def test_auto_partition_pdf_with_fast_strategy(request: FixtureRequest):
570570
)
571571

572572

573+
@pytest.mark.parametrize("infer_bool", [True, False])
574+
def test_auto_handles_kwarg_with_infer_table_structure(infer_bool):
575+
with patch(
576+
"unstructured.partition.pdf_image.ocr.process_file_with_ocr",
577+
) as mock_process_file_with_model:
578+
partition(
579+
example_doc_path("pdf/layout-parser-paper-fast.pdf"),
580+
pdf_infer_table_structure=True,
581+
strategy=PartitionStrategy.HI_RES,
582+
infer_table_structure=infer_bool,
583+
)
584+
assert mock_process_file_with_model.call_args[1]["infer_table_structure"] is infer_bool
585+
586+
587+
def test_auto_handles_kwarg_with_infer_table_structure_when_none():
588+
with patch(
589+
"unstructured.partition.pdf_image.ocr.process_file_with_ocr",
590+
) as mock_process_file_with_model:
591+
partition(
592+
example_doc_path("pdf/layout-parser-paper-fast.pdf"),
593+
pdf_infer_table_structure=True,
594+
strategy=PartitionStrategy.HI_RES,
595+
infer_table_structure=None,
596+
)
597+
assert mock_process_file_with_model.call_args[1]["infer_table_structure"] is True
598+
599+
573600
def test_auto_partition_pdf_uses_pdf_infer_table_structure_argument():
574601
with patch(
575602
"unstructured.partition.pdf_image.ocr.process_file_with_ocr",

0 commit comments

Comments
 (0)