Skip to content

Commit dd17732

Browse files
authored
Merge branch 'main' into main
2 parents 90c0b12 + 9d3d695 commit dd17732

File tree

12 files changed

+468
-250
lines changed

12 files changed

+468
-250
lines changed

CHANGELOG.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,30 @@
1+
## 1.1.1
2+
3+
* Raise `NotImplementedError` when trying to index a single element of a `TextRegions`, reflecting the fact that single-element indexing does not behave correctly at the moment.
4+
5+
## 1.1.0
6+
7+
* Enhancement: Add `TextSource` to track where the text of an element came from
8+
* Enhancement: Refactor `__post_init__` of `TextRegions` and `LayoutElement` slightly to automate initialization
9+
10+
## 1.0.10
11+
12+
* Remove merging logic that's no longer used
13+
14+
## 1.0.9
15+
16+
* Make OD model loading thread safe
17+
18+
## 1.0.8
19+
20+
* Enhancement: Optimized `zoom_image` (codeflash)
21+
* Enhancement: Optimized `cells_to_html` for an 8% speedup in some cases (codeflash)
22+
* Enhancement: Optimized `outputs_to_objects` for an 88% speedup in some cases (codeflash)
23+
24+
## 1.0.7
25+
26+
* Fix a hardcoded file extension causing confusion in the logs
27+
128
## 1.0.6
229

330
* Add slicing through indexing for vectorized elements

test_unstructured_inference/inference/test_layout.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
EmbeddedTextRegion,
1414
ImageTextRegion,
1515
)
16+
from unstructured_inference.constants import IsExtracted
1617
from unstructured_inference.models.unstructuredmodel import (
1718
UnstructuredElementExtractionModel,
1819
UnstructuredObjectDetectionModel,
@@ -34,7 +35,7 @@ def mock_initial_layout():
3435
6,
3536
8,
3637
text="A very repetitive narrative. " * 10,
37-
source="Mock",
38+
is_extracted=IsExtracted.TRUE,
3839
)
3940

4041
title_block = EmbeddedTextRegion.from_coords(
@@ -43,7 +44,7 @@ def mock_initial_layout():
4344
3,
4445
4,
4546
text="A Catchy Title",
46-
source="Mock",
47+
is_extracted=IsExtracted.TRUE,
4748
)
4849

4950
return [text_block, title_block]

test_unstructured_inference/inference/test_layout_element.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from unstructured_inference.inference.layoutelement import LayoutElement, TextRegion
2+
from unstructured_inference.constants import IsExtracted, Source
23

34

4-
def test_layout_element_do_dict(mock_layout_element):
5+
def test_layout_element_to_dict(mock_layout_element):
56
expected = {
67
"coordinates": ((100, 100), (100, 300), (300, 300), (300, 100)),
78
"text": "Sample text",
9+
"is_extracted": None,
810
"type": "Text",
911
"prob": None,
1012
"source": None,
@@ -18,3 +20,31 @@ def test_layout_element_from_region(mock_rectangle):
1820
region = TextRegion(bbox=mock_rectangle)
1921

2022
assert LayoutElement.from_region(region) == expected
23+
24+
25+
def test_layoutelement_inheritance_works_correctly():
26+
"""Test that LayoutElement properly inherits from TextRegion without conflicts"""
27+
from unstructured_inference.inference.elements import TextRegion
28+
29+
# Create a TextRegion with both source and is_extracted
30+
region = TextRegion.from_coords(
31+
0, 0, 10, 10, text="test", source=Source.YOLOX, is_extracted=IsExtracted.TRUE
32+
)
33+
34+
# Convert to LayoutElement
35+
element = LayoutElement.from_region(region)
36+
37+
# Check that both properties are preserved
38+
assert element.source == Source.YOLOX, "LayoutElement should inherit source from TextRegion"
39+
assert (
40+
element.is_extracted == IsExtracted.TRUE
41+
), "LayoutElement should inherit is_extracted from TextRegion"
42+
43+
# Check that to_dict() works correctly
44+
d = element.to_dict()
45+
assert d["source"] == Source.YOLOX
46+
assert d["is_extracted"] == IsExtracted.TRUE
47+
48+
# Check that we can set source directly on LayoutElement
49+
element.source = Source.DETECTRON2_ONNX
50+
assert element.source == Source.DETECTRON2_ONNX

test_unstructured_inference/models/test_model.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import threading
23
from typing import Any
34
from unittest import mock
45

@@ -40,6 +41,49 @@ def test_get_model(monkeypatch):
4041
assert isinstance(models.get_model("yolox"), MockModel)
4142

4243

44+
def test_get_model_threaded(monkeypatch):
45+
"""Test that get_model works correctly when called from multiple threads simultaneously."""
46+
monkeypatch.setattr(models, "models", {})
47+
48+
# Results and exceptions from threads will be stored here
49+
results = []
50+
exceptions = []
51+
52+
def get_model_worker(thread_id):
53+
"""Worker function for each thread."""
54+
try:
55+
model = models.get_model("yolox")
56+
results.append((thread_id, model))
57+
except Exception as e:
58+
exceptions.append((thread_id, e))
59+
60+
# Create and start multiple threads
61+
num_threads = 10
62+
threads = []
63+
64+
with mock.patch.dict(models.model_class_map, {"yolox": MockModel}):
65+
for i in range(num_threads):
66+
thread = threading.Thread(target=get_model_worker, args=(i,))
67+
threads.append(thread)
68+
thread.start()
69+
70+
# Wait for all threads to complete
71+
for thread in threads:
72+
thread.join()
73+
74+
# Verify no exceptions occurred
75+
assert len(exceptions) == 0, f"Exceptions occurred in threads: {exceptions}"
76+
77+
# Verify all threads got results
78+
assert len(results) == num_threads, f"Expected {num_threads} results, got {len(results)}"
79+
80+
# Verify all results are MockModel instances
81+
for thread_id, model in results:
82+
assert isinstance(
83+
model, MockModel
84+
), f"Thread {thread_id} got unexpected model type: {type(model)}"
85+
86+
4387
def test_register_new_model():
4488
assert "foo" not in models.model_class_map
4589
assert "foo" not in models.model_config_map

0 commit comments

Comments
 (0)