Skip to content

Commit 5c295c5

Browse files
Update unstructured inference detection models to output class probs (#191)
Updated the detection models we currently support to output classification probabilities and persist them through the unstructured-inference LayoutElement. The next step, once this version is released, is to update the Unstructured package so that the probabilities from unstructured-inference persist all the way to the end of partition through metadata. --------- Co-authored-by: Alan Bertl <[email protected]>
1 parent e931bef commit 5c295c5

File tree

8 files changed

+59
-6
lines changed

8 files changed

+59
-6
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.5.22
2+
3+
* Add object-detection classification probabilities to LayoutElement for all currently implemented object detection models
4+
15
## 0.5.21
26

37
* adds `safe_division` to replace 0 with machine epsilon for `float` to avoid division by 0

test_unstructured_inference/models/test_detectron2onnx.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@ def test_inference():
7070
assert len(elements) == 1
7171
element = elements[0]
7272
(x1, y1), _, (x2, y2), _ = element.coordinates
73+
assert hasattr(
74+
element,
75+
"prob",
76+
) # NOTE(pravin) New Assertion to Make Sure element has probabilities
77+
assert isinstance(
78+
element.prob,
79+
float,
80+
) # NOTE(pravin) New Assertion to Make Sure Populated Probability is Float
7381
# NOTE(alan): The bbox coordinates get resized, so check their relative proportions
7482
assert x2 / x1 == pytest.approx(3.0) # x1 == 1, x2 == 3 before scaling
7583
assert y2 / y1 == pytest.approx(2.0) # y1 == 2, y2 == 4 before scaling

test_unstructured_inference/models/test_model.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ def test_model_initializes_once():
6363
"models",
6464
{},
6565
):
66-
doc = layout.DocumentLayout.from_file("sample-docs/layout-parser-paper.pdf")
67-
66+
doc = layout.DocumentLayout.from_file("sample-docs/loremipsum.pdf")
6867
doc.pages[0].detection_model.initializer.assert_called_once()
68+
assert hasattr(
69+
doc.pages[0].elements[0],
70+
"prob",
71+
) # NOTE(pravin) New Assertion to Make Sure Elements have probability attribute
72+
assert (
73+
doc.pages[0].elements[0].prob is None
74+
) # NOTE(pravin) New Assertion to Make Sure Uncategorized Text has None Probability

test_unstructured_inference/models/test_yolox.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@ def test_layout_yolox_local_parsing_image():
1515
assert len(document_layout.pages) == 1
1616
# NOTE(benjamin) The example sent to the test contains 13 detections
1717
assert len(document_layout.pages[0].elements) == 13
18+
assert hasattr(
19+
document_layout.pages[0].elements[0],
20+
"prob",
21+
) # NOTE(pravin) New Assertion to Make Sure LayoutElement has probabilities
22+
assert isinstance(
23+
document_layout.pages[0].elements[0].prob,
24+
float,
25+
) # NOTE(pravin) New Assertion to Make Sure Populated Probability is Float
1826

1927

2028
@pytest.mark.slow()
@@ -26,6 +34,14 @@ def test_layout_yolox_local_parsing_pdf():
2634
assert len(document_layout.pages) == 1
2735
# NOTE(benjamin) The example sent to the test contains 5 detections
2836
assert len(document_layout.pages[0].elements) == 5
37+
assert hasattr(
38+
document_layout.pages[0].elements[0],
39+
"prob",
40+
) # NOTE(pravin) New Assertion to Make Sure LayoutElement has probabilities
41+
assert isinstance(
42+
document_layout.pages[0].elements[0].prob,
43+
float,
44+
) # NOTE(pravin) New Assertion to Make Sure Populated Probability is Float
2945

3046

3147
@pytest.mark.slow()
@@ -51,6 +67,14 @@ def test_layout_yolox_local_parsing_image_soft():
5167
assert len(document_layout.pages) == 1
5268
# NOTE(benjamin) Soft version of the test, run make test-long in order to run with full model
5369
assert len(document_layout.pages[0].elements) > 0
70+
assert hasattr(
71+
document_layout.pages[0].elements[0],
72+
"prob",
73+
) # NOTE(pravin) New Assertion to Make Sure LayoutElement has probabilities
74+
assert isinstance(
75+
document_layout.pages[0].elements[0].prob,
76+
float,
77+
) # NOTE(pravin) New Assertion to Make Sure Populated Probability is Float
5478

5579

5680
def test_layout_yolox_local_parsing_pdf_soft():
@@ -61,6 +85,13 @@ def test_layout_yolox_local_parsing_pdf_soft():
6185
assert len(document_layout.pages) == 1
6286
# NOTE(benjamin) Soft version of the test, run make test-long in order to run with full model
6387
assert len(document_layout.pages[0].elements) > 0
88+
assert hasattr(
89+
document_layout.pages[0].elements[0],
90+
"prob",
91+
) # NOTE(pravin) New Assertion to Make Sure LayoutElement has probabilities
92+
assert (
93+
document_layout.pages[0].elements[0].prob is None
94+
) # NOTE(pravin) New Assertion to Make Sure Uncategorized Text has None Probability
6495

6596

6697
def test_layout_yolox_local_parsing_empty_pdf_soft():
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.21" # pragma: no cover
1+
__version__ = "0.5.22" # pragma: no cover

unstructured_inference/inference/layoutelement.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
@dataclass
2222
class LayoutElement(TextRegion):
2323
type: Optional[str] = None
24+
prob: Optional[float] = None
2425

2526
def extract_text(
2627
self,
@@ -48,6 +49,7 @@ def to_dict(self) -> dict:
4849
"coordinates": self.coordinates,
4950
"text": self.text,
5051
"type": self.type,
52+
"prob": self.prob,
5153
}
5254
return out_dict
5355

@@ -65,7 +67,8 @@ def from_lp_textblock(cls, textblock: TextBlock):
6567
x1, y1, x2, y2 = textblock.coordinates
6668
text = textblock.text
6769
type = textblock.type
68-
return cls(x1, y1, x2, y2, text, type)
70+
score = textblock.score
71+
return cls(x1, y1, x2, y2, text, type, prob=score)
6972

7073

7174
def interpret_table_block(text_block: TextRegion, image: Image.Image) -> str:

unstructured_inference/models/detectron2onnx.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def postprocess(
157157
y2 * height_conversion,
158158
text=None,
159159
type=detected_class,
160+
prob=conf,
160161
)
161162

162163
regions.append(region)

unstructured_inference/models/yolox.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,9 @@ def image_processing(
141141
for det in dets:
142142
# Each detection should have (x1,y1,x2,y2,probability,class) format
143143
# where (x1,y1) is the top-left corner and (x2,y2) is the bottom-right corner
144-
x1, y1, x2, y2, _, class_id = det.tolist()
144+
x1, y1, x2, y2, prob, class_id = det.tolist()
145145
detected_class = self.layout_classes[int(class_id)]
146-
region = LayoutElement(x1, y1, x2, y2, text=None, type=detected_class)
146+
region = LayoutElement(x1, y1, x2, y2, text=None, type=detected_class, prob=prob)
147147

148148
regions.append(region)
149149

0 commit comments

Comments
 (0)