diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ea6dae6..4bdace7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.6 + +* Add slicing through indexing for vectorized elements + ## 1.0.5 * feat: add thread lock to prevent racing condition when instantiating singletons diff --git a/test_unstructured_inference/test_elements.py b/test_unstructured_inference/test_elements.py index 89efd870..81db5f2f 100644 --- a/test_unstructured_inference/test_elements.py +++ b/test_unstructured_inference/test_elements.py @@ -472,3 +472,49 @@ def test_layoutelements_concatenate(): assert joint.sources.tolist() == ["yolox", "yolox", "ocr", "ocr"] assert joint.element_class_ids.tolist() == [0, 1, 1, 2] assert joint.element_class_id_map == {0: "type0", 1: "type1", 2: "type2"} + + +@pytest.mark.parametrize( + "test_elements", + [ + TextRegions( + element_coords=np.array( + [ + [0.0, 0.0, 1.0, 1.0], + [1.0, 0.0, 1.5, 1.0], + [2.0, 0.0, 2.5, 1.0], + [3.0, 0.0, 4.0, 1.0], + [4.0, 0.0, 5.0, 1.0], + ] + ), + texts=np.array(["0", "1", "2", "3", "4"]), + sources=np.array(["foo", "foo", "foo", "foo", "foo"], dtype=" TextRegions: + return self.slice(indices) + def slice(self, indices) -> TextRegions: """slice text regions based on indices""" return TextRegions( diff --git a/unstructured_inference/inference/layoutelement.py b/unstructured_inference/inference/layoutelement.py index b70ad651..5b4c6fda 100644 --- a/unstructured_inference/inference/layoutelement.py +++ b/unstructured_inference/inference/layoutelement.py @@ -75,6 +75,9 @@ def __eq__(self, other: object) -> bool: and np.array_equal(self.table_as_cells[mask], other.table_as_cells[mask]) ) + def __getitem__(self, indices): + return self.slice(indices) + def slice(self, indices) -> LayoutElements: """slice and return only selected indices""" return LayoutElements(