Skip to content

Commit 0a08377

Browse files
feat: add new element types (#332)
This PR adds new element types intended primarily for use by PDF/image partitioning. The new element types correspond to the ones added to `unstructured` in this PR: Unstructured-IO/unstructured#2700
1 parent 4a2fd95 commit 0a08377

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
## 0.7.26-dev0
2+
* feat: add a set of new `ElementType`s to extend future element type recognition
3+
14
## 0.7.25
25

36
* fix: replace `Rectangle.is_in()` with `Rectangle.is_almost_subregion_of()` when filling in an inferred element with embedded text
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.7.25" # pragma: no cover
1+
__version__ = "0.7.26-dev0" # pragma: no cover

unstructured_inference/constants.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,20 @@ class Source(Enum):
2626

2727

2828
class ElementType:
29+
PARAGRAPH = "Paragraph"
2930
IMAGE = "Image"
31+
PARAGRAPH_IN_IMAGE = "ParagraphInImage"
3032
FIGURE = "Figure"
3133
PICTURE = "Picture"
3234
TABLE = "Table"
35+
PARAGRAPH_IN_TABLE = "ParagraphInTable"
3336
LIST = "List"
37+
FORM = "Form"
38+
PARAGRAPH_IN_FORM = "ParagraphInForm"
39+
CHECK_BOX_CHECKED = "CheckBoxChecked"
40+
CHECK_BOX_UNCHECKED = "CheckBoxUnchecked"
41+
RADIO_BUTTON_CHECKED = "RadioButtonChecked"
42+
RADIO_BUTTON_UNCHECKED = "RadioButtonUnchecked"
3443
LIST_ITEM = "List-item"
3544
FORMULA = "Formula"
3645
CAPTION = "Caption"
@@ -42,6 +51,9 @@ class ElementType:
4251
TEXT = "Text"
4352
UNCATEGORIZED_TEXT = "UncategorizedText"
4453
PAGE_BREAK = "PageBreak"
54+
CODE_SNIPPET = "CodeSnippet"
55+
PAGE_NUMBER = "PageNumber"
56+
OTHER = "Other"
4557

4658

4759
FULL_PAGE_REGION_THRESHOLD = 0.99

0 commit comments

Comments
 (0)