From 34dd43e4cdaa281ce666862113d09ffcc6130f9b Mon Sep 17 00:00:00 2001 From: tbs17 Date: Fri, 17 Jan 2025 15:33:06 -0500 Subject: [PATCH 1/8] remove layoutparser lib --- CHANGELOG.md | 5 + README.md | 4 - requirements/base.in | 1 - requirements/base.txt | 113 ++++++----------- requirements/dev.txt | 116 +++++++++--------- requirements/test.txt | 54 ++++---- .../inference/test_layout_element.py | 23 +--- unstructured_inference/__version__.py | 2 +- .../inference/layoutelement.py | 19 --- 9 files changed, 124 insertions(+), 213 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19a3787e..ea577662 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.8.3 + +* fix: removed `layoutelement.from_lp_textblock()` and related tests as it's not used +* fix: update requirements to drop `layoutparser` lib + ## 0.8.2 * fix: fix bug when an empty list is passed into `TextRegions.from_list` triggers `IndexError` diff --git a/README.md b/README.md index ac759757..f1f4c42c 100644 --- a/README.md +++ b/README.md @@ -72,10 +72,6 @@ model = get_model("yolox") layout = DocumentLayout.from_file("sample-docs/layout-parser-paper.pdf", detection_model=model) ``` -### Using models from the layoutparser model zoo - -The `UnstructuredDetectronModel` class in `unstructured_inference.modelts.detectron2` uses the `faster_rcnn_R_50_FPN_3x` model pretrained on DocLayNet, but by using different construction parameters, any model in the `layoutparser` [model zoo](https://layout-parser.readthedocs.io/en/latest/notes/modelzoo.html) can be used. `UnstructuredDetectronModel` is a light wrapper around the `layoutparser` `Detectron2LayoutModel` object, and accepts the same arguments. See [layoutparser documentation](https://layout-parser.readthedocs.io/en/latest/api_doc/models.html#layoutparser.models.Detectron2LayoutModel) for details. - ### Using your own model Any detection model can be used for in the `unstructured_inference` pipeline by wrapping the model in the `UnstructuredObjectDetectionModel` class. To integrate with the `DocumentLayout` class, a subclass of `UnstructuredObjectDetectionModel` must have a `predict` method that accepts a `PIL.Image.Image` and returns a list of `LayoutElement`s, and an `initialize` method, which loads the model and prepares it for inference. diff --git a/requirements/base.in b/requirements/base.in index 8dfa4cad..9bf49f77 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,5 +1,4 @@ -c constraints.in -layoutparser python-multipart huggingface-hub numpy<2 diff --git a/requirements/base.txt b/requirements/base.txt index 89d7fa15..27b303e7 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,36 +4,30 @@ # # pip-compile requirements/base.in # -certifi==2024.8.30 +certifi==2024.12.14 + # via requests +charset-normalizer==3.4.1 # via requests -cffi==1.17.1 - # via cryptography -charset-normalizer==3.3.2 - # via - # pdfminer-six - # requests coloredlogs==15.0.1 # via onnxruntime contourpy==1.3.0 # via matplotlib -cryptography==43.0.1 - # via pdfminer-six cycler==0.12.1 # via matplotlib -filelock==3.16.0 +filelock==3.16.1 # via # huggingface-hub # torch # transformers -flatbuffers==24.3.25 +flatbuffers==24.12.23 # via onnxruntime -fonttools==4.53.1 +fonttools==4.55.3 # via matplotlib -fsspec==2024.9.0 +fsspec==2024.12.0 # via # huggingface-hub # torch -huggingface-hub==0.24.7 +huggingface-hub==0.27.1 # via # -r requirements/base.in # timm @@ -41,21 +35,17 @@ huggingface-hub==0.24.7 # transformers humanfriendly==10.0 # via coloredlogs -idna==3.8 +idna==3.10 # via requests -importlib-resources==6.4.5 +importlib-resources==6.5.2 # via matplotlib -iopath==0.1.10 - # via layoutparser -jinja2==3.1.4 +jinja2==3.1.5 # via torch kiwisolver==1.4.7 # via matplotlib -layoutparser==0.3.4 - # via -r requirements/base.in -markupsafe==2.1.5 +markupsafe==3.0.2 # via jinja2 -matplotlib==3.9.2 +matplotlib==3.9.4 # via -r requirements/base.in mpmath==1.3.0 # via sympy @@ -65,116 +55,83 @@ numpy==1.26.4 # via # -r requirements/base.in # contourpy - # layoutparser # matplotlib # onnx # onnxruntime # opencv-python - # pandas - # scipy # torchvision # transformers -onnx==1.16.2 +onnx==1.17.0 # via -r requirements/base.in onnxruntime==1.19.2 # via -r requirements/base.in -opencv-python==4.10.0.84 - # via - # -r requirements/base.in - # layoutparser -packaging==24.1 +opencv-python==4.11.0.86 + # via -r requirements/base.in +packaging==24.2 # via # huggingface-hub # matplotlib # onnxruntime # transformers -pandas==2.2.2 - # via layoutparser -pdf2image==1.17.0 - # via layoutparser -pdfminer-six==20231228 - # via pdfplumber -pdfplumber==0.11.4 - # via layoutparser -pillow==10.4.0 +pillow==11.1.0 # via - # layoutparser # matplotlib - # pdf2image - # pdfplumber # torchvision -portalocker==2.10.1 - # via iopath -protobuf==5.28.1 +protobuf==5.29.3 # via # onnx # onnxruntime -pycparser==2.22 - # via cffi -pyparsing==3.1.4 +pyparsing==3.2.1 # via matplotlib -pypdfium2==4.30.0 - # via pdfplumber python-dateutil==2.9.0.post0 - # via - # matplotlib - # pandas -python-multipart==0.0.9 + # via matplotlib +python-multipart==0.0.20 # via -r requirements/base.in -pytz==2024.2 - # via pandas pyyaml==6.0.2 # via # huggingface-hub - # layoutparser # timm # transformers -rapidfuzz==3.9.7 +rapidfuzz==3.11.0 # via -r requirements/base.in -regex==2024.9.11 +regex==2024.11.6 # via transformers requests==2.32.3 # via # huggingface-hub # transformers -safetensors==0.4.5 +safetensors==0.5.2 # via # timm # transformers -scipy==1.13.1 - # via layoutparser -six==1.16.0 +six==1.17.0 # via python-dateutil -sympy==1.13.2 +sympy==1.13.1 # via # onnxruntime # torch -timm==1.0.9 +timm==1.0.13 # via -r requirements/base.in -tokenizers==0.19.1 +tokenizers==0.21.0 # via transformers -torch==2.4.1 +torch==2.5.1 # via # -r requirements/base.in # timm # torchvision -torchvision==0.19.1 +torchvision==0.20.1 # via timm -tqdm==4.66.5 +tqdm==4.67.1 # via # huggingface-hub - # iopath # transformers -transformers==4.44.2 +transformers==4.48.0 # via -r requirements/base.in typing-extensions==4.12.2 # via # huggingface-hub - # iopath # torch -tzdata==2024.1 - # via pandas -urllib3==2.2.3 +urllib3==2.3.0 # via requests -zipp==3.20.2 +zipp==3.21.0 # via importlib-resources diff --git a/requirements/dev.txt b/requirements/dev.txt index 3b015f48..d6a933f2 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,7 +4,7 @@ # # pip-compile requirements/dev.in # -anyio==4.4.0 +anyio==4.8.0 # via # -c requirements/test.txt # httpx @@ -17,11 +17,11 @@ argon2-cffi-bindings==21.2.0 # via argon2-cffi arrow==1.3.0 # via isoduration -asttokens==2.4.1 +asttokens==3.0.0 # via stack-data async-lru==2.0.4 # via jupyterlab -attrs==24.2.0 +attrs==24.3.0 # via # jsonschema # referencing @@ -29,11 +29,11 @@ babel==2.16.0 # via jupyterlab-server beautifulsoup4==4.12.3 # via nbconvert -bleach==6.1.0 +bleach[css]==6.2.0 # via nbconvert -build==1.2.2 +build==1.2.2.post1 # via pip-tools -certifi==2024.8.30 +certifi==2024.12.14 # via # -c requirements/base.txt # -c requirements/test.txt @@ -41,15 +41,13 @@ certifi==2024.8.30 # httpx # requests cffi==1.17.1 - # via - # -c requirements/base.txt - # argon2-cffi-bindings -charset-normalizer==3.3.2 + # via argon2-cffi-bindings +charset-normalizer==3.4.1 # via # -c requirements/base.txt # -c requirements/test.txt # requests -click==8.1.7 +click==8.1.8 # via # -c requirements/test.txt # pip-tools @@ -65,7 +63,7 @@ cycler==0.12.1 # via # -c requirements/base.txt # matplotlib -debugpy==1.8.5 +debugpy==1.8.12 # via ipykernel decorator==5.1.1 # via ipython @@ -78,9 +76,9 @@ exceptiongroup==1.2.2 # ipython executing==2.1.0 # via stack-data -fastjsonschema==2.20.0 +fastjsonschema==2.21.1 # via nbformat -fonttools==4.53.1 +fonttools==4.55.3 # via # -c requirements/base.txt # matplotlib @@ -90,15 +88,15 @@ h11==0.14.0 # via # -c requirements/test.txt # httpcore -httpcore==1.0.5 +httpcore==1.0.7 # via # -c requirements/test.txt # httpx -httpx==0.27.2 +httpx==0.28.1 # via # -c requirements/test.txt # jupyterlab -idna==3.8 +idna==3.10 # via # -c requirements/base.txt # -c requirements/test.txt @@ -114,7 +112,7 @@ importlib-metadata==8.5.0 # jupyterlab # jupyterlab-server # nbconvert -importlib-resources==6.4.5 +importlib-resources==6.5.2 # via # -c requirements/base.txt # matplotlib @@ -133,16 +131,16 @@ ipywidgets==8.1.5 # via jupyter isoduration==20.11.0 # via jsonschema -jedi==0.19.1 +jedi==0.19.2 # via ipython -jinja2==3.1.4 +jinja2==3.1.5 # via # -c requirements/base.txt # jupyter-server # jupyterlab # jupyterlab-server # nbconvert -json5==0.9.25 +json5==0.10.0 # via jupyterlab-server jsonpointer==3.0.0 # via jsonschema @@ -151,11 +149,11 @@ jsonschema[format-nongpl]==4.23.0 # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.12.1 +jsonschema-specifications==2024.10.1 # via jsonschema jupyter==1.1.1 # via -r requirements/dev.in -jupyter-client==8.6.2 +jupyter-client==8.6.3 # via # ipykernel # jupyter-console @@ -173,11 +171,11 @@ jupyter-core==5.7.2 # nbclient # nbconvert # nbformat -jupyter-events==0.10.0 +jupyter-events==0.11.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.2 +jupyter-server==2.15.0 # via # jupyter-lsp # jupyterlab @@ -186,7 +184,7 @@ jupyter-server==2.14.2 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.2.5 +jupyterlab==4.3.4 # via # jupyter # notebook @@ -202,12 +200,12 @@ kiwisolver==1.4.7 # via # -c requirements/base.txt # matplotlib -markupsafe==2.1.5 +markupsafe==3.0.2 # via # -c requirements/base.txt # jinja2 # nbconvert -matplotlib==3.9.2 +matplotlib==3.9.4 # via # -c requirements/base.txt # -r requirements/dev.in @@ -215,11 +213,11 @@ matplotlib-inline==0.1.7 # via # ipykernel # ipython -mistune==3.0.2 +mistune==3.1.0 # via nbconvert -nbclient==0.10.0 +nbclient==0.10.2 # via nbconvert -nbconvert==7.16.4 +nbconvert==7.16.5 # via # jupyter # jupyter-server @@ -230,7 +228,7 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -notebook==7.2.2 +notebook==7.3.2 # via jupyter notebook-shim==0.2.4 # via @@ -243,7 +241,7 @@ numpy==1.26.4 # matplotlib overrides==7.7.0 # via jupyter-server -packaging==24.1 +packaging==24.2 # via # -c requirements/base.txt # -c requirements/test.txt @@ -260,24 +258,24 @@ parso==0.8.4 # via jedi pexpect==4.9.0 # via ipython -pillow==10.4.0 +pillow==11.1.0 # via # -c requirements/base.txt # -c requirements/test.txt # matplotlib pip-tools==7.4.1 # via -r requirements/dev.in -platformdirs==4.3.2 +platformdirs==4.3.6 # via # -c requirements/test.txt # jupyter-core -prometheus-client==0.20.0 +prometheus-client==0.21.1 # via jupyter-server -prompt-toolkit==3.0.47 +prompt-toolkit==3.0.48 # via # ipython # jupyter-console -psutil==6.0.0 +psutil==6.1.1 # via ipykernel ptyprocess==0.7.0 # via @@ -286,19 +284,17 @@ ptyprocess==0.7.0 pure-eval==0.2.3 # via stack-data pycparser==2.22 - # via - # -c requirements/base.txt - # cffi -pygments==2.18.0 + # via cffi +pygments==2.19.1 # via # ipython # jupyter-console # nbconvert -pyparsing==3.1.4 +pyparsing==3.2.1 # via # -c requirements/base.txt # matplotlib -pyproject-hooks==1.1.0 +pyproject-hooks==1.2.0 # via # build # pip-tools @@ -308,7 +304,7 @@ python-dateutil==2.9.0.post0 # arrow # jupyter-client # matplotlib -python-json-logger==2.0.7 +python-json-logger==3.2.1 # via jupyter-events pyyaml==6.0.2 # via @@ -321,7 +317,7 @@ pyzmq==26.2.0 # jupyter-client # jupyter-console # jupyter-server -referencing==0.35.1 +referencing==0.36.1 # via # jsonschema # jsonschema-specifications @@ -339,24 +335,21 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.20.0 +rpds-py==0.22.3 # via # jsonschema # referencing send2trash==1.8.3 # via jupyter-server -six==1.16.0 +six==1.17.0 # via # -c requirements/base.txt - # asttokens - # bleach # python-dateutil # rfc3339-validator sniffio==1.3.1 # via # -c requirements/test.txt # anyio - # httpx soupsieve==2.6 # via beautifulsoup4 stack-data==0.6.3 @@ -365,15 +358,15 @@ terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals -tinycss2==1.3.0 - # via nbconvert -tomli==2.0.1 +tinycss2==1.4.0 + # via bleach +tomli==2.2.1 # via # -c requirements/test.txt # build # jupyterlab # pip-tools -tornado==6.4.1 +tornado==6.4.2 # via # ipykernel # jupyter-client @@ -397,7 +390,7 @@ traitlets==5.14.3 # nbclient # nbconvert # nbformat -types-python-dateutil==2.9.0.20240906 +types-python-dateutil==2.9.0.20241206 # via arrow typing-extensions==4.12.2 # via @@ -406,16 +399,19 @@ typing-extensions==4.12.2 # anyio # async-lru # ipython + # mistune + # python-json-logger + # referencing uri-template==1.3.0 # via jsonschema -urllib3==2.2.3 +urllib3==2.3.0 # via # -c requirements/base.txt # -c requirements/test.txt # requests wcwidth==0.2.13 # via prompt-toolkit -webcolors==24.8.0 +webcolors==24.11.1 # via jsonschema webencodings==0.5.1 # via @@ -423,11 +419,11 @@ webencodings==0.5.1 # tinycss2 websocket-client==1.8.0 # via jupyter-server -wheel==0.44.0 +wheel==0.45.1 # via pip-tools widgetsnbextension==4.0.13 # via ipywidgets -zipp==3.20.2 +zipp==3.21.0 # via # -c requirements/base.txt # importlib-metadata diff --git a/requirements/test.txt b/requirements/test.txt index a4b6221c..6e3f0b28 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,25 +4,25 @@ # # pip-compile requirements/test.in # -anyio==4.4.0 +anyio==4.8.0 # via httpx -black==24.8.0 +black==24.10.0 # via -r requirements/test.in -certifi==2024.8.30 +certifi==2024.12.14 # via # -c requirements/base.txt # httpcore # httpx # requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via # -c requirements/base.txt # requests -click==8.1.7 +click==8.1.8 # via # -r requirements/test.in # black -coverage[toml]==7.6.1 +coverage[toml]==7.6.10 # via # -r requirements/test.in # pytest-cov @@ -30,7 +30,7 @@ exceptiongroup==1.2.2 # via # anyio # pytest -filelock==3.16.0 +filelock==3.16.1 # via # -c requirements/base.txt # huggingface-hub @@ -40,21 +40,21 @@ flake8==7.1.1 # flake8-docstrings flake8-docstrings==1.7.0 # via -r requirements/test.in -fsspec==2024.9.0 +fsspec==2024.12.0 # via # -c requirements/base.txt # huggingface-hub h11==0.14.0 # via httpcore -httpcore==1.0.5 +httpcore==1.0.7 # via httpx -httpx==0.27.2 +httpx==0.28.1 # via -r requirements/test.in -huggingface-hub==0.24.7 +huggingface-hub==0.27.1 # via # -c requirements/base.txt # -r requirements/test.in -idna==3.8 +idna==3.10 # via # -c requirements/base.txt # anyio @@ -64,13 +64,13 @@ iniconfig==2.0.0 # via pytest mccabe==0.7.0 # via flake8 -mypy==1.11.2 +mypy==1.14.1 # via -r requirements/test.in mypy-extensions==1.0.0 # via # black # mypy -packaging==24.1 +packaging==24.2 # via # -c requirements/base.txt # black @@ -79,14 +79,12 @@ packaging==24.1 pathspec==0.12.1 # via black pdf2image==1.17.0 - # via - # -c requirements/base.txt - # -r requirements/test.in -pillow==10.4.0 + # via -r requirements/test.in +pillow==11.1.0 # via # -c requirements/base.txt # pdf2image -platformdirs==4.3.2 +platformdirs==4.3.6 # via black pluggy==1.5.0 # via pytest @@ -96,11 +94,11 @@ pydocstyle==6.3.0 # via flake8-docstrings pyflakes==3.2.0 # via flake8 -pytest==8.3.3 +pytest==8.3.4 # via # pytest-cov # pytest-mock -pytest-cov==5.0.0 +pytest-cov==6.0.0 # via -r requirements/test.in pytest-mock==3.14.0 # via -r requirements/test.in @@ -112,25 +110,23 @@ requests==2.32.3 # via # -c requirements/base.txt # huggingface-hub -ruff==0.6.4 +ruff==0.9.2 # via -r requirements/test.in sniffio==1.3.1 - # via - # anyio - # httpx + # via anyio snowballstemmer==2.2.0 # via pydocstyle -tomli==2.0.1 +tomli==2.2.1 # via # black # coverage # mypy # pytest -tqdm==4.66.5 +tqdm==4.67.1 # via # -c requirements/base.txt # huggingface-hub -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20241230 # via -r requirements/test.in typing-extensions==4.12.2 # via @@ -139,7 +135,7 @@ typing-extensions==4.12.2 # black # huggingface-hub # mypy -urllib3==2.2.3 +urllib3==2.3.0 # via # -c requirements/base.txt # requests diff --git a/test_unstructured_inference/inference/test_layout_element.py b/test_unstructured_inference/inference/test_layout_element.py index f814c180..e2c33775 100644 --- a/test_unstructured_inference/inference/test_layout_element.py +++ b/test_unstructured_inference/inference/test_layout_element.py @@ -1,5 +1,5 @@ -from layoutparser.elements import TextBlock -from layoutparser.elements.layout_elements import Rectangle as LPRectangle + + from unstructured_inference.constants import Source from unstructured_inference.inference.layoutelement import LayoutElement, TextRegion @@ -24,22 +24,3 @@ def test_layout_element_from_region(mock_rectangle): assert LayoutElement.from_region(region) == expected -def test_layout_element_from_lp_textblock(): - mock_text_block = TextBlock( - block=LPRectangle(100, 100, 300, 300), - text="Sample Text", - type="Text", - score=0.99, - ) - - expected = LayoutElement.from_coords( - 100, - 100, - 300, - 300, - text="Sample Text", - source=Source.DETECTRON2_LP, - type="Text", - prob=0.99, - ) - assert LayoutElement.from_lp_textblock(mock_text_block) == expected diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index c290d75e..54090120 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "0.8.2" # pragma: no cover +__version__ = "0.8.3" # pragma: no cover diff --git a/unstructured_inference/inference/layoutelement.py b/unstructured_inference/inference/layoutelement.py index 8ccaeb9c..87712d58 100644 --- a/unstructured_inference/inference/layoutelement.py +++ b/unstructured_inference/inference/layoutelement.py @@ -4,7 +4,6 @@ from typing import Any, Collection, Iterable, List, Optional import numpy as np -from layoutparser.elements.layout import TextBlock from pandas import DataFrame from scipy.sparse.csgraph import connected_components @@ -185,24 +184,6 @@ def from_region(cls, region: TextRegion): source = region.source if hasattr(region, "source") else None return cls(text=text, source=source, type=type, prob=prob, bbox=region.bbox) - @classmethod - def from_lp_textblock(cls, textblock: TextBlock): - """Create LayoutElement from layoutparser TextBlock object.""" - x1, y1, x2, y2 = textblock.coordinates - text = textblock.text - type = textblock.type - prob = textblock.score - return cls.from_coords( - x1, - y1, - x2, - y2, - text=text, - source=Source.DETECTRON2_LP, - type=type, - prob=prob, - ) - def merge_inferred_layout_with_extracted_layout( inferred_layout: Collection[LayoutElement], From 012e619e8a6edf5989cf00353d2d9619254fd3ca Mon Sep 17 00:00:00 2001 From: tbs17 Date: Fri, 17 Jan 2025 15:45:32 -0500 Subject: [PATCH 2/8] include note about readme --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea577662..8595d70a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ * fix: removed `layoutelement.from_lp_textblock()` and related tests as it's not used * fix: update requirements to drop `layoutparser` lib +* fix: update `README.md` to remove layoutparser model zoo support note ## 0.8.2 From 2d61cb857b49e7395aafcef323437c698f506d94 Mon Sep 17 00:00:00 2001 From: tbs17 Date: Fri, 17 Jan 2025 15:52:16 -0500 Subject: [PATCH 3/8] remove unused import --- unstructured_inference/inference/layoutelement.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unstructured_inference/inference/layoutelement.py b/unstructured_inference/inference/layoutelement.py index 87712d58..40cd0820 100644 --- a/unstructured_inference/inference/layoutelement.py +++ b/unstructured_inference/inference/layoutelement.py @@ -11,7 +11,6 @@ from unstructured_inference.constants import ( FULL_PAGE_REGION_THRESHOLD, ElementType, - Source, ) from unstructured_inference.inference.elements import ( ImageTextRegion, From 1c32d22c2615d9f61b6194046e8f74fe671eed79 Mon Sep 17 00:00:00 2001 From: tbs17 Date: Fri, 17 Jan 2025 16:05:11 -0500 Subject: [PATCH 4/8] reformat test_layout_element.py --- test_unstructured_inference/inference/test_layout_element.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test_unstructured_inference/inference/test_layout_element.py b/test_unstructured_inference/inference/test_layout_element.py index e2c33775..0992047f 100644 --- a/test_unstructured_inference/inference/test_layout_element.py +++ b/test_unstructured_inference/inference/test_layout_element.py @@ -1,6 +1,3 @@ - - - from unstructured_inference.constants import Source from unstructured_inference.inference.layoutelement import LayoutElement, TextRegion @@ -22,5 +19,3 @@ def test_layout_element_from_region(mock_rectangle): region = TextRegion(bbox=mock_rectangle) assert LayoutElement.from_region(region) == expected - - From c86003c42be99836032166a876c6c23dcf46264b Mon Sep 17 00:00:00 2001 From: tbs17 Date: Fri, 17 Jan 2025 16:22:48 -0500 Subject: [PATCH 5/8] remove unused import --- test_unstructured_inference/inference/test_layout_element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_unstructured_inference/inference/test_layout_element.py b/test_unstructured_inference/inference/test_layout_element.py index 0992047f..f0522650 100644 --- a/test_unstructured_inference/inference/test_layout_element.py +++ b/test_unstructured_inference/inference/test_layout_element.py @@ -1,4 +1,4 @@ -from unstructured_inference.constants import Source + from unstructured_inference.inference.layoutelement import LayoutElement, TextRegion From 98483c2e352373fa328627b02ebfe3cbe2fe42bb Mon Sep 17 00:00:00 2001 From: tbs17 Date: Fri, 17 Jan 2025 16:36:42 -0500 Subject: [PATCH 6/8] reformat again --- test_unstructured_inference/inference/test_layout_element.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_unstructured_inference/inference/test_layout_element.py b/test_unstructured_inference/inference/test_layout_element.py index f0522650..b0cd2966 100644 --- a/test_unstructured_inference/inference/test_layout_element.py +++ b/test_unstructured_inference/inference/test_layout_element.py @@ -1,4 +1,3 @@ - from unstructured_inference.inference.layoutelement import LayoutElement, TextRegion From ac4fad5a8ed836f9e77ef6d29c71074de45ff716 Mon Sep 17 00:00:00 2001 From: tbs17 Date: Fri, 17 Jan 2025 16:50:03 -0500 Subject: [PATCH 7/8] put back some packages --- requirements/base.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements/base.in b/requirements/base.in index 9bf49f77..a4ad49a2 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -11,3 +11,6 @@ timm # NOTE(alan): Pinned because this is when the most recent module we import appeared transformers>=4.25.1 rapidfuzz +pandas +scipy +pdfplumber \ No newline at end of file From 094af1274804fb08d6413c48da821b4fb9d186df Mon Sep 17 00:00:00 2001 From: tbs17 Date: Fri, 17 Jan 2025 17:03:16 -0500 Subject: [PATCH 8/8] update base/dev.txt --- requirements/base.txt | 31 +++++++++++++++++++++++++++++-- requirements/dev.txt | 8 ++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 27b303e7..6dd8d064 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,12 +6,18 @@ # certifi==2024.12.14 # via requests +cffi==1.17.1 + # via cryptography charset-normalizer==3.4.1 - # via requests + # via + # pdfminer-six + # requests coloredlogs==15.0.1 # via onnxruntime contourpy==1.3.0 # via matplotlib +cryptography==44.0.0 + # via pdfminer-six cycler==0.12.1 # via matplotlib filelock==3.16.1 @@ -59,6 +65,8 @@ numpy==1.26.4 # onnx # onnxruntime # opencv-python + # pandas + # scipy # torchvision # transformers onnx==1.17.0 @@ -73,20 +81,35 @@ packaging==24.2 # matplotlib # onnxruntime # transformers +pandas==2.2.3 + # via -r requirements/base.in +pdfminer-six==20231228 + # via pdfplumber +pdfplumber==0.11.5 + # via -r requirements/base.in pillow==11.1.0 # via # matplotlib + # pdfplumber # torchvision protobuf==5.29.3 # via # onnx # onnxruntime +pycparser==2.22 + # via cffi pyparsing==3.2.1 # via matplotlib +pypdfium2==4.30.1 + # via pdfplumber python-dateutil==2.9.0.post0 - # via matplotlib + # via + # matplotlib + # pandas python-multipart==0.0.20 # via -r requirements/base.in +pytz==2024.2 + # via pandas pyyaml==6.0.2 # via # huggingface-hub @@ -104,6 +127,8 @@ safetensors==0.5.2 # via # timm # transformers +scipy==1.13.1 + # via -r requirements/base.in six==1.17.0 # via python-dateutil sympy==1.13.1 @@ -131,6 +156,8 @@ typing-extensions==4.12.2 # via # huggingface-hub # torch +tzdata==2024.2 + # via pandas urllib3==2.3.0 # via requests zipp==3.21.0 diff --git a/requirements/dev.txt b/requirements/dev.txt index d6a933f2..b4ea4117 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -41,7 +41,9 @@ certifi==2024.12.14 # httpx # requests cffi==1.17.1 - # via argon2-cffi-bindings + # via + # -c requirements/base.txt + # argon2-cffi-bindings charset-normalizer==3.4.1 # via # -c requirements/base.txt @@ -284,7 +286,9 @@ ptyprocess==0.7.0 pure-eval==0.2.3 # via stack-data pycparser==2.22 - # via cffi + # via + # -c requirements/base.txt + # cffi pygments==2.19.1 # via # ipython