diff --git a/CHANGELOG.md b/CHANGELOG.md index 39740ec23c..3a3f8f7a6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +## 0.16.15-dev0 + +### Enhancements + +### Features + +### Fixes +- **Update `unstructured-inference`** to 0.8.6 in requirements which removed `layoutparser` dependency libs +- **Update `pdfminer-six` to 20240706** + ## 0.16.14 ### Enhancements diff --git a/requirements/base.txt b/requirements/base.txt index 6fecb30c04..57ca655747 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,7 +4,7 @@ # # pip-compile ./base.in # -anyio==4.7.0 +anyio==4.8.0 # via httpx backoff==2.2.1 # via -r ./base.in @@ -36,7 +36,7 @@ dataclasses-json==0.6.7 # unstructured-client deepdiff==8.1.1 # via unstructured-client -emoji==2.14.0 +emoji==2.14.1 # via -r ./base.in exceptiongroup==1.2.2 # via anyio @@ -64,7 +64,7 @@ langdetect==1.0.9 # via -r ./base.in lxml==5.3.0 # via -r ./base.in -marshmallow==3.23.2 +marshmallow==3.25.1 # via # dataclasses-json # unstructured-client @@ -150,5 +150,5 @@ urllib3==1.26.20 # unstructured-client webencodings==0.5.1 # via html5lib -wrapt==1.17.0 +wrapt==1.17.2 # via -r ./base.in diff --git a/requirements/deps/constraints.txt b/requirements/deps/constraints.txt index 5700719383..296dd366b5 100644 --- a/requirements/deps/constraints.txt +++ b/requirements/deps/constraints.txt @@ -3,8 +3,8 @@ # extras. Putting a dependency here will only affect dependency sets that contain them -- in other # words, if something does not require a constraint, it will not be installed. 
#################################################################################################### -# (jennings): Versions greater than 5.0 create dependency conflicts with other packages -protobuf<5.0 +# we are using v3 client https://weaviate.io/developers/weaviate/client-libraries/python/python_v3 +weaviate-client>=3.26.7,<4.0.0 # TODO: Constriant due to multiple versions being installed during pip-compile grpcio>=1.65.5 # TODO: Pinned in transformers package, remove when that gets updated (https://github.com/huggingface/transformers/blob/main/setup.py) diff --git a/requirements/dev.txt b/requirements/dev.txt index 30e42eb0ac..a5ebd99214 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -15,11 +15,11 @@ click==8.1.8 # pip-tools distlib==0.3.9 # via virtualenv -filelock==3.16.1 +filelock==3.17.0 # via virtualenv -identify==2.6.4 +identify==2.6.6 # via pre-commit -importlib-metadata==8.5.0 +importlib-metadata==8.6.1 # via # -c ././deps/constraints.txt # build @@ -36,7 +36,7 @@ platformdirs==4.3.6 # via # -c ./test.txt # virtualenv -pre-commit==4.0.1 +pre-commit==4.1.0 # via -r ./dev.in pyproject-hooks==1.2.0 # via @@ -51,7 +51,7 @@ tomli==2.2.1 # -c ./test.txt # build # pip-tools -virtualenv==20.28.1 +virtualenv==20.29.1 # via pre-commit wheel==0.45.1 # via pip-tools diff --git a/requirements/extra-csv.txt b/requirements/extra-csv.txt index 496cd42fc1..d4d50645e8 100644 --- a/requirements/extra-csv.txt +++ b/requirements/extra-csv.txt @@ -20,5 +20,5 @@ six==1.17.0 # via # -c ./base.txt # python-dateutil -tzdata==2024.2 +tzdata==2025.1 # via pandas diff --git a/requirements/extra-epub.txt b/requirements/extra-epub.txt index a9533059da..460408c418 100644 --- a/requirements/extra-epub.txt +++ b/requirements/extra-epub.txt @@ -4,5 +4,5 @@ # # pip-compile ./extra-epub.in # -pypandoc==1.14 +pypandoc==1.15 # via -r ./extra-epub.in diff --git a/requirements/extra-markdown.txt b/requirements/extra-markdown.txt index 243fd0b0da..9d0a14da55 100644 --- 
a/requirements/extra-markdown.txt +++ b/requirements/extra-markdown.txt @@ -4,7 +4,7 @@ # # pip-compile ./extra-markdown.in # -importlib-metadata==8.5.0 +importlib-metadata==8.6.1 # via # -c ././deps/constraints.txt # markdown diff --git a/requirements/extra-odt.txt b/requirements/extra-odt.txt index 28ebf301a6..362c53ed74 100644 --- a/requirements/extra-odt.txt +++ b/requirements/extra-odt.txt @@ -8,7 +8,7 @@ lxml==5.3.0 # via # -c ./base.txt # python-docx -pypandoc==1.14 +pypandoc==1.15 # via -r ./extra-odt.in python-docx==1.1.2 # via -r ./extra-odt.in diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt index d7c0fe7226..f4f22d18fd 100644 --- a/requirements/extra-paddleocr.txt +++ b/requirements/extra-paddleocr.txt @@ -4,7 +4,7 @@ # # pip-compile ./extra-paddleocr.in # -anyio==4.7.0 +anyio==4.8.0 # via # -c ./base.txt # httpx @@ -32,7 +32,7 @@ exceptiongroup==1.2.2 # via # -c ./base.txt # anyio -fonttools==4.55.3 +fonttools==4.55.4 # via matplotlib h11==0.14.0 # via @@ -52,13 +52,13 @@ idna==3.10 # anyio # httpx # requests -imageio==2.36.1 +imageio==2.37.0 # via # imgaug # scikit-image imgaug==0.4.0 # via unstructured-paddleocr -importlib-resources==6.5.1 +importlib-resources==6.5.2 # via matplotlib kiwisolver==1.4.7 # via matplotlib @@ -86,9 +86,9 @@ numpy==1.26.4 # shapely # tifffile # unstructured-paddleocr -opencv-contrib-python==4.10.0.84 +opencv-contrib-python==4.11.0.86 # via unstructured-paddleocr -opencv-python==4.10.0.84 +opencv-python==4.11.0.86 # via # imgaug # unstructured-paddleocr @@ -113,10 +113,8 @@ pillow==11.1.0 # pdf2image # scikit-image # unstructured-paddleocr -protobuf==4.25.5 - # via - # -c ././deps/constraints.txt - # paddlepaddle +protobuf==5.29.3 + # via paddlepaddle pyclipper==1.3.0.post6 # via unstructured-paddleocr pyparsing==3.2.1 diff --git a/requirements/extra-pandoc.txt b/requirements/extra-pandoc.txt index 4125059733..dd397c3845 100644 --- a/requirements/extra-pandoc.txt +++ 
b/requirements/extra-pandoc.txt @@ -4,5 +4,5 @@ # # pip-compile ./extra-pandoc.in # -pypandoc==1.14 +pypandoc==1.15 # via -r ./extra-pandoc.in diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in index ae3ccdf381..99df481053 100644 --- a/requirements/extra-pdf-image.in +++ b/requirements/extra-pdf-image.in @@ -11,5 +11,5 @@ google-cloud-vision effdet # Do not move to constraints.in, otherwise unstructured-inference will not be upgraded # when unstructured library is. -unstructured-inference==0.8.1 +unstructured-inference>=0.8.6 unstructured.pytesseract>=0.3.12 diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index 81b61276ef..59fcfb8326 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -6,7 +6,7 @@ # antlr4-python3-runtime==4.9.3 # via omegaconf -cachetools==5.5.0 +cachetools==5.5.1 # via google-auth certifi==2024.12.14 # via @@ -35,14 +35,14 @@ deprecated==1.2.15 # via pikepdf effdet==0.4.1 # via -r ./extra-pdf-image.in -filelock==3.16.1 +filelock==3.17.0 # via # huggingface-hub # torch # transformers -flatbuffers==24.12.23 +flatbuffers==25.1.21 # via onnxruntime -fonttools==4.55.3 +fonttools==4.55.4 # via matplotlib fsspec==2024.12.0 # via @@ -60,14 +60,14 @@ googleapis-common-protos==1.66.0 # via # google-api-core # grpcio-status -grpcio==1.68.1 +grpcio==1.69.0 # via # -c ././deps/constraints.txt # google-api-core # grpcio-status -grpcio-status==1.62.3 +grpcio-status==1.69.0 # via google-api-core -huggingface-hub==0.27.0 +huggingface-hub==0.27.1 # via # timm # tokenizers @@ -79,16 +79,12 @@ idna==3.10 # via # -c ./base.txt # requests -importlib-resources==6.5.1 +importlib-resources==6.5.2 # via matplotlib -iopath==0.1.10 - # via layoutparser jinja2==3.1.5 # via torch kiwisolver==1.4.7 # via matplotlib -layoutparser==0.3.4 - # via unstructured-inference lxml==5.3.0 # via # -c ./base.txt @@ -107,7 +103,6 @@ numpy==1.26.4 # via # -c ./base.txt # contourpy - # 
layoutparser # matplotlib # onnx # onnxruntime @@ -126,10 +121,8 @@ onnx==1.17.0 # unstructured-inference onnxruntime==1.19.2 # via unstructured-inference -opencv-python==4.10.0.84 - # via - # layoutparser - # unstructured-inference +opencv-python==4.11.0.86 + # via unstructured-inference packaging==24.2 # via # -c ./base.txt @@ -140,40 +133,31 @@ packaging==24.2 # transformers # unstructured-pytesseract pandas==2.2.3 - # via layoutparser + # via unstructured-inference pdf2image==1.17.0 + # via -r ./extra-pdf-image.in +pdfminer-six==20240706 # via # -r ./extra-pdf-image.in - # layoutparser -pdfminer-six==20231228 - # via - # -r ./extra-pdf-image.in - # pdfplumber -pdfplumber==0.11.5 - # via layoutparser + # unstructured-inference pi-heif==0.21.0 # via -r ./extra-pdf-image.in -pikepdf==9.5.0 +pikepdf==9.5.1 # via -r ./extra-pdf-image.in pillow==11.1.0 # via - # layoutparser # matplotlib # pdf2image - # pdfplumber # pi-heif # pikepdf # torchvision # unstructured-pytesseract -portalocker==3.1.1 - # via iopath proto-plus==1.25.0 # via # google-api-core # google-cloud-vision -protobuf==4.25.5 +protobuf==5.29.3 # via - # -c ././deps/constraints.txt # google-api-core # google-cloud-vision # googleapis-common-protos @@ -200,7 +184,7 @@ pypdf==5.1.0 # -c ./base.txt # -r ./extra-pdf-image.in pypdfium2==4.30.1 - # via pdfplumber + # via unstructured-inference python-dateutil==2.9.0.post0 # via # -c ./base.txt @@ -213,7 +197,6 @@ pytz==2024.2 pyyaml==6.0.2 # via # huggingface-hub - # layoutparser # omegaconf # timm # transformers @@ -233,12 +216,12 @@ requests==2.32.3 # transformers rsa==4.9 # via google-auth -safetensors==0.5.0 +safetensors==0.5.2 # via # timm # transformers scipy==1.13.1 - # via layoutparser + # via unstructured-inference six==1.17.0 # via # -c ./base.txt @@ -247,7 +230,7 @@ sympy==1.13.1 # via # onnxruntime # torch -timm==1.0.12 +timm==1.0.14 # via # effdet # unstructured-inference @@ -269,7 +252,6 @@ tqdm==4.67.1 # via # -c ./base.txt # huggingface-hub - # 
iopath # transformers transformers==4.44.2 # via unstructured-inference @@ -277,12 +259,11 @@ typing-extensions==4.12.2 # via # -c ./base.txt # huggingface-hub - # iopath # pypdf # torch -tzdata==2024.2 +tzdata==2025.1 # via pandas -unstructured-inference==0.8.1 +unstructured-inference==0.8.6 # via -r ./extra-pdf-image.in unstructured-pytesseract==0.3.13 # via -r ./extra-pdf-image.in @@ -291,7 +272,7 @@ urllib3==1.26.20 # -c ././deps/constraints.txt # -c ./base.txt # requests -wrapt==1.17.0 +wrapt==1.17.2 # via # -c ./base.txt # deprecated diff --git a/requirements/extra-xlsx.txt b/requirements/extra-xlsx.txt index 7f00c057a2..b0c6cadbf7 100644 --- a/requirements/extra-xlsx.txt +++ b/requirements/extra-xlsx.txt @@ -26,7 +26,7 @@ six==1.17.0 # via # -c ./base.txt # python-dateutil -tzdata==2024.2 +tzdata==2025.1 # via pandas xlrd==2.0.1 # via -r ./extra-xlsx.in diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index 7051a2233b..e614f90a3b 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -16,7 +16,7 @@ click==8.1.8 # via # -c ./base.txt # sacremoses -filelock==3.16.1 +filelock==3.17.0 # via # huggingface-hub # torch @@ -25,7 +25,7 @@ fsspec==2024.12.0 # via # huggingface-hub # torch -huggingface-hub==0.27.0 +huggingface-hub==0.27.1 # via # tokenizers # transformers @@ -74,7 +74,7 @@ requests==2.32.3 # transformers sacremoses==0.1.1 # via -r ./huggingface.in -safetensors==0.5.0 +safetensors==0.5.2 # via transformers sentencepiece==0.2.0 # via -r ./huggingface.in diff --git a/requirements/test.txt b/requirements/test.txt index a7e1d2cfa2..87b9d7cc52 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -6,7 +6,7 @@ # annotated-types==0.7.0 # via pydantic -anyio==4.7.0 +anyio==4.8.0 # via # -c ./base.txt # httpx @@ -54,7 +54,7 @@ exceptiongroup==1.2.2 # -c ./base.txt # anyio # pytest -faker==33.1.0 +faker==34.0.0 # via jsf flake8==7.1.1 # via @@ -66,7 +66,7 @@ freezegun==1.5.1 # via -r ./test.in 
genson==1.3.0 # via datamodel-code-generator -grpcio==1.68.1 +grpcio==1.69.0 # via # -c ././deps/constraints.txt # -r ./test.in @@ -164,7 +164,7 @@ pycodestyle==2.12.1 # via # flake8 # flake8-print -pydantic[email]==2.10.4 +pydantic[email]==2.10.5 # via # -r ./test.in # datamodel-code-generator @@ -196,7 +196,7 @@ pyyaml==6.0.2 # via # datamodel-code-generator # vcrpy -referencing==0.35.1 +referencing==0.36.1 # via # jsonschema # jsonschema-specifications @@ -218,7 +218,7 @@ rpds-py==0.22.3 # referencing rstr==3.2.2 # via jsf -ruff==0.8.5 +ruff==0.9.2 # via -r ./test.in semantic-version==2.10.0 # via liccheck @@ -269,7 +269,8 @@ typing-extensions==4.12.2 # mypy # pydantic # pydantic-core -tzdata==2024.2 + # referencing +tzdata==2025.1 # via pandas ujson==5.10.0 # via label-studio-sdk @@ -281,7 +282,7 @@ urllib3==1.26.20 # vcrpy vcrpy==7.0.0 # via -r ./test.in -wrapt==1.17.0 +wrapt==1.17.2 # via # -c ./base.txt # smart-open diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 4b8c503c08..b93455ea08 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.16.14" # pragma: no cover +__version__ = "0.16.15-dev0" # pragma: no cover