From 3697b6fb26a12f7e9be13551592c7a292cdffc67 Mon Sep 17 00:00:00 2001 From: Philippe Prados Date: Thu, 3 Apr 2025 13:37:15 +0200 Subject: [PATCH 1/3] Fix pdfminer-six dependencies. See https://github.com/pdfminer/pdfminer.six/pull/1081 --- CHANGELOG.md | 4 + requirements/base.in | 17 ++- requirements/base.txt | 79 ++++++++++---- requirements/dev.in | 2 +- requirements/dev.txt | 134 +++++++++--------------- requirements/test.txt | 67 +++++------- unstructured_inference/__version__.py | 2 +- unstructured_inference/models/base.py | 2 - unstructured_inference/models/tables.py | 1 - 9 files changed, 156 insertions(+), 152 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09ca336d..974117bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.8.11-dev0 + +* fix: fix dependencies for python 3.9 + ## 0.8.10 * feat: unpin `numpy` and bump minimum for `onnxruntime` to be compatible with `numpy>=2` diff --git a/requirements/base.in b/requirements/base.in index 436c5fdc..4739f8d9 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,17 +1,24 @@ -c constraints.in python-multipart huggingface-hub -numpy +numpy==2.0.2 opencv-python!=4.7.0.68 onnx -onnxruntime>=1.18.0 -matplotlib +onnxruntime==1.19.2 +# contourpy >3.9 is for python 3.10 +matplotlib==3.9.4 torch timm # NOTE(alan): Pinned because this is when the most recent module we import appeared transformers>=4.25.1 rapidfuzz pandas -scipy +scipy==1.13.1 pypdfium2 -pdfminer-six +pdfminer-six>=20250327 +# contourpy >1.3.0 is for python 3.10 +contourpy==1.3.0 +# kiwisolver >1.4.7 is for python 3.10 +kiwisolver==1.4.7 +# networkx >3.2.1 is for python 3.10 +networkx==3.2.1 diff --git a/requirements/base.txt b/requirements/base.txt index c65bbce6..877b237a 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/base.in @@ -15,25 +15,27 @@ charset-normalizer==3.4.1 coloredlogs==15.0.1 # via onnxruntime contourpy==1.3.0 - # via matplotlib + # via + # -r requirements/base.in + # matplotlib cryptography==44.0.2 # via pdfminer-six cycler==0.12.1 # via matplotlib -filelock==3.17.0 +filelock==3.18.0 # via # huggingface-hub # torch # transformers flatbuffers==25.2.10 # via onnxruntime -fonttools==4.56.0 +fonttools==4.57.0 # via matplotlib -fsspec==2025.3.0 +fsspec==2025.3.2 # via # huggingface-hub # torch -huggingface-hub==0.29.3 +huggingface-hub==0.30.1 # via # -r requirements/base.in # timm @@ -43,12 +45,12 @@ humanfriendly==10.0 # via coloredlogs idna==3.10 # via requests -importlib-resources==6.5.2 - # via matplotlib jinja2==3.1.6 # via torch kiwisolver==1.4.7 - # via matplotlib + # via + # -r requirements/base.in + # matplotlib markupsafe==3.0.2 # via jinja2 matplotlib==3.9.4 @@ -56,7 +58,9 @@ matplotlib==3.9.4 mpmath==1.3.0 # via sympy networkx==3.2.1 - # via torch + # via + # -r requirements/base.in + # torch numpy==2.0.2 # via # -r requirements/base.in @@ -69,6 +73,40 @@ numpy==2.0.2 # scipy # torchvision # transformers +nvidia-cublas-cu12==12.4.5.8 + # via + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.4.127 + # via torch +nvidia-cuda-nvrtc-cu12==12.4.127 + # via torch +nvidia-cuda-runtime-cu12==12.4.127 + # via torch +nvidia-cudnn-cu12==9.1.0.70 + # via torch +nvidia-cufft-cu12==11.2.1.3 + # via torch +nvidia-curand-cu12==10.3.5.147 + # via torch +nvidia-cusolver-cu12==11.6.1.9 + # via torch +nvidia-cusparse-cu12==12.3.1.170 + # via + # nvidia-cusolver-cu12 + # torch +nvidia-cusparselt-cu12==0.6.2 + # via torch +nvidia-nccl-cu12==2.21.5 + # via torch +nvidia-nvjitlink-cu12==12.4.127 + # via + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 + # torch +nvidia-nvtx-cu12==12.4.127 + # via torch onnx==1.17.0 # via -r requirements/base.in onnxruntime==1.19.2 @@ -83,19 +121,19 @@ packaging==24.2 # transformers pandas==2.2.3 # via -r requirements/base.in -pdfminer-six==20240706 +pdfminer-six==20250327 # via -r requirements/base.in pillow==11.1.0 # via # matplotlib # torchvision -protobuf==6.30.0 +protobuf==6.30.2 # via # onnx # onnxruntime pycparser==2.22 # via cffi -pyparsing==3.2.1 +pyparsing==3.2.3 # via matplotlib pypdfium2==4.30.1 # via -r requirements/base.in @@ -105,7 +143,7 @@ python-dateutil==2.9.0.post0 # pandas python-multipart==0.0.20 # via -r requirements/base.in -pytz==2025.1 +pytz==2025.2 # via pandas pyyaml==6.0.2 # via @@ -147,15 +185,18 @@ tqdm==4.67.1 # via # huggingface-hub # transformers -transformers==4.49.0 +transformers==4.50.3 # via -r requirements/base.in -typing-extensions==4.12.2 +triton==3.2.0 + # via torch +typing-extensions==4.13.0 # via # huggingface-hub # torch -tzdata==2025.1 +tzdata==2025.2 # via pandas urllib3==2.3.0 # via requests -zipp==3.21.0 - # via importlib-resources + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/requirements/dev.in b/requirements/dev.in index 9fdba403..4b81a167 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -2,6 +2,6 @@ -c base.txt -c test.txt jupyter -ipython +ipython==8.18.1 pip-tools matplotlib \ No newline at end of file diff --git a/requirements/dev.txt b/requirements/dev.txt index 26b8668d..601fa00c 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,16 +1,14 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/dev.in # -anyio==4.8.0 +anyio==4.9.0 # via - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # httpx # jupyter-server -appnope==0.1.4 - # via ipykernel argon2-cffi==23.1.0 # via jupyter-server argon2-cffi-bindings==21.2.0 @@ -19,7 +17,7 @@ arrow==1.3.0 # via isoduration asttokens==3.0.0 # via stack-data -async-lru==2.0.4 +async-lru==2.0.5 # via jupyterlab attrs==25.3.0 # via @@ -35,23 +33,23 @@ build==1.2.2.post1 # via pip-tools certifi==2025.1.31 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # httpcore # httpx # requests cffi==1.17.1 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # argon2-cffi-bindings charset-normalizer==3.4.1 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # requests click==8.1.8 # via - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # pip-tools comm==0.2.2 # via @@ -59,11 +57,11 @@ comm==0.2.2 # ipywidgets contourpy==1.3.0 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib cycler==0.12.1 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib debugpy==1.8.13 # via ipykernel @@ -71,53 +69,36 @@ decorator==5.2.1 # via ipython defusedxml==0.7.1 # via nbconvert -exceptiongroup==1.2.2 - # via - # -c requirements/test.txt - # anyio - # ipython executing==2.2.0 # via stack-data fastjsonschema==2.21.1 # via nbformat -fonttools==4.56.0 +fonttools==4.57.0 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib fqdn==1.5.1 # via jsonschema h11==0.14.0 # via - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # httpcore httpcore==1.0.7 # via - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # httpx httpx==0.28.1 # via - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # jupyterlab idna==3.10 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # anyio # httpx # jsonschema # requests -importlib-metadata==8.6.1 - # via - # build - # jupyter-client - # jupyter-lsp - # jupyterlab - # jupyterlab-server - # nbconvert -importlib-resources==6.5.2 - # via - # -c requirements/base.txt - # matplotlib ipykernel==6.29.5 # via # jupyter @@ -137,12 +118,12 @@ jedi==0.19.2 # via ipython jinja2==3.1.6 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # jupyter-server # jupyterlab # jupyterlab-server # nbconvert -json5==0.10.0 +json5==0.11.0 # via jupyterlab-server jsonpointer==3.0.0 # via jsonschema @@ -186,7 +167,7 @@ jupyter-server==2.15.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.3.5 +jupyterlab==4.3.6 # via # jupyter # notebook @@ -200,22 +181,22 @@ jupyterlab-widgets==3.0.13 # via ipywidgets kiwisolver==1.4.7 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib markupsafe==3.0.2 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # jinja2 # nbconvert matplotlib==3.9.4 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -r requirements/dev.in matplotlib-inline==0.1.7 # via # ipykernel # ipython -mistune==3.1.2 +mistune==3.1.3 # via nbconvert nbclient==0.10.2 # via nbconvert @@ -230,7 +211,7 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -notebook==7.3.2 +notebook==7.3.3 # via jupyter notebook-shim==0.2.4 # via @@ -238,15 +219,15 @@ notebook-shim==0.2.4 # notebook numpy==2.0.2 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # contourpy # matplotlib overrides==7.7.0 # via jupyter-server packaging==24.2 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # build # ipykernel # jupyter-events @@ -263,14 +244,14 @@ pexpect==4.9.0 # via ipython pillow==11.1.0 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # matplotlib pip-tools==7.4.1 # via -r requirements/dev.in -platformdirs==4.3.6 +platformdirs==4.3.7 # via - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # jupyter-core prometheus-client==0.21.1 # via jupyter-server @@ -288,16 +269,16 @@ pure-eval==0.2.3 # via stack-data pycparser==2.22 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # cffi pygments==2.19.1 # via # ipython # jupyter-console # nbconvert -pyparsing==3.2.1 +pyparsing==3.2.3 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib pyproject-hooks==1.2.0 # via @@ -305,7 +286,7 @@ pyproject-hooks==1.2.0 # pip-tools python-dateutil==2.9.0.post0 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # arrow # jupyter-client # matplotlib @@ -313,8 +294,8 @@ python-json-logger==3.3.0 # via jupyter-events pyyaml==6.0.2 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # jupyter-events pyzmq==26.3.0 # via @@ -329,8 +310,8 @@ referencing==0.36.2 # jupyter-events requests==2.32.3 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # jupyterlab-server rfc3339-validator==0.1.4 # via @@ -340,7 +321,7 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.23.1 +rpds-py==0.24.0 # via # jsonschema # referencing @@ -348,12 +329,12 @@ send2trash==1.8.3 # via jupyter-server six==1.17.0 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # python-dateutil # rfc3339-validator sniffio==1.3.1 # via - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # anyio soupsieve==2.6 # via beautifulsoup4 @@ -365,12 +346,6 @@ terminado==0.18.1 # jupyter-server-terminals tinycss2==1.4.0 # via bleach -tomli==2.2.1 - # via - # -c requirements/test.txt - # build - # jupyterlab - # pip-tools tornado==6.4.2 # via # ipykernel @@ -397,23 +372,19 @@ traitlets==5.14.3 # nbformat types-python-dateutil==2.9.0.20241206 # via arrow -typing-extensions==4.12.2 +typing-extensions==4.13.0 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # anyio - # async-lru # beautifulsoup4 - # ipython - # mistune - # python-json-logger # referencing uri-template==1.3.0 # via jsonschema urllib3==2.3.0 # via - # -c requirements/base.txt - # -c requirements/test.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # requests wcwidth==0.2.13 # via prompt-toolkit @@ -429,11 +400,6 @@ wheel==0.45.1 # via pip-tools widgetsnbextension==4.0.13 # via ipywidgets -zipp==3.21.0 - # via - # -c requirements/base.txt - # importlib-metadata - # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/test.txt b/requirements/test.txt index 13ded3ce..725006ca 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,48 +1,44 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/test.in # -anyio==4.8.0 +anyio==4.9.0 # via httpx black==25.1.0 # via -r requirements/test.in certifi==2025.1.31 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # httpcore # httpx # requests charset-normalizer==3.4.1 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # requests click==8.1.8 # via # -r requirements/test.in # black -coverage[toml]==7.6.12 +coverage[toml]==7.8.0 # via # -r requirements/test.in # pytest-cov -exceptiongroup==1.2.2 +filelock==3.18.0 # via - # anyio - # pytest -filelock==3.17.0 - # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # huggingface-hub -flake8==7.1.2 +flake8==7.2.0 # via # -r requirements/test.in # flake8-docstrings flake8-docstrings==1.7.0 # via -r requirements/test.in -fsspec==2025.3.0 +fsspec==2025.3.2 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # huggingface-hub h11==0.14.0 # via httpcore @@ -50,17 +46,17 @@ httpcore==1.0.7 # via httpx httpx==0.28.1 # via -r requirements/test.in -huggingface-hub==0.29.3 +huggingface-hub==0.30.1 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -r requirements/test.in idna==3.10 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # anyio # httpx # requests -iniconfig==2.0.0 +iniconfig==2.1.0 # via pytest mccabe==0.7.0 # via flake8 @@ -72,7 +68,7 @@ mypy-extensions==1.0.0 # mypy packaging==24.2 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # black # huggingface-hub # pytest @@ -82,60 +78,53 @@ pdf2image==1.17.0 # via -r requirements/test.in pillow==11.1.0 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # pdf2image -platformdirs==4.3.6 +platformdirs==4.3.7 # via black pluggy==1.5.0 # via pytest -pycodestyle==2.12.1 +pycodestyle==2.13.0 # via flake8 pydocstyle==6.3.0 # via flake8-docstrings -pyflakes==3.2.0 +pyflakes==3.3.2 # via flake8 pytest==8.3.5 # via # pytest-cov # pytest-mock -pytest-cov==6.0.0 +pytest-cov==6.1.0 # via -r requirements/test.in pytest-mock==3.14.0 # via -r requirements/test.in pyyaml==6.0.2 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # huggingface-hub requests==2.32.3 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # huggingface-hub -ruff==0.10.0 +ruff==0.11.2 # via -r requirements/test.in sniffio==1.3.1 # via anyio snowballstemmer==2.2.0 # via pydocstyle -tomli==2.2.1 - # via - # black - # coverage - # mypy - # pytest tqdm==4.67.1 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # huggingface-hub -types-pyyaml==6.0.12.20241230 +types-pyyaml==6.0.12.20250402 # via -r requirements/test.in -typing-extensions==4.12.2 +typing-extensions==4.13.0 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # anyio - # black # huggingface-hub # mypy urllib3==2.3.0 # via - # -c requirements/base.txt + # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # requests diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 4ab59b42..51560d41 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "0.8.10" # pragma: no cover +__version__ = "0.8.11-dev0" # pragma: no cover diff --git a/unstructured_inference/models/base.py b/unstructured_inference/models/base.py index eef0844c..38003278 100644 --- a/unstructured_inference/models/base.py +++ b/unstructured_inference/models/base.py @@ -46,8 +46,6 @@ def get_model(model_name: Optional[str] = None) -> UnstructuredModel: # TODO(alan): These cases are similar enough that we can probably do them all together with # importlib - global models - if model_name is None: default_name_from_env = os.environ.get("UNSTRUCTURED_DEFAULT_MODEL_NAME") model_name = default_name_from_env if default_name_from_env is not None else DEFAULT_MODEL diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index c390378e..9a7146dc 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -135,7 +135,6 @@ def run_prediction( def load_agent(): """Loads the Table agent as a global variable to ensure that we only load it once.""" - global tables_agent if not hasattr(tables_agent, "model"): logger.info("Loading the Table agent ...") From 7142a20a83e7505a4102876e101170498261d7d2 Mon Sep 17 00:00:00 2001 From: Philippe Prados Date: Mon, 7 Apr 2025 18:05:48 +0200 Subject: [PATCH 2/3] Fix pdfminer-six dependencies. See https://github.com/pdfminer/pdfminer.six/pull/1081 --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 2 +- Makefile | 10 +++++----- requirements/base.in | 28 ++++++++++++++++------------ requirements/base.txt | 20 ++++++++++---------- requirements/dev.in | 3 ++- requirements/dev.txt | 19 +++++++++++-------- requirements/test.txt | 8 ++++---- 8 files changed, 50 insertions(+), 42 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c25520e5..86198f32 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,7 +97,7 @@ jobs: with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ secrets.AWS_DEFAULT_REGION }} + aws-region: ${{ secrets.AWS_REGION }} - name: Test env: UNSTRUCTURED_HF_TOKEN: ${{ secrets.HF_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 974117bf..59c0e6cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## 0.8.11-dev0 -* fix: fix dependencies for python 3.9 +* fix: dependencies of pdfminer-six and python 3.9 ## 0.8.10 diff --git a/Makefile b/Makefile index 35ec2e2e..173b2ed4 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKAGE_NAME := unstructured_inference -PIP_VERSION := 23.2.1 +PIP_VERSION := 25.0.1 CURRENT_DIR := $(shell pwd) @@ -14,7 +14,7 @@ help: Makefile ## install-base: installs core requirements needed for text processing bricks .PHONY: install-base -install-base: install-base-pip-packages +install-base: install-base-pip-packages requirements/base.in python3 -m pip install -r requirements/base.txt ## install: installs all test, dev, and experimental requirements @@ -29,16 +29,16 @@ install-base-pip-packages: python3 -m pip install pip==${PIP_VERSION} .PHONY: install-test -install-test: install-base +install-test: install-base requirements/test.txt python3 -m pip install -r requirements/test.txt .PHONY: install-dev -install-dev: install-test +install-dev: install-test requirements/dev.txt python3 -m pip install -r requirements/dev.txt ## pip-compile: compiles all base/dev/test requirements .PHONY: pip-compile -pip-compile: +pip-compile: requirements/base.txt requirements/test.txt requirements/dev.txt pip-compile --upgrade requirements/base.in pip-compile --upgrade requirements/test.in pip-compile --upgrade requirements/dev.in diff --git a/requirements/base.in b/requirements/base.in index 4739f8d9..82e2b05d 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,24 +1,28 @@ -c constraints.in python-multipart huggingface-hub -numpy==2.0.2 +numpy<2.1 ; python_version <= '3.9' +numpy ; python_version > '3.10' opencv-python!=4.7.0.68 onnx -onnxruntime==1.19.2 -# contourpy >3.9 is for python 3.10 -matplotlib==3.9.4 +onnxruntime<1.20 ; python_version <= '3.9' +onnxruntime ; python_version >= '3.10' +matplotlib<3.10 ; python_version <= '3.9' +matplotlib ; python_version >= '3.10' torch timm # NOTE(alan): Pinned because this is when the most recent module we import appeared -transformers>=4.25.1 +#transformers>=4.25.1 +transformers>=4.25.1,<4.51 rapidfuzz pandas -scipy==1.13.1 +scipy<1.14 ; python_version <= '3.9' +scipy ; python_version >= '3.10' pypdfium2 pdfminer-six>=20250327 -# contourpy >1.3.0 is for python 3.10 -contourpy==1.3.0 -# kiwisolver >1.4.7 is for python 3.10 -kiwisolver==1.4.7 -# networkx >3.2.1 is for python 3.10 -networkx==3.2.1 +contourpy<1.4.0 ; python_version <= '3.9' +contourpy ; python_version >= '3.10' +kiwisolver<1.5 ; python_version <= '3.9' +kiwisolver ; python_version >= '3.10' +networkx==3.2.1 ; python_version <= '3.9' +networkx ; python_version >= '3.10' diff --git a/requirements/base.txt b/requirements/base.txt index 877b237a..b4d2c15b 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -14,7 +14,7 @@ charset-normalizer==3.4.1 # requests coloredlogs==15.0.1 # via onnxruntime -contourpy==1.3.0 +contourpy==1.3.1 ; python_version >= "3.10" # via # -r requirements/base.in # matplotlib @@ -35,7 +35,7 @@ fsspec==2025.3.2 # via # huggingface-hub # torch -huggingface-hub==0.30.1 +huggingface-hub==0.30.2 # via # -r requirements/base.in # timm @@ -47,21 +47,21 @@ idna==3.10 # via requests jinja2==3.1.6 # via torch -kiwisolver==1.4.7 +kiwisolver==1.4.8 ; python_version >= "3.10" # via # -r requirements/base.in # matplotlib markupsafe==3.0.2 # via jinja2 -matplotlib==3.9.4 +matplotlib==3.10.1 ; python_version >= "3.10" # via -r requirements/base.in mpmath==1.3.0 # via sympy -networkx==3.2.1 +networkx==3.4.2 ; python_version >= "3.10" # via # -r requirements/base.in # torch -numpy==2.0.2 +numpy==2.2.4 ; python_version > "3.10" # via # -r requirements/base.in # contourpy @@ -109,7 +109,7 @@ nvidia-nvtx-cu12==12.4.127 # via torch onnx==1.17.0 # via -r requirements/base.in -onnxruntime==1.19.2 +onnxruntime==1.21.0 ; python_version >= "3.10" # via -r requirements/base.in opencv-python==4.11.0.86 # via -r requirements/base.in @@ -150,7 +150,7 @@ pyyaml==6.0.2 # huggingface-hub # timm # transformers -rapidfuzz==3.12.2 +rapidfuzz==3.13.0 # via -r requirements/base.in regex==2024.11.6 # via transformers @@ -162,7 +162,7 @@ safetensors==0.5.3 # via # timm # transformers -scipy==1.13.1 +scipy==1.15.2 ; python_version >= "3.10" # via -r requirements/base.in six==1.17.0 # via python-dateutil @@ -189,7 +189,7 @@ transformers==4.50.3 # via -r requirements/base.in triton==3.2.0 # via torch -typing-extensions==4.13.0 +typing-extensions==4.13.1 # via # huggingface-hub # torch diff --git a/requirements/dev.in b/requirements/dev.in index 4b81a167..48fb4396 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -2,6 +2,7 @@ -c base.txt -c test.txt jupyter -ipython==8.18.1 +ipython<=8.19; python_version <= '3.9' +ipython; python_version >= '3.10' pip-tools matplotlib \ No newline at end of file diff --git a/requirements/dev.txt b/requirements/dev.txt index 601fa00c..2d74eb74 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -55,7 +55,7 @@ comm==0.2.2 # via # ipykernel # ipywidgets -contourpy==1.3.0 +contourpy==1.3.1 ; python_version >= "3.10" # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib @@ -104,12 +104,14 @@ ipykernel==6.29.5 # jupyter # jupyter-console # jupyterlab -ipython==8.18.1 +ipython==9.1.0 ; python_version >= "3.10" # via # -r requirements/dev.in # ipykernel # ipywidgets # jupyter-console +ipython-pygments-lexers==1.1.1 + # via ipython ipywidgets==8.1.5 # via jupyter isoduration==20.11.0 @@ -123,7 +125,7 @@ jinja2==3.1.6 # jupyterlab # jupyterlab-server # nbconvert -json5==0.11.0 +json5==0.12.0 # via jupyterlab-server jsonpointer==3.0.0 # via jsonschema @@ -179,7 +181,7 @@ jupyterlab-server==2.27.3 # notebook jupyterlab-widgets==3.0.13 # via ipywidgets -kiwisolver==1.4.7 +kiwisolver==1.4.8 ; python_version >= "3.10" # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib @@ -188,7 +190,7 @@ markupsafe==3.0.2 # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # jinja2 # nbconvert -matplotlib==3.9.4 +matplotlib==3.10.1 ; python_version >= "3.10" # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -r requirements/dev.in @@ -217,7 +219,7 @@ notebook-shim==0.2.4 # via # jupyterlab # notebook -numpy==2.0.2 +numpy==2.2.4 ; python_version > "3.10" # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # contourpy @@ -274,6 +276,7 @@ pycparser==2.22 pygments==2.19.1 # via # ipython + # ipython-pygments-lexers # jupyter-console # nbconvert pyparsing==3.2.3 @@ -297,7 +300,7 @@ pyyaml==6.0.2 # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt # jupyter-events -pyzmq==26.3.0 +pyzmq==26.4.0 # via # ipykernel # jupyter-client @@ -372,7 +375,7 @@ traitlets==5.14.3 # nbformat types-python-dateutil==2.9.0.20241206 # via arrow -typing-extensions==4.13.0 +typing-extensions==4.13.1 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt diff --git a/requirements/test.txt b/requirements/test.txt index 725006ca..2c2c21fb 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -46,7 +46,7 @@ httpcore==1.0.7 # via httpx httpx==0.28.1 # via -r requirements/test.in -huggingface-hub==0.30.1 +huggingface-hub==0.30.2 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -r requirements/test.in @@ -94,7 +94,7 @@ pytest==8.3.5 # via # pytest-cov # pytest-mock -pytest-cov==6.1.0 +pytest-cov==6.1.1 # via -r requirements/test.in pytest-mock==3.14.0 # via -r requirements/test.in @@ -106,7 +106,7 @@ requests==2.32.3 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # huggingface-hub -ruff==0.11.2 +ruff==0.11.4 # via -r requirements/test.in sniffio==1.3.1 # via anyio @@ -118,7 +118,7 @@ tqdm==4.67.1 # huggingface-hub types-pyyaml==6.0.12.20250402 # via -r requirements/test.in -typing-extensions==4.13.0 +typing-extensions==4.13.1 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # anyio From f96790559b5d6c61856d853cc74b3dda38d235fb Mon Sep 17 00:00:00 2001 From: Philippe Prados Date: Fri, 11 Apr 2025 14:52:45 +0200 Subject: [PATCH 3/3] Fix 3.9 dependencies --- .github/workflows/ci.yml | 2 +- Makefile | 6 +++++- requirements/base-3.10.in | 17 +++++++++++++++++ requirements/base.in | 28 ++++++++++------------------ requirements/base.txt | 20 ++++++++++---------- requirements/dev.txt | 24 ++++++++++++------------ requirements/test.txt | 6 +++--- 7 files changed, 58 insertions(+), 45 deletions(-) create mode 100644 requirements/base-3.10.in diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 86198f32..c25520e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,7 +97,7 @@ jobs: with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ secrets.AWS_REGION }} + aws-region: ${{ secrets.AWS_DEFAULT_REGION }} - name: Test env: UNSTRUCTURED_HF_TOKEN: ${{ secrets.HF_TOKEN }} diff --git a/Makefile b/Makefile index 173b2ed4..0dbe373f 100644 --- a/Makefile +++ b/Makefile @@ -38,8 +38,12 @@ install-dev: install-test requirements/dev.txt ## pip-compile: compiles all base/dev/test requirements .PHONY: pip-compile -pip-compile: requirements/base.txt requirements/test.txt requirements/dev.txt +pip-compile \ +requirements/base.txt requirements/base-3.10.txt requirements/test.txt \ +requirements/dev.txt \ +: requirements/base.in requirements/base-3.10.in requirements/test.in requirements/dev.in pip-compile --upgrade requirements/base.in + pip-compile --upgrade requirements/base-3.10.in -o requirements/base-3.10.txt pip-compile --upgrade requirements/test.in pip-compile --upgrade requirements/dev.in diff --git a/requirements/base-3.10.in b/requirements/base-3.10.in new file mode 100644 index 00000000..695fd0cc --- /dev/null +++ b/requirements/base-3.10.in @@ -0,0 +1,17 @@ +-c constraints.in +python-multipart +huggingface-hub +numpy +opencv-python!=4.7.0.68 +onnx +onnxruntime ; python_version >= '3.10' +matplotlib ; python_version >= '3.10' +torch +timm +# NOTE(alan): Pinned because this is when the most recent module we import appeared +transformers +rapidfuzz +pandas +scipy ; python_version >= '3.10' +pypdfium2 +pdfminer-six>=20250327 diff --git a/requirements/base.in b/requirements/base.in index 82e2b05d..5e54bf4b 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,28 +1,20 @@ -c constraints.in python-multipart huggingface-hub -numpy<2.1 ; python_version <= '3.9' -numpy ; python_version > '3.10' +numpy==2.0.2 opencv-python!=4.7.0.68 onnx -onnxruntime<1.20 ; python_version <= '3.9' -onnxruntime ; python_version >= '3.10' -matplotlib<3.10 ; python_version <= '3.9' -matplotlib ; python_version >= '3.10' -torch +onnxruntime<1.20 +matplotlib==3.9.4 +contourpy==1.3.0 +kiwisolver==1.4.7 +torch==2.6.0 +networkx==3.2.1 timm -# NOTE(alan): Pinned because this is when the most recent module we import appeared -#transformers>=4.25.1 -transformers>=4.25.1,<4.51 +transformers rapidfuzz pandas -scipy<1.14 ; python_version <= '3.9' -scipy ; python_version >= '3.10' +scipy==1.13.1 pypdfium2 pdfminer-six>=20250327 -contourpy<1.4.0 ; python_version <= '3.9' -contourpy ; python_version >= '3.10' -kiwisolver<1.5 ; python_version <= '3.9' -kiwisolver ; python_version >= '3.10' -networkx==3.2.1 ; python_version <= '3.9' -networkx ; python_version >= '3.10' + diff --git a/requirements/base.txt b/requirements/base.txt index b4d2c15b..84de48b0 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -14,7 +14,7 @@ charset-normalizer==3.4.1 # requests coloredlogs==15.0.1 # via onnxruntime -contourpy==1.3.1 ; python_version >= "3.10" +contourpy==1.3.0 # via # -r requirements/base.in # matplotlib @@ -47,21 +47,21 @@ idna==3.10 # via requests jinja2==3.1.6 # via torch -kiwisolver==1.4.8 ; python_version >= "3.10" +kiwisolver==1.4.7 # via # -r requirements/base.in # matplotlib markupsafe==3.0.2 # via jinja2 -matplotlib==3.10.1 ; python_version >= "3.10" +matplotlib==3.9.4 # via -r requirements/base.in mpmath==1.3.0 # via sympy -networkx==3.4.2 ; python_version >= "3.10" +networkx==3.2.1 # via # -r requirements/base.in # torch -numpy==2.2.4 ; python_version > "3.10" +numpy==2.0.2 # via # -r requirements/base.in # contourpy @@ -109,7 +109,7 @@ nvidia-nvtx-cu12==12.4.127 # via torch onnx==1.17.0 # via -r requirements/base.in -onnxruntime==1.21.0 ; python_version >= "3.10" +onnxruntime==1.19.2 # via -r requirements/base.in opencv-python==4.11.0.86 # via -r requirements/base.in @@ -162,7 +162,7 @@ safetensors==0.5.3 # via # timm # transformers -scipy==1.15.2 ; python_version >= "3.10" +scipy==1.13.1 # via -r requirements/base.in six==1.17.0 # via python-dateutil @@ -185,17 +185,17 @@ tqdm==4.67.1 # via # huggingface-hub # transformers -transformers==4.50.3 +transformers==4.51.2 # via -r requirements/base.in triton==3.2.0 # via torch -typing-extensions==4.13.1 +typing-extensions==4.13.2 # via # huggingface-hub # torch tzdata==2025.2 # via pandas -urllib3==2.3.0 +urllib3==2.4.0 # via requests # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/dev.txt b/requirements/dev.txt index 2d74eb74..7c86c4af 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -55,7 +55,7 @@ comm==0.2.2 # via # ipykernel # ipywidgets -contourpy==1.3.1 ; python_version >= "3.10" +contourpy==1.3.0 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib @@ -63,7 +63,7 @@ cycler==0.12.1 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib -debugpy==1.8.13 +debugpy==1.8.14 # via ipykernel decorator==5.2.1 # via ipython @@ -112,7 +112,7 @@ ipython==9.1.0 ; python_version >= "3.10" # jupyter-console ipython-pygments-lexers==1.1.1 # via ipython -ipywidgets==8.1.5 +ipywidgets==8.1.6 # via jupyter isoduration==20.11.0 # via jsonschema @@ -169,7 +169,7 @@ jupyter-server==2.15.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.3.6 +jupyterlab==4.4.0 # via # jupyter # notebook @@ -179,9 +179,9 @@ jupyterlab-server==2.27.3 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.13 +jupyterlab-widgets==3.0.14 # via ipywidgets -kiwisolver==1.4.8 ; python_version >= "3.10" +kiwisolver==1.4.7 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # matplotlib @@ -190,7 +190,7 @@ markupsafe==3.0.2 # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # jinja2 # nbconvert -matplotlib==3.10.1 ; python_version >= "3.10" +matplotlib==3.9.4 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -r requirements/dev.in @@ -213,13 +213,13 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -notebook==7.3.3 +notebook==7.4.0 # via jupyter notebook-shim==0.2.4 # via # jupyterlab # notebook -numpy==2.2.4 ; python_version > "3.10" +numpy==2.0.2 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # contourpy @@ -375,7 +375,7 @@ traitlets==5.14.3 # nbformat types-python-dateutil==2.9.0.20241206 # via arrow -typing-extensions==4.13.1 +typing-extensions==4.13.2 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt @@ -384,7 +384,7 @@ typing-extensions==4.13.1 # referencing uri-template==1.3.0 # via jsonschema -urllib3==2.3.0 +urllib3==2.4.0 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # -c /home/pprados/workspace.bda/unstructured-inference/requirements/test.txt @@ -401,7 +401,7 @@ websocket-client==1.8.0 # via jupyter-server wheel==0.45.1 # via pip-tools -widgetsnbextension==4.0.13 +widgetsnbextension==4.0.14 # via ipywidgets # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/test.txt b/requirements/test.txt index 2c2c21fb..3362df7f 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -106,7 +106,7 @@ requests==2.32.3 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # huggingface-hub -ruff==0.11.4 +ruff==0.11.5 # via -r requirements/test.in sniffio==1.3.1 # via anyio @@ -118,13 +118,13 @@ tqdm==4.67.1 # huggingface-hub types-pyyaml==6.0.12.20250402 # via -r requirements/test.in -typing-extensions==4.13.1 +typing-extensions==4.13.2 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # anyio # huggingface-hub # mypy -urllib3==2.3.0 +urllib3==2.4.0 # via # -c /home/pprados/workspace.bda/unstructured-inference/requirements/base.txt # requests