Skip to content

Commit 3a5d824

Browse files
authored
feat/support python 3.13 (#331)
* add 3.13 to supported python versions
* fix tidy
* isolate unstructured unit tests
* fix file path for unit tests
* explicitly download nltk data
1 parent 9436f0f commit 3a5d824

File tree

17 files changed

+46
-176
lines changed

17 files changed

+46
-176
lines changed

.github/workflows/unit_tests.yml

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
lint:
1818
strategy:
1919
matrix:
20-
python-version: [ "3.9","3.10","3.11" ]
20+
python-version: [ "3.9","3.10","3.11", "3.12", "3.13"]
2121
runs-on: ubuntu-latest
2222
steps:
2323
- uses: actions/checkout@v4
@@ -67,7 +67,7 @@ jobs:
6767
environment: ci
6868
strategy:
6969
matrix:
70-
python-version: [ "3.9","3.10","3.11" ]
70+
python-version: [ "3.9","3.10","3.11", "3.12", "3.13" ]
7171
runs-on: ubuntu-latest
7272
needs: [ lint ]
7373
steps:
@@ -86,7 +86,7 @@ jobs:
8686
environment: ci
8787
strategy:
8888
matrix:
89-
python-version: [ "3.9","3.10","3.11" ]
89+
python-version: [ "3.9","3.10","3.11", "3.12", "3.13" ]
9090
runs-on: ubuntu-latest
9191
needs: [ lint ]
9292
steps:
@@ -105,7 +105,27 @@ jobs:
105105
test_ingest_unit:
106106
strategy:
107107
matrix:
108-
python-version: [ "3.9","3.10" ]
108+
python-version: [ "3.9","3.10","3.11", "3.12", "3.13" ]
109+
runs-on: ubuntu-latest
110+
needs: [ lint ]
111+
steps:
112+
# actions/checkout MUST come before auth
113+
- uses: 'actions/checkout@v4'
114+
- name: Set up Python ${{ matrix.python-version }}
115+
uses: actions/setup-python@v5
116+
with:
117+
python-version: ${{ matrix.python-version }}
118+
- name: Test Ingest (unit)
119+
run: |
120+
make install-base
121+
make install-test
122+
make unit-test
123+
124+
test_ingest_unit_unstructured:
125+
strategy:
126+
matrix:
127+
# TODO include 3.13 when unstructured supports it
128+
python-version: [ "3.9","3.10","3.11", "3.12"]
109129
runs-on: ubuntu-latest
110130
env:
111131
NLTK_DATA: ${{ github.workspace }}/nltk_data
@@ -122,4 +142,5 @@ jobs:
122142
make install-base
123143
make install-test
124144
pip install unstructured
125-
make unit-test
145+
python -m nltk.downloader -d $NLTK_DATA punkt_tab averaged_perceptron_tagger_eng
146+
make unit-test-unstructured

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.3.13-dev3
1+
## 0.3.13-dev4
22

33
### Fixes
44

@@ -9,6 +9,7 @@
99
### Enhancements
1010

1111
* **Async support for all IO-bounded embedders**
12+
* **Expand support to Python 3.13**
1213

1314
## 0.3.12
1415

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,11 @@ check-version:
119119
###########
120120
.PHONY: unit-test
121121
unit-test:
122-
PYTHONPATH=. pytest -sv --cov unstructured_ingest/ test/unit
122+
PYTHONPATH=. pytest -sv --cov unstructured_ingest/ test/unit --ignore test/unit/unstructured
123+
124+
.PHONY: unit-test-unstructured
125+
unit-test-unstructured:
126+
PYTHONPATH=. pytest -sv --cov unstructured_ingest/ test/unit/unstructured
123127

124128
.PHONY: integration-test
125129
integration-test:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def load_requirements(file: Union[str, Path]) -> List[str]:
173173
long_description_content_type="text/markdown",
174174
keywords="NLP PDF HTML CV XML parsing preprocessing",
175175
url="https://github.com/Unstructured-IO/unstructured-ingest",
176-
python_requires=">=3.9.0,<3.13",
176+
python_requires=">=3.9.0,<3.14",
177177
classifiers=[
178178
"Development Status :: 4 - Beta",
179179
"Intended Audience :: Developers",

test/unit/embed/test_mixedbreadai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def mock_embeddings(
3131
)
3232
)
3333

34-
raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]
34+
raw_elements = [{"text": f"This is sentence {i + 1}"} for i in range(2)]
3535
elements = encoder.embed_documents(
3636
elements=raw_elements,
3737
)

test/unit/embed/test_octoai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker):
1717
mocker.patch.object(OctoAiEmbeddingConfig, "get_client", return_value=mock_client)
1818

1919
encoder = OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(api_key="api_key"))
20-
raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]
20+
raw_elements = [{"text": f"This is sentence {i + 1}"} for i in range(2)]
2121

2222
elements = encoder.embed_documents(
2323
elements=raw_elements,

test/unit/embed/test_openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker):
1010
mocker.patch.object(OpenAIEmbeddingConfig, "get_client", return_value=mock_client)
1111

1212
encoder = OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(api_key="api_key"))
13-
raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]
13+
raw_elements = [{"text": f"This is sentence {i + 1}"} for i in range(2)]
1414

1515
elements = encoder.embed_documents(
1616
elements=raw_elements,

test/unit/embed/test_vertexai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker):
1515
# Mock create_client to return our mock_client
1616
mocker.patch.object(VertexAIEmbeddingConfig, "get_client", return_value=mock_client)
1717
encoder = VertexAIEmbeddingEncoder(config=VertexAIEmbeddingConfig(api_key={"api_key": "value"}))
18-
raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]
18+
raw_elements = [{"text": f"This is sentence {i + 1}"} for i in range(2)]
1919

2020
elements = encoder.embed_documents(
2121
elements=raw_elements,

test/unit/embed/test_voyageai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker):
1414
encoder = VoyageAIEmbeddingEncoder(
1515
config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-law-2")
1616
)
17-
raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]
17+
raw_elements = [{"text": f"This is sentence {i + 1}"} for i in range(2)]
1818

1919
elements = encoder.embed_documents(
2020
elements=raw_elements,

test/unit/pipeline/reformat/test_chunking.py

Lines changed: 0 additions & 156 deletions
This file was deleted.

0 commit comments

Comments
 (0)