Skip to content

Commit e510f26

Browse files
authored
build: 0.0.78 release; bump to unstructured==0.15.6 (#453)
### Summary

Bumps to `unstructured==0.15.6`. Resolves a CVE related to the `nltk` library.
1 parent f91ce3a commit e510f26

File tree

9 files changed

+32
-30
lines changed

9 files changed

+32
-30
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
## 0.0.78
2+
3+
* Resolve NLTK CVE.
4+
* Bump to `unstructured` 0.15.6
5+
16
## 0.0.77
27

38
* Bump to `unstructured` 0.15.5

Dockerfile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ RUN ${PIP} install pip==${PIP_VERSION}
2525
RUN ${PIP} install --no-cache -r requirements-base.txt
2626

2727
FROM python-deps as model-deps
28-
RUN ${PYTHON} -c "import nltk; nltk.download('punkt')" && \
29-
${PYTHON} -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \
28+
RUN ${PYTHON} -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()" && \
3029
${PYTHON} -c "from unstructured.partition.model_init import initialize; initialize()"
3130

3231
FROM model-deps as code

Makefile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,9 @@ install-test: install-base
3333
.PHONY: install-ci
3434
install-ci: install-test
3535

36-
.PHONE: install-nltk-models
36+
.PHONY: install-nltk-models
3737
install-nltk-models:
38-
python3 -c "import nltk; nltk.download('punkt')"
39-
python3 -c "import nltk; nltk.download('averaged_perceptron_tagger')"
38+
python3 -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()"
4039

4140
## pip-compile: compiles all base/dev/test requirements
4241
.PHONY: pip-compile

docker/rockylinux-9.4/Dockerfile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ RUN python3.10 -m pip install pip==${PIP_VERSION} \
3232
USER ${NB_USER}
3333

3434
FROM python-deps as model-deps
35-
RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
36-
python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \
35+
RUN python3.10 -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()" && \
3736
python3.10 -c "from unstructured.partition.model_init import initialize; initialize()"
3837

3938
FROM model-deps as code

prepline_general/api/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
app = FastAPI(
1414
title="Unstructured Pipeline API",
1515
summary="Partition documents with the Unstructured library",
16-
version="0.0.77",
16+
version="0.0.78",
1717
docs_url="/general/docs",
1818
openapi_url="/general/openapi.json",
1919
servers=[

prepline_general/api/general.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ def return_content_type(filename: str):
649649

650650

651651
@router.get("/general/v0/general", include_in_schema=False)
652-
@router.get("/general/v0.0.77/general", include_in_schema=False)
652+
@router.get("/general/v0.0.78/general", include_in_schema=False)
653653
async def handle_invalid_get_request():
654654
raise HTTPException(
655655
status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Only POST requests are supported."
@@ -664,7 +664,7 @@ async def handle_invalid_get_request():
664664
description="Description",
665665
operation_id="partition_parameters",
666666
)
667-
@router.post("/general/v0.0.77/general", include_in_schema=False)
667+
@router.post("/general/v0.0.78/general", include_in_schema=False)
668668
def general_partition(
669669
request: Request,
670670
# cannot use annotated type here because of a bug described here:

preprocessing-pipeline-family.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
name: general
2-
version: 0.0.77
2+
version: 0.0.78

requirements/base.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ backoff==2.2.1
1818
# unstructured
1919
beautifulsoup4==4.12.3
2020
# via unstructured
21-
cachetools==5.4.0
21+
cachetools==5.5.0
2222
# via google-auth
2323
certifi==2024.7.4
2424
# via
@@ -84,7 +84,7 @@ fsspec==2024.6.1
8484
# torch
8585
google-api-core[grpc]==2.19.1
8686
# via google-cloud-vision
87-
google-auth==2.33.0
87+
google-auth==2.34.0
8888
# via
8989
# google-api-core
9090
# google-cloud-vision
@@ -94,11 +94,11 @@ googleapis-common-protos==1.63.2
9494
# via
9595
# google-api-core
9696
# grpcio-status
97-
grpcio==1.65.4
97+
grpcio==1.65.5
9898
# via
9999
# google-api-core
100100
# grpcio-status
101-
grpcio-status==1.65.4
101+
grpcio-status==1.65.5
102102
# via google-api-core
103103
h11==0.14.0
104104
# via
@@ -108,7 +108,7 @@ httpcore==1.0.5
108108
# via httpx
109109
httpx==0.27.0
110110
# via unstructured-client
111-
huggingface-hub==0.24.5
111+
huggingface-hub==0.24.6
112112
# via
113113
# timm
114114
# tokenizers
@@ -142,7 +142,7 @@ lxml==5.3.0
142142
# python-docx
143143
# python-pptx
144144
# unstructured
145-
markdown==3.6
145+
markdown==3.7
146146
# via unstructured
147147
markupsafe==2.1.5
148148
# via jinja2
@@ -166,7 +166,7 @@ networkx==3.3
166166
# via
167167
# torch
168168
# unstructured
169-
nltk==3.8.1
169+
nltk==3.9.1
170170
# via unstructured
171171
numpy==1.26.4
172172
# via
@@ -223,7 +223,7 @@ pdfminer-six==20231228
223223
# via
224224
# pdfplumber
225225
# unstructured
226-
pdfplumber==0.11.3
226+
pdfplumber==0.11.4
227227
# via layoutparser
228228
pikepdf==9.1.1
229229
# via unstructured
@@ -409,7 +409,7 @@ typing-inspect==0.9.0
409409
# unstructured-client
410410
tzdata==2024.1
411411
# via pandas
412-
unstructured[all-docs]==0.15.5
412+
unstructured[all-docs]==0.15.6
413413
# via -r requirements/base.in
414414
unstructured-client==0.25.5
415415
# via unstructured

requirements/test.txt

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ black==24.8.0
5353
# via -r requirements/test.in
5454
bleach==6.1.0
5555
# via nbconvert
56-
cachetools==5.4.0
56+
cachetools==5.5.0
5757
# via
5858
# -r requirements/base.txt
5959
# google-auth
@@ -193,7 +193,7 @@ google-api-core[grpc]==2.19.1
193193
# via
194194
# -r requirements/base.txt
195195
# google-cloud-vision
196-
google-auth==2.33.0
196+
google-auth==2.34.0
197197
# via
198198
# -r requirements/base.txt
199199
# google-api-core
@@ -207,12 +207,12 @@ googleapis-common-protos==1.63.2
207207
# -r requirements/base.txt
208208
# google-api-core
209209
# grpcio-status
210-
grpcio==1.65.4
210+
grpcio==1.65.5
211211
# via
212212
# -r requirements/base.txt
213213
# google-api-core
214214
# grpcio-status
215-
grpcio-status==1.65.4
215+
grpcio-status==1.65.5
216216
# via
217217
# -r requirements/base.txt
218218
# google-api-core
@@ -231,7 +231,7 @@ httpx==0.27.0
231231
# -r requirements/test.in
232232
# jupyterlab
233233
# unstructured-client
234-
huggingface-hub==0.24.5
234+
huggingface-hub==0.24.6
235235
# via
236236
# -r requirements/base.txt
237237
# timm
@@ -365,7 +365,7 @@ lxml==5.3.0
365365
# python-docx
366366
# python-pptx
367367
# unstructured
368-
markdown==3.6
368+
markdown==3.7
369369
# via
370370
# -r requirements/base.txt
371371
# unstructured
@@ -428,7 +428,7 @@ networkx==3.3
428428
# -r requirements/base.txt
429429
# torch
430430
# unstructured
431-
nltk==3.8.1
431+
nltk==3.9.1
432432
# via
433433
# -r requirements/base.txt
434434
# unstructured
@@ -530,7 +530,7 @@ pdfminer-six==20231228
530530
# -r requirements/base.txt
531531
# pdfplumber
532532
# unstructured
533-
pdfplumber==0.11.3
533+
pdfplumber==0.11.4
534534
# via
535535
# -r requirements/base.txt
536536
# layoutparser
@@ -708,7 +708,7 @@ pyyaml==6.0.2
708708
# omegaconf
709709
# timm
710710
# transformers
711-
pyzmq==26.1.0
711+
pyzmq==26.1.1
712712
# via
713713
# ipykernel
714714
# jupyter-client
@@ -916,7 +916,7 @@ tzdata==2024.1
916916
# via
917917
# -r requirements/base.txt
918918
# pandas
919-
unstructured[all-docs]==0.15.5
919+
unstructured[all-docs]==0.15.6
920920
# via -r requirements/base.txt
921921
unstructured-client==0.25.5
922922
# via

0 commit comments

Comments
 (0)