Skip to content

Commit c91d1b9

Browse files
authored
feat: Only allow one Chipper call at a time (#296)
Chipper V2 is very memory hungry. While we work to optimize this, we need to restrict the server to one call at a time. While the model is in use, we'll return a 503 "Please try again". Our hosted API should scale up to meet demand, so the next call should route to an available server. This includes a refactor to how partition_kwargs are passed to either parallel mode, local partition, or local partition with the new Chipper protection. To verify, try calling Chipper twice: ``` curl -X POST 'http://localhost:8000/general/v0/general' --form files="@$file" --form strategy=hi_res --form hi_res_model_name=chipper & curl -X POST 'http://localhost:8000/general/v0/general' --form files="@$file" --form strategy=hi_res --form hi_res_model_name=chipper ``` The second call will get a 503 response. Other changes: * Return a 400 error if Chipper isn't loaded. The model is private, make sure we explain this for users who self host * Pass the huggingface token to `make docker-start-api` for better dev experience * Add a `make docker-start-bash` while we're in here
1 parent 51b9a8c commit c91d1b9

File tree

5 files changed

+116
-51
lines changed

5 files changed

+116
-51
lines changed

CHANGELOG.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
## 0.0.55-dev1
1+
## 0.0.55
22

3-
* Bump unstructured to 0.0.26
3+
* Bump unstructured to 0.10.26
44
* Bring parent_id metadata field back after fixing a backwards compatibility bug
5+
* Restrict Chipper usage to one call at a time. The model is very resource intensive, and this will prevent issues while we improve it.
56

67
## 0.0.54
78

Makefile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,15 @@ docker-build:
6666

6767
.PHONY: docker-start-api
6868
docker-start-api:
69-
docker run -p 8000:8000 --mount type=bind,source=$(realpath .),target=/home/notebook-user/local -it --rm pipeline-family-${PIPELINE_FAMILY}-dev:latest scripts/app-start.sh
69+
docker run -p 8000:8000 \
70+
-it --rm \
71+
--mount type=bind,source=$(realpath .),target=/home/notebook-user/local \
72+
-e UNSTRUCTURED_HF_TOKEN=${UNSTRUCTURED_HF_TOKEN} \
73+
pipeline-family-${PIPELINE_FAMILY}-dev:latest scripts/app-start.sh
74+
75+
# Start the dev image with /bin/bash as the entrypoint instead of the API start script.
# Forwards UNSTRUCTURED_HF_TOKEN the same way docker-start-api does, so the two
# targets run the container with the same environment.
.PHONY: docker-start-bash
docker-start-bash:
	docker run -p 8000:8000 \
	-it --rm \
	--mount type=bind,source=$(realpath .),target=/home/notebook-user/local \
	-e UNSTRUCTURED_HF_TOKEN=${UNSTRUCTURED_HF_TOKEN} \
	--entrypoint /bin/bash \
	pipeline-family-${PIPELINE_FAMILY}-dev:latest
7078

7179
.PHONY: docker-test
7280
docker-test:

prepline_general/api/general.py

Lines changed: 66 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,32 @@ def partition_pdf_splits(
223223
return results
224224

225225

226+
import threading  # noqa: E402  (local to its only user; hoist to the file's import block if preferred)

# True while a Chipper call is in flight on this server process.
IS_CHIPPER_PROCESSING = False

# Serializes the read-then-write of IS_CHIPPER_PROCESSING so that two requests
# arriving together cannot both observe False and both start Chipper.
_CHIPPER_STATE_LOCK = threading.Lock()


class ChipperMemoryProtection:
    """
    Chipper calls are expensive, and right now we can only do one call at a time.
    If the model is in use, return a 503 error. The API should scale up and the user can try again
    on a different server.

    Use as a context manager around the Chipper call:

        with ChipperMemoryProtection():
            elements = partition(**partition_kwargs)

    Raises:
        HTTPException: with status 503 from ``__enter__`` when another Chipper call
            is already running. In that case the body of the ``with`` block is not
            executed and ``__exit__`` is not called, so the running call keeps its claim.
    """

    def __enter__(self):
        global IS_CHIPPER_PROCESSING

        # Atomic test-and-set: remember the previous state and claim the slot in
        # one locked step. Writing True when it was already True is a no-op.
        with _CHIPPER_STATE_LOCK:
            already_processing = IS_CHIPPER_PROCESSING
            IS_CHIPPER_PROCESSING = True

        if already_processing:
            # Log here so we can track how often it happens
            logger.error("Chipper is already is use")
            raise HTTPException(
                status_code=503, detail="Server is under heavy load. Please try again later."
            )

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Runs whether the guarded call succeeded or raised; returning None lets
        # any exception from the body propagate unchanged.
        global IS_CHIPPER_PROCESSING
        IS_CHIPPER_PROCESSING = False
250+
251+
226252
def pipeline_api(
227253
file,
228254
request=None,
@@ -403,52 +429,52 @@ def pipeline_api(
403429
)
404430
)
405431

406-
# Be careful of naming differences in api params vs partition params!
407-
# These kwargs are going back into the api, not into partition
408-
# If there's a difference, remap the param in partition_pdf_splits
432+
partition_kwargs = {
433+
"file": file,
434+
"metadata_filename": filename,
435+
"content_type": file_content_type,
436+
"encoding": encoding,
437+
"include_page_breaks": include_page_breaks,
438+
"model_name": hi_res_model_name,
439+
"ocr_languages": ocr_languages,
440+
"pdf_infer_table_structure": pdf_infer_table_structure,
441+
"skip_infer_table_types": skip_infer_table_types,
442+
"strategy": strategy,
443+
"xml_keep_tags": xml_keep_tags,
444+
"languages": languages,
445+
"chunking_strategy": chunking_strategy,
446+
"multipage_sections": multipage_sections,
447+
"combine_under_n_chars": combine_under_n_chars,
448+
"new_after_n_chars": new_after_n_chars,
449+
}
450+
409451
if file_content_type == "application/pdf" and pdf_parallel_mode_enabled:
452+
# Be careful of naming differences in api params vs partition params!
453+
# These kwargs are going back into the api, not into partition
454+
# They need to be switched back in partition_pdf_splits
455+
if partition_kwargs.get("model_name"):
456+
partition_kwargs["hi_res_model_name"] = partition_kwargs.pop("model_name")
457+
410458
elements = partition_pdf_splits(
411-
request,
459+
request=request,
412460
pdf_pages=pdf.pages,
413-
file=file,
414-
metadata_filename=filename,
415-
content_type=file_content_type,
416461
coordinates=show_coordinates,
417-
# partition_kwargs
418-
encoding=encoding,
419-
include_page_breaks=include_page_breaks,
420-
hi_res_model_name=hi_res_model_name,
421-
ocr_languages=ocr_languages,
422-
pdf_infer_table_structure=pdf_infer_table_structure,
423-
skip_infer_table_types=skip_infer_table_types,
424-
strategy=strategy,
425-
xml_keep_tags=xml_keep_tags,
426-
languages=languages,
427-
chunking_strategy=chunking_strategy,
428-
multipage_sections=multipage_sections,
429-
combine_under_n_chars=combine_under_n_chars,
430-
new_after_n_chars=new_after_n_chars,
462+
**partition_kwargs,
431463
)
464+
elif hi_res_model_name and hi_res_model_name in CHIPPER_MODEL_TYPES:
465+
with ChipperMemoryProtection():
466+
elements = partition(**partition_kwargs)
432467
else:
433-
elements = partition(
434-
file=file,
435-
metadata_filename=filename,
436-
content_type=file_content_type,
437-
# partition_kwargs
438-
encoding=encoding,
439-
include_page_breaks=include_page_breaks,
440-
model_name=hi_res_model_name,
441-
ocr_languages=ocr_languages,
442-
pdf_infer_table_structure=pdf_infer_table_structure,
443-
skip_infer_table_types=skip_infer_table_types,
444-
strategy=strategy,
445-
xml_keep_tags=xml_keep_tags,
446-
languages=languages,
447-
chunking_strategy=chunking_strategy,
448-
multipage_sections=multipage_sections,
449-
combine_under_n_chars=combine_under_n_chars,
450-
new_after_n_chars=new_after_n_chars,
468+
elements = partition(**partition_kwargs)
469+
470+
except OSError as e:
471+
if "chipper-fast-fine-tuning is not a local folder" in e.args[0]:
472+
raise HTTPException(
473+
status_code=400,
474+
detail="The Chipper model is not available for download. It can be accessed via the official hosted API.",
451475
)
476+
477+
raise e
452478
except ValueError as e:
453479
if "Invalid file" in e.args[0]:
454480
raise HTTPException(
@@ -459,6 +485,7 @@ def pipeline_api(
459485
status_code=400,
460486
detail="Json schema does not match the Unstructured schema",
461487
)
488+
462489
raise e
463490
except zipfile.BadZipFile:
464491
raise HTTPException(

scripts/app-start.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,5 @@
11
#!/usr/bin/env bash
22

3-
UNSTRUCTURED_DOWNLOAD_CHIPPER=${UNSTRUCTURED_DOWNLOAD_CHIPPER:-"false"}
4-
5-
if [[ "$(echo "${UNSTRUCTURED_DOWNLOAD_CHIPPER}" | tr '[:upper:]' '[:lower:]')" == "true" ]]; then
6-
echo "warming chipper model"
7-
# NOTE(crag): in the cloud, this could add a minute to startup time
8-
UNSTRUCTURED_HI_RES_SUPPORTED_MODEL=chipper python3.8 -c \
9-
"from unstructured.ingest.doc_processor.generalized import initialize; initialize()"
10-
fi
11-
123
uvicorn prepline_general.api.app:app \
134
--log-config logger_config.yaml \
145
--host 0.0.0.0

test_general/api/test_app.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
import io
55
import pytest
66
import requests
7+
import time
78
import pandas as pd
9+
from concurrent.futures import ThreadPoolExecutor
810
from fastapi.testclient import TestClient
911
from fastapi import HTTPException
1012
from pypdf import PdfWriter, PdfReader
@@ -784,3 +786,39 @@ def test_general_api_returns_400_bad_json(tmpdir):
784786
)
785787
assert "Unstructured schema" in response.json().get("detail")
786788
assert response.status_code == 400
789+
790+
791+
def test_chipper_memory_protection(monkeypatch, mocker):
    """
    For now, only 1 Chipper call is allowed at a time.
    Assert that we return a 503 while it's in use.

    Three requests are sent concurrently against a slow, stubbed `partition`:
    exactly one must be served (200) and the other two rejected (503).
    """

    def mock_partition(*args, **kwargs):
        # Hold the Chipper slot long enough for the other requests to arrive
        time.sleep(2)
        return {}

    monkeypatch.setattr(
        general,
        "partition",
        mock_partition,
    )

    client = TestClient(app)
    test_file = Path("sample-docs") / "layout-parser-paper-fast.pdf"

    def make_request(*args):
        # Each request uses its own handle and closes it once the response is back,
        # instead of leaking one open file per call
        with open(test_file, "rb") as pdf_file:
            return client.post(
                MAIN_API_ROUTE,
                files=[("files", (str(test_file), pdf_file, "application/pdf"))],
                data={"strategy": "hi_res", "hi_res_model_name": "chipper"},
            )

    # One worker per request so all three are in flight at the same time
    with ThreadPoolExecutor(max_workers=3) as executor:
        responses = list(executor.map(make_request, range(3)))

    status_codes = [response.status_code for response in responses]

    # Assert only one call got through
    assert status_codes.count(200) == 1
    assert status_codes.count(503) == 2

0 commit comments

Comments
 (0)