Skip to content

Commit c4e21cc

Browse files
authored
chore: throw UnstructuredIngestError correctly in plugins (#62)
### Summary I saw this error for Pinecone: ``` 500: Error in uploader - [UnstructuredIngestError] http error: (400) .... Vector dimension 1536 does not match the dimension of the index 1024 ``` I believe this should be a 400, so I need to raise `UnstructuredIngestError` here. Unfortunately, it's defined twice: * https://github.com/Unstructured-IO/unstructured-ingest/blob/main/unstructured_ingest/errors_v2.py#L6 * https://github.com/Unstructured-IO/unstructured-ingest/blob/main/unstructured_ingest/error.py#L6
1 parent ac5c57a commit c4e21cc

File tree

4 files changed

+210
-1
lines changed

4 files changed

+210
-1
lines changed

test/api/test_api.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,3 +322,151 @@ def test_streaming_exception_with_valid_status_code():
322322
# Should use the exception's status_code
323323
assert invoke_response.status_code == 422
324324
assert "ExceptionWithValidStatusCode" in invoke_response.status_code_text
325+
326+
327+
@pytest.mark.parametrize(
328+
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
329+
)
330+
def test_unstructured_ingest_error_with_status_code(file_data):
331+
"""Test that UnstructuredIngestError with status_code is handled correctly."""
332+
from test.assets.exception_status_code import (
333+
function_raises_unstructured_ingest_error_with_status_code as test_fn,
334+
)
335+
336+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
337+
338+
post_body = {"file_data": file_data.model_dump()}
339+
resp = client.post("/invoke", json=post_body)
340+
resp_content = resp.json()
341+
invoke_response = InvokeResponse.model_validate(resp_content)
342+
343+
# Should use the UnstructuredIngestError's status_code
344+
assert invoke_response.status_code == 400
345+
assert invoke_response.status_code_text == "Test UnstructuredIngestError with status_code"
346+
347+
348+
@pytest.mark.parametrize(
349+
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
350+
)
351+
def test_unstructured_ingest_error_without_status_code(file_data):
352+
"""Test that UnstructuredIngestError without status_code defaults to 500."""
353+
from test.assets.exception_status_code import (
354+
function_raises_unstructured_ingest_error_without_status_code as test_fn,
355+
)
356+
357+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
358+
359+
post_body = {"file_data": file_data.model_dump()}
360+
resp = client.post("/invoke", json=post_body)
361+
resp_content = resp.json()
362+
invoke_response = InvokeResponse.model_validate(resp_content)
363+
364+
# Should default to 500 when UnstructuredIngestError has no status_code
365+
assert invoke_response.status_code == 500
366+
assert invoke_response.status_code_text == "Test UnstructuredIngestError without status_code"
367+
368+
369+
@pytest.mark.parametrize(
370+
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
371+
)
372+
def test_unstructured_ingest_error_with_none_status_code(file_data):
373+
"""Test that UnstructuredIngestError with None status_code defaults to 500."""
374+
from test.assets.exception_status_code import (
375+
function_raises_unstructured_ingest_error_with_none_status_code as test_fn,
376+
)
377+
378+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
379+
380+
post_body = {"file_data": file_data.model_dump()}
381+
resp = client.post("/invoke", json=post_body)
382+
resp_content = resp.json()
383+
invoke_response = InvokeResponse.model_validate(resp_content)
384+
385+
# Should default to 500 when UnstructuredIngestError status_code is None
386+
assert invoke_response.status_code == 500
387+
assert invoke_response.status_code_text == "Test UnstructuredIngestError with None status_code"
388+
389+
390+
@pytest.mark.parametrize(
391+
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
392+
)
393+
def test_async_unstructured_ingest_error(file_data):
394+
"""Test that async functions with UnstructuredIngestError are handled correctly."""
395+
from test.assets.exception_status_code import (
396+
async_function_raises_unstructured_ingest_error as test_fn,
397+
)
398+
399+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
400+
401+
post_body = {"file_data": file_data.model_dump()}
402+
resp = client.post("/invoke", json=post_body)
403+
resp_content = resp.json()
404+
invoke_response = InvokeResponse.model_validate(resp_content)
405+
406+
# Should use the UnstructuredIngestError's status_code
407+
assert invoke_response.status_code == 503
408+
assert invoke_response.status_code_text == "Async test UnstructuredIngestError"
409+
410+
411+
def test_streaming_unstructured_ingest_error():
412+
"""Test that async generator functions with UnstructuredIngestError are handled correctly."""
413+
from test.assets.exception_status_code import (
414+
async_gen_function_raises_unstructured_ingest_error as test_fn,
415+
)
416+
417+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
418+
419+
post_body = {"file_data": mock_file_data[0].model_dump()}
420+
resp = client.post("/invoke", json=post_body)
421+
422+
# For streaming responses, we get NDJSON
423+
assert resp.status_code == 200
424+
assert resp.headers["content-type"] == "application/x-ndjson"
425+
426+
# Parse the streaming response - should be a single error response
427+
lines = resp.content.decode().strip().split("\n")
428+
assert len(lines) == 1 # Only error response since no items were yielded
429+
430+
# Parse the error response
431+
import json
432+
433+
error_response = json.loads(lines[0])
434+
invoke_response = InvokeResponse.model_validate(error_response)
435+
436+
# Should use the UnstructuredIngestError's status_code
437+
assert invoke_response.status_code == 502
438+
assert "Async gen test UnstructuredIngestError" in invoke_response.status_code_text
439+
440+
441+
def test_streaming_unstructured_ingest_error_with_none_status_code():
442+
"""Test that async generator functions with UnstructuredIngestError
443+
with None status_code are handled correctly."""
444+
from test.assets.exception_status_code import (
445+
async_gen_function_raises_unstructured_ingest_error_with_none_status_code as test_fn,
446+
)
447+
448+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
449+
450+
post_body = {"file_data": mock_file_data[0].model_dump()}
451+
resp = client.post("/invoke", json=post_body)
452+
453+
# For streaming responses, we get NDJSON
454+
assert resp.status_code == 200
455+
assert resp.headers["content-type"] == "application/x-ndjson"
456+
457+
# Parse the streaming response - should be a single error response
458+
lines = resp.content.decode().strip().split("\n")
459+
assert len(lines) == 1 # Only error response since no items were yielded
460+
461+
# Parse the error response
462+
import json
463+
464+
error_response = json.loads(lines[0])
465+
invoke_response = InvokeResponse.model_validate(error_response)
466+
467+
# Should default to 500 when UnstructuredIngestError status_code is None
468+
assert invoke_response.status_code == 500
469+
assert (
470+
"Async gen test UnstructuredIngestError with None status_code"
471+
in invoke_response.status_code_text
472+
)

test/assets/exception_status_code.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Test assets for testing exception handling with various status_code scenarios."""
22

33
from fastapi import HTTPException
4+
from unstructured_ingest.error import UnstructuredIngestError
45

56

67
class ExceptionWithNoneStatusCode(Exception):
@@ -51,6 +52,25 @@ def function_raises_generic_exception():
5152
raise ValueError("Generic error")
5253

5354

55+
def function_raises_unstructured_ingest_error_with_status_code():
56+
"""Function that raises UnstructuredIngestError with status_code."""
57+
error = UnstructuredIngestError("Test UnstructuredIngestError with status_code")
58+
error.status_code = 400
59+
raise error
60+
61+
62+
def function_raises_unstructured_ingest_error_without_status_code():
63+
"""Function that raises UnstructuredIngestError without status_code."""
64+
raise UnstructuredIngestError("Test UnstructuredIngestError without status_code")
65+
66+
67+
def function_raises_unstructured_ingest_error_with_none_status_code():
68+
"""Function that raises UnstructuredIngestError with None status_code."""
69+
error = UnstructuredIngestError("Test UnstructuredIngestError with None status_code")
70+
error.status_code = None
71+
raise error
72+
73+
5474
# Async versions for streaming response tests
5575
async def async_function_raises_exception_with_none_status_code():
5676
"""Async function that raises an exception with status_code=None."""
@@ -67,6 +87,13 @@ async def async_function_raises_exception_without_status_code():
6787
raise ExceptionWithoutStatusCode("Async test exception without status_code")
6888

6989

90+
async def async_function_raises_unstructured_ingest_error():
91+
"""Async function that raises UnstructuredIngestError."""
92+
error = UnstructuredIngestError("Async test UnstructuredIngestError")
93+
error.status_code = 503
94+
raise error
95+
96+
7097
# Async generator versions for streaming response error tests
7198
async def async_gen_function_raises_exception_with_none_status_code():
7299
"""Async generator that raises an exception with status_code=None."""
@@ -90,3 +117,23 @@ async def async_gen_function_raises_exception_without_status_code():
90117
if False: # This ensures the function is detected as a generator but never yields
91118
yield None
92119
raise ExceptionWithoutStatusCode("Async gen test exception without status_code")
120+
121+
122+
async def async_gen_function_raises_unstructured_ingest_error():
123+
"""Async generator that raises UnstructuredIngestError."""
124+
# Don't yield anything, just raise the exception
125+
if False: # This ensures the function is detected as a generator but never yields
126+
yield None
127+
error = UnstructuredIngestError("Async gen test UnstructuredIngestError")
128+
error.status_code = 502
129+
raise error
130+
131+
132+
async def async_gen_function_raises_unstructured_ingest_error_with_none_status_code():
133+
"""Async generator that raises UnstructuredIngestError with None status_code."""
134+
# Don't yield anything, just raise the exception
135+
if False: # This ensures the function is detected as a generator but never yields
136+
yield None
137+
error = UnstructuredIngestError("Async gen test UnstructuredIngestError with None status_code")
138+
error.status_code = None
139+
raise error
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.40" # pragma: no cover
1+
__version__ = "0.0.41" # pragma: no cover

unstructured_platform_plugins/etl_uvicorn/api_generator.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pydantic import BaseModel, Field, create_model
1313
from starlette.responses import RedirectResponse
1414
from unstructured_ingest.data_types.file_data import BatchFileData, FileData, file_data_from_dict
15+
from unstructured_ingest.error import UnstructuredIngestError
1516
from uvicorn.config import LOG_LEVELS
1617
from uvicorn.importer import import_from_string
1718

@@ -223,6 +224,19 @@ async def _stream_response():
223224
else exc.detail,
224225
file_data=request_dict.get("file_data", None),
225226
)
227+
except UnstructuredIngestError as exc:
228+
logger.error(
229+
f"UnstructuredIngestError: {str(exc)} (status_code={exc.status_code})",
230+
exc_info=True,
231+
)
232+
return InvokeResponse(
233+
usage=usage,
234+
message_channels=message_channels,
235+
filedata_meta=filedata_meta_model.model_validate(filedata_meta.model_dump()),
236+
status_code=exc.status_code or status.HTTP_500_INTERNAL_SERVER_ERROR,
237+
status_code_text=str(exc),
238+
file_data=request_dict.get("file_data", None),
239+
)
226240
except Exception as invoke_error:
227241
logger.error(f"failed to invoke plugin: {invoke_error}", exc_info=True)
228242
return InvokeResponse(

0 commit comments

Comments
 (0)