Skip to content

Commit 1121f12

Browse files
authored
catch json ValueError and provide example schema (#281)
Closes #271. Catches the ValueError raised by partition() and transforms it into a 400 error. Also provides an example of the Unstructured schema.
1 parent 528849e commit 1121f12

File tree

3 files changed

+34
-1
lines changed

3 files changed

+34
-1
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
* Bump unstructured to 0.10.21
44
* Fix an unhandled error when a non pdf file is sent with content-type pdf
5-
* Fix unhandled error when a non docx file is sent with content-type docx
5+
* Fix an unhandled error when a non docx file is sent with content-type docx
6+
* Fix an unhandled error when a non-Unstructured json schema is sent
67

78
## 0.0.51
89

prepline_general/api/general.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,11 @@ def pipeline_api(
457457
raise HTTPException(
458458
status_code=400, detail=f"{file_content_type} not currently supported"
459459
)
460+
if "Unstructured schema" in e.args[0]:
461+
raise HTTPException(
462+
status_code=400,
463+
detail="Json schema does not match the Unstructured schema",
464+
)
460465
raise e
461466
except zipfile.BadZipFile as e:
462467
if "File is not a zip file" in e.args[0]:

test_general/api/test_app.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from pathlib import Path
2+
import os
23

34
import io
45
import pytest
@@ -760,3 +761,29 @@ def test_general_api_returns_400_bad_docx():
760761
)
761762
assert "txt is not a valid" in response.json().get("detail")
762763
assert response.status_code == 400
764+
765+
766+
def test_general_api_returns_400_bad_json(tmpdir):
767+
"""
768+
Verify that we get a 400 for invalid json schemas
769+
"""
770+
client = TestClient(app)
771+
data = '{"hi": "there"}'
772+
773+
filepath = os.path.join(tmpdir, "unprocessable.json")
774+
with open(filepath, "w") as f:
775+
f.write(data)
776+
response = client.post(
777+
MAIN_API_ROUTE,
778+
files=[
779+
(
780+
"files",
781+
(
782+
str(filepath),
783+
open(filepath, "rb"),
784+
),
785+
)
786+
],
787+
)
788+
assert "Unstructured schema" in response.json().get("detail")
789+
assert response.status_code == 400

0 commit comments

Comments
 (0)