Skip to content

Commit 91f335b

Browse files
chore: clean up tests for invalid pdfs
1 parent 53e5b70 commit 91f335b

File tree

1 file changed

+54
-46
lines changed

1 file changed

+54
-46
lines changed

test_general/api/test_app.py

Lines changed: 54 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -495,7 +495,9 @@ def test_general_api_returns_422_bad_pdf():
495495
response = client.post(
496496
MAIN_API_ROUTE, files=[("files", (str(tmp.name), open(tmp.name, "rb"), "application/pdf"))]
497497
)
498-
assert response.json() == {"detail": "File does not appear to be a valid PDF"}
498+
assert response.json() == {
499+
"detail": "File does not appear to be a valid PDF. Error: Cannot read an empty file"
500+
}
499501
assert response.status_code == 422
500502
tmp.close()
501503

@@ -506,10 +508,58 @@ def test_general_api_returns_422_bad_pdf():
506508
files=[("files", (str(test_file), open(test_file, "rb"), "application/pdf"))],
507509
)
508510

509-
assert response.json() == {"detail": "File does not appear to be a valid PDF"}
511+
assert response.json() == {
512+
"detail": "File does not appear to be a valid PDF. Error: Cannot read an empty file"
513+
}
510514
assert response.status_code == 422
511515

512516

517+
@pytest.mark.parametrize(
518+
("pdf_name", "expected_error_message"),
519+
[
520+
(
521+
"failing-invalid.pdf",
522+
"File does not appear to be a valid PDF. Error: Stream has ended unexpectedly",
523+
),
524+
(
525+
"failing-missing-root.pdf",
526+
"File does not appear to be a valid PDF. Error: Cannot find Root object in pdf",
527+
),
528+
(
529+
"failing-missing-pages.pdf",
530+
"File does not appear to be a valid PDF. Error: Invalid object in /Pages",
531+
),
532+
],
533+
)
534+
@pytest.mark.parametrize(
535+
"strategy",
536+
[
537+
"auto",
538+
"fast",
539+
"hi_res",
540+
"ocr_only",
541+
],
542+
)
543+
def test_general_api_returns_422_invalid_pdf(
544+
pdf_name: str, expected_error_message: str, strategy: str
545+
):
546+
"""
547+
Verify that we get a 422 with the correct error message for invalid PDF files
548+
"""
549+
client = TestClient(app)
550+
test_file = Path(__file__).parent.parent.parent / "sample-docs" / pdf_name
551+
552+
with open(test_file, "rb") as f:
553+
response = client.post(
554+
MAIN_API_ROUTE,
555+
files=[("files", (str(test_file), f))],
556+
data={"strategy": strategy},
557+
)
558+
559+
assert response.status_code == 422
560+
assert expected_error_message == str(response.json()["detail"])
561+
562+
513563
def test_general_api_returns_503(monkeypatch):
514564
"""
515565
When available memory is below the minimum, return a 503, unless our origin IP is 10.{4,5}.x.x
@@ -939,13 +989,13 @@ def test_encrypted_pdf():
939989
writer.encrypt(user_password="password123")
940990
writer.write(temp_file.name)
941991

942-
# Response should be 400
992+
# Response should be 422
943993
response = client.post(
944994
MAIN_API_ROUTE,
945995
files=[("files", (str(temp_file.name), open(temp_file.name, "rb"), "application/pdf"))],
946996
)
947997
assert response.json() == {"detail": "File is encrypted. Please decrypt it with password."}
948-
assert response.status_code == 400
998+
assert response.status_code == 422
949999

9501000
# This file is owner encrypted, i.e. readable with edit restrictions
9511001
writer = PdfWriter()
@@ -1155,45 +1205,3 @@ def test_include_slide_notes(monkeypatch, test_default, include_slide_notes, tes
11551205
assert "Here are important notes" == df["text"][0]
11561206
else:
11571207
assert "Here are important notes" != df["text"][0]
1158-
1159-
1160-
@pytest.mark.parametrize(
1161-
("pdf_name", "expected_error_message"),
1162-
[
1163-
("failing-encrypted.pdf", "File is encrypted. Please decrypt it with password."),
1164-
(
1165-
"failing-invalid.pdf",
1166-
"File does not appear to be a valid PDF. Error: Stream has ended unexpectedly",
1167-
),
1168-
(
1169-
"failing-missing-root.pdf",
1170-
"File does not appear to be a valid PDF. Error: Cannot find Root object in pdf",
1171-
),
1172-
(
1173-
"failing-missing-pages.pdf",
1174-
"File does not appear to be a valid PDF. Error: Invalid object in /Pages",
1175-
),
1176-
],
1177-
)
1178-
@pytest.mark.parametrize(
1179-
"strategy",
1180-
[
1181-
"auto",
1182-
"fast",
1183-
"hi_res",
1184-
"ocr_only",
1185-
],
1186-
)
1187-
def test_failing_pdfs_return_422(pdf_name: str, expected_error_message: str, strategy: str):
1188-
client = TestClient(app)
1189-
test_file = Path(__file__).parent.parent.parent / "sample-docs" / pdf_name
1190-
1191-
with open(test_file, "rb") as f:
1192-
response = client.post(
1193-
MAIN_API_ROUTE,
1194-
files=[("files", (str(test_file), f))],
1195-
data={"strategy": strategy},
1196-
)
1197-
1198-
assert response.status_code == 422
1199-
assert expected_error_message == str(response.json()["detail"])

0 commit comments

Comments
 (0)