@@ -495,7 +495,9 @@ def test_general_api_returns_422_bad_pdf():
495495 response = client .post (
496496 MAIN_API_ROUTE , files = [("files" , (str (tmp .name ), open (tmp .name , "rb" ), "application/pdf" ))]
497497 )
498- assert response .json () == {"detail" : "File does not appear to be a valid PDF" }
498+ assert response .json () == {
499+ "detail" : "File does not appear to be a valid PDF. Error: Cannot read an empty file"
500+ }
499501 assert response .status_code == 422
500502 tmp .close ()
501503
@@ -506,10 +508,58 @@ def test_general_api_returns_422_bad_pdf():
506508 files = [("files" , (str (test_file ), open (test_file , "rb" ), "application/pdf" ))],
507509 )
508510
509- assert response .json () == {"detail" : "File does not appear to be a valid PDF" }
511+ assert response .json () == {
512+ "detail" : "File does not appear to be a valid PDF. Error: Cannot read an empty file"
513+ }
510514 assert response .status_code == 422
511515
512516
517+ @pytest .mark .parametrize (
518+ ("pdf_name" , "expected_error_message" ),
519+ [
520+ (
521+ "failing-invalid.pdf" ,
522+ "File does not appear to be a valid PDF. Error: Stream has ended unexpectedly" ,
523+ ),
524+ (
525+ "failing-missing-root.pdf" ,
526+ "File does not appear to be a valid PDF. Error: Cannot find Root object in pdf" ,
527+ ),
528+ (
529+ "failing-missing-pages.pdf" ,
530+ "File does not appear to be a valid PDF. Error: Invalid object in /Pages" ,
531+ ),
532+ ],
533+ )
534+ @pytest .mark .parametrize (
535+ "strategy" ,
536+ [
537+ "auto" ,
538+ "fast" ,
539+ "hi_res" ,
540+ "ocr_only" ,
541+ ],
542+ )
543+ def test_general_api_returns_422_invalid_pdf (
544+ pdf_name : str , expected_error_message : str , strategy : str
545+ ):
546+ """
547+ Verify that we get a 422 with the correct error message for invalid PDF files
548+ """
549+ client = TestClient (app )
550+ test_file = Path (__file__ ).parent .parent .parent / "sample-docs" / pdf_name
551+
552+ with open (test_file , "rb" ) as f :
553+ response = client .post (
554+ MAIN_API_ROUTE ,
555+ files = [("files" , (str (test_file ), f ))],
556+ data = {"strategy" : strategy },
557+ )
558+
559+ assert response .status_code == 422
560+ assert expected_error_message == str (response .json ()["detail" ])
561+
562+
513563def test_general_api_returns_503 (monkeypatch ):
514564 """
515565 When available memory is below the minimum. return a 503, unless our origin ip is 10.{4,5}.x.x
@@ -939,13 +989,13 @@ def test_encrypted_pdf():
939989 writer .encrypt (user_password = "password123" )
940990 writer .write (temp_file .name )
941991
942- # Response should be 400
992+ # Response should be 422
943993 response = client .post (
944994 MAIN_API_ROUTE ,
945995 files = [("files" , (str (temp_file .name ), open (temp_file .name , "rb" ), "application/pdf" ))],
946996 )
947997 assert response .json () == {"detail" : "File is encrypted. Please decrypt it with password." }
948- assert response .status_code == 400
998+ assert response .status_code == 422
949999
9501000 # This file is owner encrypted, i.e. readable with edit restrictions
9511001 writer = PdfWriter ()
@@ -1155,45 +1205,3 @@ def test_include_slide_notes(monkeypatch, test_default, include_slide_notes, tes
11551205 assert "Here are important notes" == df ["text" ][0 ]
11561206 else :
11571207 assert "Here are important notes" != df ["text" ][0 ]
1158-
1159-
1160- @pytest .mark .parametrize (
1161- ("pdf_name" , "expected_error_message" ),
1162- [
1163- ("failing-encrypted.pdf" , "File is encrypted. Please decrypt it with password." ),
1164- (
1165- "failing-invalid.pdf" ,
1166- "File does not appear to be a valid PDF. Error: Stream has ended unexpectedly" ,
1167- ),
1168- (
1169- "failing-missing-root.pdf" ,
1170- "File does not appear to be a valid PDF. Error: Cannot find Root object in pdf" ,
1171- ),
1172- (
1173- "failing-missing-pages.pdf" ,
1174- "File does not appear to be a valid PDF. Error: Invalid object in /Pages" ,
1175- ),
1176- ],
1177- )
1178- @pytest .mark .parametrize (
1179- "strategy" ,
1180- [
1181- "auto" ,
1182- "fast" ,
1183- "hi_res" ,
1184- "ocr_only" ,
1185- ],
1186- )
1187- def test_failing_pdfs_return_422 (pdf_name : str , expected_error_message : str , strategy : str ):
1188- client = TestClient (app )
1189- test_file = Path (__file__ ).parent .parent .parent / "sample-docs" / pdf_name
1190-
1191- with open (test_file , "rb" ) as f :
1192- response = client .post (
1193- MAIN_API_ROUTE ,
1194- files = [("files" , (str (test_file ), f ))],
1195- data = {"strategy" : strategy },
1196- )
1197-
1198- assert response .status_code == 422
1199- assert expected_error_message == str (response .json ()["detail" ])
0 commit comments