77
88import pytest
99from deepdiff import DeepDiff
10+ from httpx import RequestError
1011from unstructured_client import UnstructuredClient
1112from unstructured_client .models import shared , operations
1213from unstructured_client .models .errors import SDKError , ServerError , HTTPValidationError
@@ -348,3 +349,77 @@ def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provid
348349 assert response .status_code == 200
349350 assert len (response .elements ) > 0
350351 assert response .elements [0 ]["metadata" ]["partitioner_type" ] == "vlm_partition"
352+
353+
@pytest.mark.parametrize(
    ("pdf_name", "expected_error_message"),
    [
        (
            "failing-encrypted.pdf",
            "File is encrypted. Please decrypt it with password.",
        ),
        (
            "failing-missing-root.pdf",
            "File does not appear to be a valid PDF. Error: Cannot find Root object in pdf",
        ),
        (
            "failing-missing-pages.pdf",
            "File does not appear to be a valid PDF. Error: Invalid object in /Pages",
        ),
    ],
)
def test_returns_request_error_for_invalid_pdf(
    caplog: pytest.LogCaptureFixture,
    doc_path: Path,
    client: UnstructuredClient,
    pdf_name: str,
    expected_error_message: str,
):
    """Check that partitioning a broken PDF with page splitting raises ``RequestError``.

    Each parametrized case loads one known-bad sample from ``doc_path``,
    submits it with ``split_pdf_page=True`` and the ``fast`` strategy, and
    then verifies two things:

    * ``client.general.partition`` raises ``httpx.RequestError`` and the
      exception carries a request object;
    * the case-specific ``expected_error_message`` appears in the captured
      log output.
    """
    # Load the whole sample up front; the SDK takes raw bytes plus a file name.
    pdf_bytes = (doc_path / pdf_name).read_bytes()

    partition_parameters = shared.PartitionParameters(
        files=shared.Files(
            content=pdf_bytes,
            file_name=pdf_name,
        ),
        strategy="fast",
        split_pdf_page=True,
    )
    request = operations.PartitionRequest(partition_parameters=partition_parameters)

    with pytest.raises(RequestError) as raised:
        client.general.partition(request=request)

    # ``RequestError.request`` must be populated for callers that inspect it.
    assert raised.value.request is not None
    assert expected_error_message in caplog.text
398+
399+
def test_returns_422_for_invalid_pdf(
    caplog: pytest.LogCaptureFixture,
    doc_path: Path,
    client: UnstructuredClient,
):
    """Check that an invalid PDF surfaces as ``HTTPValidationError`` with a logged 422.

    Unlike the ``RequestError`` cases, ``failing-invalid.pdf`` is expected to
    make ``client.general.partition`` raise ``HTTPValidationError``. The test
    also verifies that the captured log output contains both the
    "File does not appear to be a valid PDF" message and the "422" status.
    """
    pdf_name = "failing-invalid.pdf"

    # Load the whole sample up front; the SDK takes raw bytes plus a file name.
    pdf_bytes = (doc_path / pdf_name).read_bytes()

    req = operations.PartitionRequest(
        partition_parameters=shared.PartitionParameters(
            files=shared.Files(
                content=pdf_bytes,
                file_name=pdf_name,
            ),
            strategy="fast",
            split_pdf_page=True,
        )
    )

    with pytest.raises(HTTPValidationError):
        client.general.partition(request=req)

    assert "File does not appear to be a valid PDF" in caplog.text
    assert "422" in caplog.text
0 commit comments