88from unittest .mock import MagicMock , patch
99
1010import httpx
11+ from httpx import RequestError
1112import pytest
1213import requests
1314from requests_toolbelt import MultipartDecoder
1415
16+ from _test_unstructured_client .unit_utils import sample_docs_path
1517from unstructured_client ._hooks .custom import form_utils , pdf_utils , request_utils
1618from unstructured_client ._hooks .custom .form_utils import (
1719 FormData ,
2931 SplitPdfHook ,
3032 get_optimal_split_size , run_tasks ,
3133)
34+ from unstructured_client ._hooks .types import BeforeRequestContext
3235from unstructured_client .models import shared
3336
3437
@@ -462,3 +465,64 @@ def test_unit_get_split_pdf_cache_tmp_data_dir_uses_dir_from_form_data(mock_path
462465 mock_path .assert_called_once_with (mock_dir )
463466 mock_path_instance .exists .assert_called_once ()
464467 assert result == str (Path (mock_dir ).resolve ())
468+
469+
def test_before_request_raises_request_error_when_pdf_check_fails():
    """Check that a PDFValidationError from check_pdf surfaces as a RequestError.

    ``pdf_utils.check_pdf`` is patched to raise ``PDFValidationError``; the
    test then expects ``SplitPdfHook.before_request`` to raise
    ``httpx.RequestError`` carrying the same message and the request that
    was passed in.
    """
    expected_message = "File does not appear to be a valid PDF."

    # Hook under test, initialised against a stand-in client.
    split_hook = SplitPdfHook()
    split_hook.sdk_init(base_url="http://localhost:8888", client=MagicMock())

    # Context identifying the operation as "partition".
    hook_ctx = MagicMock()
    hook_ctx.operation_id = "partition"

    # Multipart request aimed at localhost.
    request = MagicMock()
    request.headers = {"Content-Type": "multipart/form-data"}
    request.url.host = "localhost"

    # Fake uploaded file plus the form fields that request PDF splitting.
    pdf_file = MagicMock()
    pdf_file.read.return_value = b"mock_pdf_content"
    form_fields = {
        "split_pdf_page": "true",
        "files": {
            "filename": "test.pdf",
            "content_type": "application/pdf",
            "file": pdf_file,
        },
    }

    # Object handed back by the patched read_pdf and later passed to check_pdf.
    pdf_reader = MagicMock()

    with patch(
        "unstructured_client._hooks.custom.request_utils.get_multipart_stream_fields",
        return_value=form_fields,
    ) as fields_patch, \
            patch(
                "unstructured_client._hooks.custom.pdf_utils.read_pdf",
                return_value=pdf_reader,
            ) as read_patch, \
            patch(
                "unstructured_client._hooks.custom.pdf_utils.check_pdf",
                side_effect=pdf_utils.PDFValidationError(expected_message),
            ) as check_patch, \
            patch(
                "unstructured_client._hooks.custom.request_utils.get_base_url",
                return_value="http://localhost:8888",
            ):

        # The validation failure must be re-raised as a RequestError.
        with pytest.raises(RequestError) as raised:
            split_hook.before_request(hook_ctx, request)

        # The error keeps the validation message and references the request.
        failure = raised.value
        assert str(failure) == expected_message
        assert failure.request == request

        # Each patched helper was invoked exactly once with the expected input.
        fields_patch.assert_called_once_with(request)
        read_patch.assert_called_once_with(pdf_file)
        check_patch.assert_called_once_with(pdf_reader)
0 commit comments