|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | import asyncio |
4 | | -import io |
5 | | -import logging |
6 | 4 | from asyncio import Task |
7 | 5 | from collections import Counter |
8 | 6 | from functools import partial |
9 | | -from typing import Coroutine |
| 7 | +from pathlib import Path |
| 8 | +from unittest.mock import MagicMock, patch |
10 | 9 |
|
11 | 10 | import httpx |
12 | 11 | import pytest |
13 | 12 | import requests |
14 | | -from requests_toolbelt import MultipartDecoder, MultipartEncoder |
| 13 | +from requests_toolbelt import MultipartDecoder |
15 | 14 |
|
16 | 15 | from unstructured_client._hooks.custom import form_utils, pdf_utils, request_utils |
17 | 16 | from unstructured_client._hooks.custom.form_utils import ( |
|
20 | 19 | PARTITION_FORM_PAGE_RANGE_KEY, |
21 | 20 | ) |
22 | 21 | from unstructured_client._hooks.custom.split_pdf_hook import ( |
| 22 | + DEFAULT_CACHE_TMP_DATA_DIR, |
23 | 23 | DEFAULT_CONCURRENCY_LEVEL, |
24 | 24 | DEFAULT_STARTING_PAGE_NUMBER, |
25 | 25 | MAX_CONCURRENCY_LEVEL, |
@@ -434,3 +434,30 @@ async def test_remaining_tasks_cancelled_when_fails_disallowed(): |
434 | 434 | await asyncio.sleep(1) |
435 | 435 | print("Cancelled amount: ", cancelled_counter["cancelled"]) |
436 | 436 | assert len(tasks) > cancelled_counter["cancelled"] > 0 |
| 437 | + |
| 438 | + |
@patch("unstructured_client._hooks.custom.form_utils.Path")
def test_unit_get_split_pdf_cache_tmp_data_dir_uses_dir_from_form_data(mock_path: MagicMock) -> None:
    """Check that get_split_pdf_cache_tmp_data_dir uses the directory given in the form data.

    `Path` is patched inside `form_utils` so that the directory named in the form
    data appears to exist without touching the real filesystem. The test then
    verifies that the helper built a `Path` from that directory, checked that it
    exists, and returned the resolved directory as a string.

    Args:
        mock_path: The `MagicMock` injected by `@patch` in place of
            `form_utils.Path`.
    """
    # -- Create the form_data
    dir_key = form_utils.PARTITION_FORM_SPLIT_CACHE_TMP_DATA_DIR_KEY  # -- "split_pdf_cache_tmp_data_dir"
    mock_dir = "/mock/dir"
    form_data = {dir_key: mock_dir}

    # -- Mock the Path object in form_utils; `Path` used directly in this test module
    # -- is the real pathlib.Path because only the name inside form_utils is patched.
    mock_path_instance = MagicMock()
    mock_path.return_value = mock_path_instance
    mock_path_instance.exists.return_value = True
    mock_path_instance.resolve.return_value = Path(mock_dir)

    result = form_utils.get_split_pdf_cache_tmp_data_dir(
        form_data=form_data,
        key=dir_key,
        fallback_value=DEFAULT_CACHE_TMP_DATA_DIR,  # -- tempfile.gettempdir()
    )

    assert dir_key == "split_pdf_cache_tmp_data_dir"
    # -- The form data must still hold the directory it was created with. Comparing
    # -- against `mock_dir` (rather than an unrelated literal) keeps this in sync
    # -- with the fixture above.
    assert form_data.get(dir_key) == mock_dir
    mock_path.assert_called_once_with(mock_dir)
    mock_path_instance.exists.assert_called_once()
    assert result == str(Path(mock_dir).resolve())
0 commit comments