|
| 1 | +from unittest.mock import Mock |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
| 5 | +from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers |
| 6 | +from unstructured_ingest.error import SourceConnectionError |
| 7 | +from unstructured_ingest.processes.connectors.sharepoint import ( |
| 8 | + SharepointConnectionConfig, |
| 9 | + SharepointDownloader, |
| 10 | + SharepointDownloaderConfig, |
| 11 | +) |
| 12 | + |
| 13 | + |
@pytest.fixture
def mock_client():
    """Provide a bare ``Mock`` used as the client handed out by the connection config."""
    client = Mock()
    return client
| 17 | + |
| 18 | + |
@pytest.fixture
def mock_site():
    """Provide a bare ``Mock`` that tests use as the result of the site lookup query."""
    site = Mock()
    return site
| 22 | + |
| 23 | + |
@pytest.fixture
def mock_drive_item():
    """Provide a bare ``Mock`` used as the drive item returned by the connection config."""
    drive_item = Mock()
    return drive_item
| 27 | + |
| 28 | + |
@pytest.fixture
def mock_file():
    """Provide a bare ``Mock`` that tests use as the file returned by the drive-item query."""
    fetched_file = Mock()
    return fetched_file
| 32 | + |
| 33 | + |
@pytest.fixture
def mock_connection_config(mock_client, mock_drive_item):
    """Build a ``SharepointConnectionConfig``-spec'd mock wired to the other fixtures.

    ``get_client()`` returns ``mock_client`` and ``_get_drive_item()`` returns
    ``mock_drive_item``, so tests can steer both through their own fixtures.
    """
    connection_config = Mock(spec=SharepointConnectionConfig)
    connection_config.site = "https://test.sharepoint.com/sites/test"
    # Dotted keys let configure_mock set the return values of the child mocks.
    connection_config.configure_mock(
        **{
            "get_client.return_value": mock_client,
            "_get_drive_item.return_value": mock_drive_item,
        }
    )
    return connection_config
| 41 | + |
| 42 | + |
@pytest.fixture
def mock_download_config():
    """Build a ``SharepointDownloaderConfig``-spec'd mock with ``max_retries`` set to 3."""
    # Extra keyword arguments to Mock() are applied as attributes on the new mock.
    return Mock(spec=SharepointDownloaderConfig, max_retries=3)
| 48 | + |
| 49 | + |
@pytest.fixture
def sharepoint_downloader(mock_connection_config, mock_download_config):
    """Construct the ``SharepointDownloader`` under test from the mocked configs."""
    return SharepointDownloader(
        connection_config=mock_connection_config,
        download_config=mock_download_config,
    )
| 56 | + |
| 57 | + |
@pytest.fixture
def file_data():
    """Return a ``FileData`` record describing ``test.docx`` in a SharePoint library."""
    identifiers = SourceIdentifiers(
        filename="test.docx",
        fullpath="/sites/test/Shared Documents/test.docx",
    )
    return FileData(
        source_identifiers=identifiers,
        connector_type="sharepoint",
        identifier="test-id",
    )
| 67 | + |
| 68 | + |
def test_fetch_file(
    mock_client, mock_drive_item, mock_site, mock_file, sharepoint_downloader, file_data
):
    """Happy path: ``_fetch_file`` returns the file after one site query and one file query."""
    # Alias the terminal ``execute_query`` mocks so the setup and assertions stay readable.
    site_query = mock_client.sites.get_by_url.return_value.get.return_value.execute_query
    file_query = mock_drive_item.get_by_path.return_value.get.return_value.execute_query
    site_query.return_value = mock_site
    file_query.return_value = mock_file

    fetched = sharepoint_downloader._fetch_file(file_data)

    assert fetched == mock_file
    assert site_query.call_count == 1
    assert file_query.call_count == 1
    mock_drive_item.get_by_path.assert_called_with("/sites/test/Shared Documents/test.docx")
| 83 | + |
| 84 | + |
def test_fetch_file_retries_on_429_error(
    mock_client, mock_drive_item, mock_site, mock_file, sharepoint_downloader, file_data
):
    """Test that _fetch_file retries when encountering 429 errors.

    The drive-item query raises two throttling-style exceptions and then
    returns ``mock_file``; the downloader is expected to return that file
    after exactly three query attempts.
    """
    # ``mock_file`` must be requested as a fixture. Without it in the signature
    # the name resolves to the module-level fixture definition rather than a
    # Mock, and the equality check below passes only by comparing that object
    # with itself.
    mock_client.sites.get_by_url.return_value.get.return_value.execute_query.return_value = (
        mock_site
    )
    # Exception instances in a side_effect list are raised; other values are returned.
    mock_drive_item.get_by_path.return_value.get.return_value.execute_query.side_effect = [
        Exception("429 Client Error"),
        Exception("Request has been throttled"),
        mock_file,
    ]

    result = sharepoint_downloader._fetch_file(file_data)

    assert result == mock_file
    assert mock_drive_item.get_by_path.return_value.get.return_value.execute_query.call_count == 3
| 101 | + |
| 102 | + |
def test_fetch_file_fails_after_max_retries(
    mock_client, mock_drive_item, mock_site, sharepoint_downloader, file_data
):
    """A persistent 429 error propagates once ``max_retries`` query attempts are used up."""
    site_query = mock_client.sites.get_by_url.return_value.get.return_value.execute_query
    file_query = mock_drive_item.get_by_path.return_value.get.return_value.execute_query
    site_query.return_value = mock_site
    # A single exception as side_effect is raised on every call.
    file_query.side_effect = Exception("429 Client Error")

    with pytest.raises(Exception, match="429"):
        sharepoint_downloader._fetch_file(file_data)

    # The number of attempts must match the configured retry budget.
    max_attempts = sharepoint_downloader.download_config.max_retries
    assert file_query.call_count == max_attempts
| 122 | + |
| 123 | + |
def test_fetch_file_handles_site_not_found_immediately(
    mock_client, sharepoint_downloader, file_data
):
    """A failing site lookup surfaces as ``SourceConnectionError`` after a single attempt."""
    site_query = mock_client.sites.get_by_url.return_value.get.return_value.execute_query
    site_query.side_effect = Exception("Site not found")

    with pytest.raises(SourceConnectionError, match="Site not found"):
        sharepoint_downloader._fetch_file(file_data)

    # Exactly one call shows the site lookup was not retried.
    assert site_query.call_count == 1
0 commit comments