|
1 | 1 | # tests/test_upload_large_folder.py
|
| 2 | +import unittest |
| 3 | +from unittest.mock import MagicMock, patch |
| 4 | + |
2 | 5 | import pytest
|
3 | 6 |
|
4 |
| -from huggingface_hub._upload_large_folder import COMMIT_SIZE_SCALE, LargeUploadStatus |
| 7 | +from huggingface_hub._upload_large_folder import ( |
| 8 | + COMMIT_SIZE_SCALE, |
| 9 | + MAX_FILES_PER_FOLDER, |
| 10 | + MAX_FILES_PER_REPO, |
| 11 | + LargeUploadStatus, |
| 12 | + _validate_upload_limits, |
| 13 | +) |
5 | 14 |
|
6 | 15 |
|
7 | 16 | @pytest.fixture
|
@@ -32,3 +41,117 @@ def test_update_chunk_transitions(status, start_idx, success, delta_items, durat
|
32 | 41 |
|
33 | 42 | assert status._chunk_idx == expected_idx
|
34 | 43 | assert status.target_chunk() == COMMIT_SIZE_SCALE[expected_idx]
|
| 44 | + |
| 45 | + |
class TestValidateUploadLimits(unittest.TestCase):
    """Call `_validate_upload_limits` directly and inspect the warnings it logs.

    Each test patches the module-level logger of `huggingface_hub._upload_large_folder`,
    feeds the validator a list of `MockPath` stand-ins, and then checks what was
    (or was not) passed to `logger.warning`.
    """

    class MockPath:
        """Minimal stand-in used to simulate LocalUploadFilePaths.

        Exposes `path_in_repo` and a mocked `file_path` whose `stat().st_size`
        returns the requested size, so no real file is needed.
        """

        def __init__(self, path_in_repo, size_bytes=1000):
            fake_file = MagicMock()
            fake_file.stat.return_value.st_size = size_bytes
            self.path_in_repo = path_in_repo
            self.file_path = fake_file

    @staticmethod
    def _warnings_as_text(logger_mock):
        """Return the string form of every call recorded on `logger_mock.warning`."""
        return [str(recorded) for recorded in logger_mock.warning.call_args_list]

    @patch("huggingface_hub._upload_large_folder.logger")
    def test_no_warnings_under_limits(self, mock_logger):
        """A handful of small files in a shallow tree must not trigger any warning."""
        small_tree = [
            self.MockPath(repo_path)
            for repo_path in ("file1.txt", "data/file2.txt", "data/sub/file3.txt")
        ]

        _validate_upload_limits(small_tree)

        # Info-level output is acceptable here; only warnings are forbidden.
        mock_logger.warning.assert_not_called()

    @patch("huggingface_hub._upload_large_folder.logger")
    def test_warns_too_many_total_files(self, mock_logger):
        """Going past MAX_FILES_PER_REPO must produce a warning quoting the file count."""
        total_files = MAX_FILES_PER_REPO + 10
        oversized_repo = [self.MockPath(f"file{i}.txt") for i in range(total_files)]

        _validate_upload_limits(oversized_repo)

        messages = self._warnings_as_text(mock_logger)
        assert any(f"{total_files:,} files" in msg for msg in messages)
        assert any("exceeds the recommended limit" in msg for msg in messages)

    @patch("huggingface_hub._upload_large_folder.logger")
    def test_warns_too_many_subdirectories(self, mock_logger):
        """A folder holding more than MAX_FILES_PER_FOLDER subdirectories must be reported."""
        subdir_count = MAX_FILES_PER_FOLDER + 10
        # One file per subdirectory, all of them directly under "data".
        crowded_folder = [
            self.MockPath(f"data/subdir{i:05d}/file.txt") for i in range(subdir_count)
        ]

        _validate_upload_limits(crowded_folder)

        messages = self._warnings_as_text(mock_logger)
        assert any("data" in msg and "subdirectories" in msg for msg in messages)
        assert any(f"{subdir_count:,} subdirectories" in msg for msg in messages)

    @patch("huggingface_hub._upload_large_folder.logger")
    def test_counts_files_and_subdirs_separately(self, mock_logger):
        """Files and subdirectories of one folder are tallied separately, then summed."""
        # 5000 files sitting directly in data/ ...
        direct_files = [self.MockPath(f"data/file{i}.txt") for i in range(5000)]
        # ... plus 5100 subdirectories with one file each (over the limit only when combined).
        nested_files = [self.MockPath(f"data/subdir{i}/file.txt") for i in range(5100)]

        _validate_upload_limits(direct_files + nested_files)

        # Expect "data" to be reported with 10,100 entries (5000 files + 5100 subdirs).
        messages = self._warnings_as_text(mock_logger)
        assert any("data" in msg and "10,100 entries" in msg for msg in messages)
        assert any("5,000 files" in msg and "5,100 subdirectories" in msg for msg in messages)

    @patch("huggingface_hub._upload_large_folder.logger")
    def test_file_size_decimal_gb(self, mock_logger):
        """File sizes are expected to be reported in decimal GB (10^9 bytes)."""
        # 21 GB in decimal units: 21 * 10^9 bytes.
        twenty_one_gb = 21 * 1_000_000_000
        single_large = [self.MockPath("large_file.bin", size_bytes=twenty_one_gb)]

        _validate_upload_limits(single_large)

        # The warning should mention the size and the 20GB recommended limit.
        messages = self._warnings_as_text(mock_logger)
        assert any("21.0GB" in msg or "21GB" in msg for msg in messages)
        assert any("20GB (recommended limit)" in msg for msg in messages)

    @patch("huggingface_hub._upload_large_folder.logger")
    def test_very_large_file_warning(self, mock_logger):
        """A file above the 50GB hard limit must be called out as such."""
        # 51 GB in decimal units.
        fifty_one_gb = 51 * 1_000_000_000
        single_huge = [self.MockPath("huge_file.bin", size_bytes=fifty_one_gb)]

        _validate_upload_limits(single_huge)

        # The warning should mention the size and the 50GB hard limit.
        messages = self._warnings_as_text(mock_logger)
        assert any("51.0GB" in msg or "51GB" in msg for msg in messages)
        assert any("50GB hard limit" in msg for msg in messages)

    @patch("huggingface_hub._upload_large_folder.logger")
    def test_nested_directory_structure(self, mock_logger):
        """Deeply nested but sparsely populated folders must not trigger warnings."""
        deep_tree = [
            self.MockPath(repo_path)
            for repo_path in (
                "a/b/c/d/e/file1.txt",
                "a/b/c/d/e/file2.txt",
                "a/b/c/d/f/file3.txt",
                "a/b/c/g/file4.txt",
            )
        ]

        _validate_upload_limits(deep_tree)

        # No folder in this tree holds more than two entries, so nothing should be flagged.
        mock_logger.warning.assert_not_called()
0 commit comments