 import json
 import os
 import uuid
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
 from dataclasses import dataclass
 from pathlib import Path
 from unittest import mock
@@ -205,33 +205,41 @@ def databricks_destination_context(
         yield client
     finally:
         # Cleanup
-        try:
-            for file in client.files.list_directory_contents(
-                directory_path=_get_volume_path(env_data.catalog, volume, volume_path)
-            ):
-                client.files.delete(file.path)
-            client.files.delete_directory(_get_volume_path(env_data.catalog, volume, volume_path))
-        except NotFound:
-            # Directory was never created, don't need to delete
-            pass
-
-
-def validate_upload(client: WorkspaceClient, catalog: str, volume: str, volume_path: str):
-    files = list(
-        client.files.list_directory_contents(
-            directory_path=_get_volume_path(catalog, volume, volume_path)
-        )
-    )
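+        # NotFound is suppressed: the directory may never have been created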
+        with suppress(NotFound):
+            client.workspace.delete(
+                path=_get_volume_path(env_data.catalog, volume, volume_path), recursive=True
+            )
+
+
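+# Walk the volume directory tree, returning the full paths of all files found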
+def list_files_recursively(client: WorkspaceClient, path: str):
+    files = []
+    objects = client.files.list_directory_contents(path)
+    for obj in objects:
+        full_path = obj.path
+        if obj.is_directory:
+            files.extend(list_files_recursively(client, full_path))
+        else:
+            files.append(full_path)
+    return files
+
+
+def validate_upload(
+    client: WorkspaceClient, catalog: str, volume: str, volume_path: str, num_files: int
+):
+    files = list_files_recursively(client, _get_volume_path(catalog, volume, volume_path))
 
-    assert len(files) == 1
+    assert len(files) == num_files
 
-    resp = client.files.download(files[0].path)
-    data = json.loads(resp.contents.read())
+    for i in range(num_files):
+        resp = client.files.download(files[i])
+        data = json.loads(resp.contents.read())
 
-    assert len(data) == 22
-    element_types = {v["type"] for v in data}
-    assert len(element_types) == 1
-    assert "CompositeElement" in element_types
+        assert len(data) == 22
+        element_types = {v["type"] for v in data}
+        assert len(element_types) == 1
+        assert "CompositeElement" in element_types
 
 
 @pytest.mark.asyncio
@@ -267,4 +275,53 @@ async def test_volumes_native_destination(upload_file: Path):
             catalog=env_data.catalog,
             volume="test-platform",
             volume_path=volume_path,
+            num_files=1,
+        )
+
+
+@pytest.mark.asyncio
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, BLOB_STORAGE_TAG)
+@requires_env(
+    "DATABRICKS_HOST", "DATABRICKS_CLIENT_ID", "DATABRICKS_CLIENT_SECRET", "DATABRICKS_CATALOG"
+)
+async def test_volumes_native_destination_same_filenames_different_folder(upload_file: Path):
+    env_data = get_basic_auth_env_data()
+    volume_path = f"databricks-volumes-test-output-{uuid.uuid4()}"
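+    # Two files share a filename but live in different source folders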
+    file_data_1 = FileData(
+        source_identifiers=SourceIdentifiers(
+            fullpath=f"folder1/{upload_file.name}", filename=upload_file.name
+        ),
+        connector_type=CONNECTOR_TYPE,
+        identifier="mock file data",
+    )
+    file_data_2 = FileData(
+        source_identifiers=SourceIdentifiers(
+            fullpath=f"folder2/{upload_file.name}", filename=upload_file.name
+        ),
+        connector_type=CONNECTOR_TYPE,
+        identifier="mock file data",
+    )
+    with databricks_destination_context(
+        volume="test-platform", volume_path=volume_path, env_data=env_data
+    ) as workspace_client:
+        connection_config = env_data.get_connection_config()
+        uploader = DatabricksNativeVolumesUploader(
+            connection_config=connection_config,
+            upload_config=DatabricksNativeVolumesUploaderConfig(
+                volume="test-platform",
+                volume_path=volume_path,
+                catalog=env_data.catalog,
+            ),
+        )
+        uploader.precheck()
+        uploader.run(path=upload_file, file_data=file_data_1)
+        uploader.run(path=upload_file, file_data=file_data_2)
+
+        validate_upload(
+            client=workspace_client,
+            catalog=env_data.catalog,
+            volume="test-platform",
+            volume_path=volume_path,
+            num_files=2,
         )