Skip to content

Commit 6b19228

Browse files
committed
test complete
1 parent 18a70e0 commit 6b19228

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

services/dask-sidecar/tests/unit/test_file_utils.py

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
# pylint: disable=unused-variable
44

55
import asyncio
6+
import hashlib
67
import mimetypes
78
import zipfile
89
from collections.abc import AsyncIterable
@@ -375,3 +376,74 @@ async def test_pull_compressed_zip_file_from_remote(
375376
assert file.exists()
376377
assert file.name in file_names_within_zip_file
377378
mocked_log_publishing_cb.assert_called()
379+
380+
381+
def _compute_hash(file_path: Path) -> str:
382+
with file_path.open("rb") as file_to_hash:
383+
file_hash = hashlib.sha256()
384+
chunk = file_to_hash.read(8192)
385+
while chunk:
386+
file_hash.update(chunk)
387+
chunk = file_to_hash.read(8192)
388+
389+
return file_hash.hexdigest()
390+
391+
392+
async def test_push_file_to_remote_creates_reproducible_zip_archive(
    remote_parameters: StorageParameters,
    tmp_path: Path,
    faker: Faker,
    mocked_log_publishing_cb: mock.AsyncMock,
):
    """Check that pushing the same file twice yields archives with equal hashes.

    One text file is pushed to two distinct remote ``.zip`` URLs with a pause
    in between; both remote files are then pulled back with
    ``target_mime_type=None`` and the hashes of the downloads are compared.
    """
    first_remote_url = parse_obj_as(
        AnyUrl, f"{remote_parameters.remote_file_url}1.zip"
    )
    second_remote_url = parse_obj_as(
        AnyUrl, f"{remote_parameters.remote_file_url}2.zip"
    )
    local_source = tmp_path / faker.file_name()
    file_content = faker.text()
    local_source.write_text(file_content)
    assert local_source.exists()

    # pushing 2 times should produce the same archive with the same hash
    s3_settings = remote_parameters.s3_settings
    await push_file_to_remote(
        local_source, first_remote_url, mocked_log_publishing_cb, s3_settings
    )
    # NOTE: we wait a bit to ensure the created zipfile has a different
    # creation time (that is normally used for computing the hash)
    await asyncio.sleep(5)
    await push_file_to_remote(
        local_source, second_remote_url, mocked_log_publishing_cb, s3_settings
    )

    # now we pull both files and compare their hash

    # USE-CASE 1: if destination is a zip then no decompression is done
    download_dir = tmp_path / "download"
    download_dir.mkdir(parents=True, exist_ok=True)
    assert download_dir.exists()
    first_download = download_dir / f"{faker.file_name()}1.zip"
    second_download = download_dir / f"{faker.file_name()}2.zip"

    downloads = (
        (first_remote_url, first_download),
        (second_remote_url, second_download),
    )
    for remote_url, local_copy in downloads:
        await pull_file_from_remote(
            src_url=remote_url,
            target_mime_type=None,
            dst_path=local_copy,
            log_publishing_cb=mocked_log_publishing_cb,
            s3_settings=s3_settings,
        )
        assert local_copy.exists()

    first_digest = _compute_hash(first_download)
    second_digest = _compute_hash(second_download)
    assert first_digest == second_digest

0 commit comments

Comments
 (0)