Skip to content

Commit da1e85f

Browse files
authored
🎨 Maintenance: change default chunk size to compute checksums to 8MiB instead of 4KB (#7753)
1 parent 6647cd8 commit da1e85f

File tree

5 files changed

+37
-33
lines changed

5 files changed

+37
-33
lines changed

packages/service-library/src/servicelib/file_utils.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import asyncio
22
import hashlib
33
import shutil
4+
from collections.abc import Iterator
45
from contextlib import contextmanager
56
from logging import Logger
67
from pathlib import Path
7-
from typing import Final, Iterator, Protocol
8+
from typing import Final, Protocol
89

910
# https://docs.python.org/3/library/shutil.html#shutil.rmtree
1011
# https://docs.python.org/3/library/os.html#os.remove
@@ -13,11 +14,13 @@
1314
from pydantic import ByteSize, TypeAdapter
1415

1516
CHUNK_4KB: Final[ByteSize] = TypeAdapter(ByteSize).validate_python("4kb") # 4K blocks
17+
CHUNK_8MB: Final[ByteSize] = TypeAdapter(ByteSize).validate_python(
18+
"8MiB"
19+
) # 8MiB blocks
1620

1721

1822
class AsyncStream(Protocol):
19-
async def read(self, size: int = -1) -> bytes:
20-
...
23+
async def read(self, size: int = -1) -> bytes: ...
2124

2225

2326
_shutil_rmtree = sync_to_async(shutil.rmtree)
@@ -45,7 +48,7 @@ async def remove_directory(
4548

4649

4750
async def create_sha256_checksum(
48-
async_stream: AsyncStream, *, chunk_size: ByteSize = CHUNK_4KB
51+
async_stream: AsyncStream, *, chunk_size: ByteSize = CHUNK_8MB
4952
) -> str:
5053
"""
5154
Usage:

packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ async def _generate_checksum(
291291
return checksum
292292
if isinstance(path_to_upload, Path):
293293
async with aiofiles.open(path_to_upload, mode="rb") as f:
294-
checksum = SHA256Str(await create_sha256_checksum(f))
294+
checksum = await create_sha256_checksum(f)
295295
elif isinstance(path_to_upload, UploadableFileObject):
296296
checksum = path_to_upload.sha256_checksum
297297
return checksum

packages/simcore-sdk/tests/integration/conftest.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def _assign_config(
337337
@pytest.fixture
338338
async def r_clone_settings_factory(
339339
minio_s3_settings: S3Settings, storage_service: URL
340-
) -> Awaitable[RCloneSettings]:
340+
) -> Callable[[], Awaitable[RCloneSettings]]:
341341
async def _factory() -> RCloneSettings:
342342
settings = RCloneSettings(
343343
R_CLONE_S3=minio_s3_settings, R_CLONE_PROVIDER=S3Provider.MINIO
@@ -347,35 +347,35 @@ async def _factory() -> RCloneSettings:
347347

348348
return settings
349349

350-
return _factory()
350+
return _factory
351351

352352

353353
@pytest.fixture
354354
async def aws_s3_cli_settings_factory(
355355
minio_s3_settings: S3Settings, storage_service: URL
356-
) -> Awaitable[AwsS3CliSettings]:
356+
) -> Callable[[], Awaitable[AwsS3CliSettings]]:
357357
async def _factory() -> AwsS3CliSettings:
358358
settings = AwsS3CliSettings(AWS_S3_CLI_S3=minio_s3_settings)
359359
if not await is_aws_s3_cli_available(settings):
360360
pytest.skip("aws cli not installed")
361361

362362
return settings
363363

364-
return _factory()
364+
return _factory
365365

366366

367367
@pytest.fixture
368368
async def r_clone_settings(
369-
r_clone_settings_factory: Awaitable[RCloneSettings],
369+
r_clone_settings_factory: Callable[[], Awaitable[RCloneSettings]],
370370
) -> RCloneSettings:
371-
return await r_clone_settings_factory
371+
return await r_clone_settings_factory()
372372

373373

374374
@pytest.fixture
375375
async def aws_s3_cli_settings(
376-
aws_s3_cli_settings_factory: Awaitable[AwsS3CliSettings],
376+
aws_s3_cli_settings_factory: Callable[[], Awaitable[AwsS3CliSettings]],
377377
) -> AwsS3CliSettings:
378-
return await aws_s3_cli_settings_factory
378+
return await aws_s3_cli_settings_factory()
379379

380380

381381
@pytest.fixture

packages/simcore-sdk/tests/integration/test_node_ports_common_filemanager.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,17 @@ class _SyncSettings(BaseModel):
5656
"Both RClone and AwsS3Cli disabled",
5757
],
5858
)
59-
def optional_sync_settings(
60-
r_clone_settings: RCloneSettings,
61-
aws_s3_cli_settings: AwsS3CliSettings,
59+
async def optional_sync_settings(
60+
r_clone_settings_factory: Callable[[], Awaitable[RCloneSettings]],
61+
aws_s3_cli_settings_factory: Callable[[], Awaitable[AwsS3CliSettings]],
6262
request: pytest.FixtureRequest,
6363
) -> _SyncSettings:
6464
_rclone_enabled, _aws_s3_cli_enabled = request.param
6565

66-
_r_clone_settings = r_clone_settings if _rclone_enabled else None
67-
_aws_s3_cli_settings = aws_s3_cli_settings if _aws_s3_cli_enabled else None
66+
_r_clone_settings = await r_clone_settings_factory() if _rclone_enabled else None
67+
_aws_s3_cli_settings = (
68+
await aws_s3_cli_settings_factory() if _aws_s3_cli_enabled else None
69+
)
6870

6971
return _SyncSettings(
7072
r_clone_settings=_r_clone_settings, aws_s3_cli_settings=_aws_s3_cli_settings

packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import pytest
2222
import sqlalchemy as sa
2323
from faker import Faker
24-
from models_library.projects import ProjectIDStr
2524
from models_library.projects_nodes_io import (
2625
BaseFileLink,
2726
DownloadLink,
@@ -156,10 +155,10 @@ def config_value_symlink_path(symlink_path: Path) -> dict[str, Any]:
156155

157156
@pytest.fixture(params=[True, False])
158157
async def option_r_clone_settings(
159-
request, r_clone_settings_factory: Awaitable[RCloneSettings]
158+
request, r_clone_settings_factory: Callable[[], Awaitable[RCloneSettings]]
160159
) -> RCloneSettings | None:
161160
if request.param:
162-
return await r_clone_settings_factory
161+
return await r_clone_settings_factory()
163162
return None
164163

165164

@@ -174,7 +173,7 @@ async def test_default_configuration(
174173
await check_config_valid(
175174
await node_ports_v2.ports(
176175
user_id=user_id,
177-
project_id=ProjectIDStr(project_id),
176+
project_id=project_id,
178177
node_uuid=node_uuid,
179178
r_clone_settings=option_r_clone_settings,
180179
),
@@ -192,7 +191,7 @@ async def test_invalid_ports(
192191
config_dict, _, _ = create_special_configuration()
193192
PORTS = await node_ports_v2.ports(
194193
user_id=user_id,
195-
project_id=ProjectIDStr(project_id),
194+
project_id=project_id,
196195
node_uuid=node_uuid,
197196
r_clone_settings=option_r_clone_settings,
198197
)
@@ -238,7 +237,7 @@ async def test_port_value_accessors(
238237

239238
PORTS = await node_ports_v2.ports(
240239
user_id=user_id,
241-
project_id=ProjectIDStr(project_id),
240+
project_id=project_id,
242241
node_uuid=node_uuid,
243242
r_clone_settings=option_r_clone_settings,
244243
)
@@ -298,7 +297,7 @@ async def test_port_file_accessors(
298297

299298
PORTS = await node_ports_v2.ports(
300299
user_id=user_id,
301-
project_id=ProjectIDStr(project_id),
300+
project_id=project_id,
302301
node_uuid=node_uuid,
303302
r_clone_settings=option_r_clone_settings,
304303
)
@@ -375,7 +374,7 @@ async def test_adding_new_ports(
375374
config_dict, project_id, node_uuid = create_special_configuration()
376375
PORTS = await node_ports_v2.ports(
377376
user_id=user_id,
378-
project_id=ProjectIDStr(project_id),
377+
project_id=project_id,
379378
node_uuid=node_uuid,
380379
r_clone_settings=option_r_clone_settings,
381380
)
@@ -429,7 +428,7 @@ async def test_removing_ports(
429428
) # pylint: disable=W0612
430429
PORTS = await node_ports_v2.ports(
431430
user_id=user_id,
432-
project_id=ProjectIDStr(project_id),
431+
project_id=project_id,
433432
node_uuid=node_uuid,
434433
r_clone_settings=option_r_clone_settings,
435434
)
@@ -489,7 +488,7 @@ async def test_get_value_from_previous_node(
489488

490489
PORTS = await node_ports_v2.ports(
491490
user_id=user_id,
492-
project_id=ProjectIDStr(project_id),
491+
project_id=project_id,
493492
node_uuid=node_uuid,
494493
r_clone_settings=option_r_clone_settings,
495494
)
@@ -541,7 +540,7 @@ async def test_get_file_from_previous_node(
541540
)
542541
PORTS = await node_ports_v2.ports(
543542
user_id=user_id,
544-
project_id=ProjectIDStr(project_id),
543+
project_id=project_id,
545544
node_uuid=node_uuid,
546545
r_clone_settings=option_r_clone_settings,
547546
)
@@ -598,7 +597,7 @@ async def test_get_file_from_previous_node_with_mapping_of_same_key_name(
598597
)
599598
PORTS = await node_ports_v2.ports(
600599
user_id=user_id,
601-
project_id=ProjectIDStr(project_id),
600+
project_id=project_id,
602601
node_uuid=node_uuid,
603602
r_clone_settings=option_r_clone_settings,
604603
)
@@ -659,7 +658,7 @@ async def test_file_mapping(
659658
)
660659
PORTS = await node_ports_v2.ports(
661660
user_id=user_id,
662-
project_id=ProjectIDStr(project_id),
661+
project_id=project_id,
663662
node_uuid=node_uuid,
664663
r_clone_settings=option_r_clone_settings,
665664
)
@@ -752,7 +751,7 @@ async def test_regression_concurrent_port_update_fails(
752751

753752
PORTS = await node_ports_v2.ports(
754753
user_id=user_id,
755-
project_id=ProjectIDStr(project_id),
754+
project_id=project_id,
756755
node_uuid=node_uuid,
757756
r_clone_settings=option_r_clone_settings,
758757
)
@@ -841,7 +840,7 @@ async def test_batch_update_inputs_outputs(
841840

842841
PORTS = await node_ports_v2.ports(
843842
user_id=user_id,
844-
project_id=ProjectIDStr(project_id),
843+
project_id=project_id,
845844
node_uuid=node_uuid,
846845
r_clone_settings=option_r_clone_settings,
847846
)

0 commit comments

Comments
 (0)