diff --git a/packages/aws-library/requirements/_base.txt b/packages/aws-library/requirements/_base.txt index 1f65d8a61aea..4834672e6186 100644 --- a/packages/aws-library/requirements/_base.txt +++ b/packages/aws-library/requirements/_base.txt @@ -384,7 +384,7 @@ typer==0.15.2 # via # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/settings-library/requirements/_base.in -types-aiobotocore==2.21.0 +types-aiobotocore==2.21.1 # via -r requirements/_base.in types-aiobotocore-ec2==2.21.0 # via types-aiobotocore diff --git a/packages/aws-library/requirements/_test.txt b/packages/aws-library/requirements/_test.txt index aeb40e2bbe72..6ba0bd4aa96d 100644 --- a/packages/aws-library/requirements/_test.txt +++ b/packages/aws-library/requirements/_test.txt @@ -280,9 +280,9 @@ sympy==1.13.3 # via cfn-lint termcolor==2.5.0 # via pytest-sugar -types-aioboto3==14.0.0 +types-aioboto3==14.1.0 # via -r requirements/_test.in -types-aiobotocore==2.21.0 +types-aiobotocore==2.21.1 # via # -c requirements/_base.txt # types-aioboto3 @@ -290,7 +290,7 @@ types-awscrt==0.23.10 # via # -c requirements/_base.txt # botocore-stubs -types-boto3==1.37.4 +types-boto3==1.38.2 # via -r requirements/_test.in types-s3transfer==0.11.3 # via diff --git a/packages/aws-library/src/aws_library/s3/__init__.py b/packages/aws-library/src/aws_library/s3/__init__.py index 8a9a85f1279e..ea8f6264d604 100644 --- a/packages/aws-library/src/aws_library/s3/__init__.py +++ b/packages/aws-library/src/aws_library/s3/__init__.py @@ -22,10 +22,10 @@ ) __all__: tuple[str, ...] = ( - "PRESIGNED_LINK_MAX_SIZE", - "S3_MAX_FILE_SIZE", "CopiedBytesTransferredCallback", "MultiPartUploadLinks", + "PRESIGNED_LINK_MAX_SIZE", + "S3_MAX_FILE_SIZE", "S3AccessError", "S3BucketInvalidError", "S3DestinationNotEmptyError", @@ -37,8 +37,8 @@ "S3RuntimeError", "S3UploadNotFoundError", "SimcoreS3API", - "UploadID", "UploadedBytesTransferredCallback", + "UploadID", ) # nopycln: file diff --git a/packages/aws-library/src/aws_library/s3/_client.py b/packages/aws-library/src/aws_library/s3/_client.py index 9d308a9d0fb2..69dba4fa3433 100644 --- a/packages/aws-library/src/aws_library/s3/_client.py +++ b/packages/aws-library/src/aws_library/s3/_client.py @@ -3,7 +3,6 @@ import functools import logging import urllib.parse -import warnings from collections.abc import AsyncGenerator, Sequence from dataclasses import dataclass, field from pathlib import Path @@ -12,7 +11,6 @@ import aioboto3 from aiobotocore.session import ClientCreatorContext from boto3.s3.transfer import TransferConfig -from botocore import __version__ as botocore_version from botocore import exceptions as botocore_exc from botocore.client import Config from models_library.api_schemas_storage.storage_schemas import ( @@ -22,7 +20,6 @@ ) from models_library.basic_types import SHA256Str from models_library.bytes_iters import BytesIter, DataSize -from packaging import version from pydantic import AnyUrl, ByteSize, TypeAdapter from servicelib.bytes_iters import DEFAULT_READ_CHUNK_SIZE, BytesStreamer from servicelib.logging_utils import log_catch, log_context @@ -54,22 +51,6 @@ ) from ._utils import compute_num_file_chunks, create_final_prefix -_BOTOCORE_VERSION: Final[version.Version] = version.parse(botocore_version) -_MAX_BOTOCORE_VERSION_COMPATIBLE_WITH_CEPH_S3: Final[version.Version] = version.parse( - "1.36.0" -) - - -def _check_botocore_version() -> None: - if _BOTOCORE_VERSION >= _MAX_BOTOCORE_VERSION_COMPATIBLE_WITH_CEPH_S3: - warnings.warn( - f"Botocore version {botocore_version} is not supported for file uploads with CEPH S3 until CEPH is updated. " - "Please use a version < 1.36.0. The upload operation will likely fail.", - RuntimeWarning, - stacklevel=2, - ) - - _logger = logging.getLogger(__name__) _S3_MAX_CONCURRENCY_DEFAULT: Final[int] = 10 @@ -107,13 +88,21 @@ async def create( session_client = None exit_stack = contextlib.AsyncExitStack() try: + config = Config( + # This setting tells the S3 client to only calculate checksums when explicitly required + # by the operation. This avoids unnecessary checksum calculations for operations that + # don't need them, improving performance. + # See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3.html#calculating-checksums + signature_version="s3v4", + request_checksum_calculation="when_required", # type: ignore[call-arg] + ) session_client = session.client( # type: ignore[call-overload] "s3", endpoint_url=f"{settings.S3_ENDPOINT}", aws_access_key_id=settings.S3_ACCESS_KEY, aws_secret_access_key=settings.S3_SECRET_KEY, region_name=settings.S3_REGION, - config=Config(signature_version="s3v4"), + config=config, ) assert isinstance(session_client, ClientCreatorContext) # nosec @@ -523,9 +512,6 @@ async def upload_file( bytes_transfered_cb: UploadedBytesTransferredCallback | None, ) -> None: """upload a file using aioboto3 transfer manager (e.g. works >5Gb and creates multiple threads)""" - - _check_botocore_version() - upload_options: dict[str, Any] = { "Bucket": bucket, "Key": object_key, @@ -550,9 +536,6 @@ async def copy_object( object_metadata: S3MetaData | None = None, ) -> None: """copy a file in S3 using aioboto3 transfer manager (e.g. works >5Gb and creates multiple threads)""" - - _check_botocore_version() - copy_options: dict[str, Any] = { "CopySource": {"Bucket": bucket, "Key": src_object_key}, "Bucket": bucket, @@ -659,7 +642,6 @@ async def upload_object_from_file_like( file_like_reader: FileLikeReader, ) -> None: """streams write an object in S3 from an AsyncIterable[bytes]""" - _check_botocore_version() await self._client.upload_fileobj(file_like_reader, bucket_name, object_key) # type: ignore[arg-type] @staticmethod diff --git a/services/dask-sidecar/requirements/_base.txt b/services/dask-sidecar/requirements/_base.txt index 96c7590e1df0..f59d2014f251 100644 --- a/services/dask-sidecar/requirements/_base.txt +++ b/services/dask-sidecar/requirements/_base.txt @@ -1,6 +1,6 @@ aio-pika==9.5.3 # via -r requirements/../../../packages/service-library/requirements/_base.in -aiobotocore==2.17.0 +aiobotocore==2.21.1 # via s3fs aiocache==0.12.3 # via -r requirements/../../../packages/service-library/requirements/_base.in @@ -67,10 +67,8 @@ blosc==1.11.2 # via -r requirements/_base.in bokeh==3.6.2 # via dask -botocore==1.35.93 - # via - # -c requirements/constraints.txt - # aiobotocore +botocore==1.37.1 + # via aiobotocore certifi==2024.8.30 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -569,7 +567,6 @@ urllib3==2.2.3 # -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt - # aiobotocore # botocore # distributed # requests diff --git a/services/dask-sidecar/requirements/_test.txt b/services/dask-sidecar/requirements/_test.txt index d9e95c7c297f..d72662cb7530 100644 --- a/services/dask-sidecar/requirements/_test.txt +++ b/services/dask-sidecar/requirements/_test.txt @@ -15,11 +15,11 @@ aws-xray-sdk==2.14.0 # via moto blinker==1.9.0 # via flask -boto3==1.35.93 +boto3==1.37.1 # via # aws-sam-translator # moto -botocore==1.35.93 +botocore==1.37.1 # via # -c requirements/_base.txt # aws-xray-sdk @@ -226,7 +226,7 @@ rpds-py==0.22.3 # -c requirements/_base.txt # jsonschema # referencing -s3transfer==0.10.4 +s3transfer==0.11.3 # via boto3 setuptools==75.8.2 # via moto diff --git a/services/dask-sidecar/requirements/constraints.txt b/services/dask-sidecar/requirements/constraints.txt index 9bb961b3dbd2..d8a10d684a0d 100644 --- a/services/dask-sidecar/requirements/constraints.txt +++ b/services/dask-sidecar/requirements/constraints.txt @@ -13,6 +13,3 @@ dask[distributed]>=2024.4.2 # issue with publish_dataset: https://github.com/das # # Compatibility/coordination # -# botocore does not always add the checksums to the s3 object metadata leading to issues with CEPH S3 -# see https://github.com/ITISFoundation/osparc-simcore/issues/7585 -botocore<1.36.0 diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py index 1016cfd5c5cd..ed1db6163d23 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py @@ -39,7 +39,7 @@ def _file_progress_cb( asyncio.run_coroutine_threadsafe( log_publishing_cb( f"{text_prefix}" - f" {100.0 * float(value or 0)/float(size or 1):.1f}%" + f" {100.0 * float(value or 0) / float(size or 1):.1f}%" f" ({ByteSize(value).human_readable() if value else 0} / {ByteSize(size).human_readable() if size else 'NaN'})", logging.DEBUG, ), @@ -59,6 +59,7 @@ class S3FsSettingsDict(TypedDict): key: str secret: str client_kwargs: ClientKWArgsDict + config_kwargs: dict[str, str] # For botocore config options _DEFAULT_AWS_REGION: Final[str] = "us-east-1" @@ -69,6 +70,14 @@ def _s3fs_settings_from_s3_settings(s3_settings: S3Settings) -> S3FsSettingsDict "key": s3_settings.S3_ACCESS_KEY, "secret": s3_settings.S3_SECRET_KEY, "client_kwargs": {}, + "config_kwargs": { + # This setting tells the S3 client to only calculate checksums when explicitly required + # by the operation. This avoids unnecessary checksum calculations for operations that + # don't need them, improving performance. + # See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3.html#calculating-checksums + "request_checksum_calculation": "when_required", + "signature_version": "s3v4", + }, } if s3_settings.S3_REGION != _DEFAULT_AWS_REGION: # NOTE: see https://github.com/boto/boto3/issues/125 why this is so... (sic) @@ -96,9 +105,10 @@ async def _copy_file( ): src_storage_kwargs = src_storage_cfg or {} dst_storage_kwargs = dst_storage_cfg or {} - with fsspec.open( - f"{src_url}", mode="rb", **src_storage_kwargs - ) as src_fp, fsspec.open(f"{dst_url}", "wb", **dst_storage_kwargs) as dst_fp: + with ( + fsspec.open(f"{src_url}", mode="rb", **src_storage_kwargs) as src_fp, + fsspec.open(f"{dst_url}", mode="wb", **dst_storage_kwargs) as dst_fp, + ): assert isinstance(src_fp, IOBase) # nosec assert isinstance(dst_fp, IOBase) # nosec file_size = getattr(src_fp, "size", None) @@ -106,16 +116,19 @@ async def _copy_file( total_data_written = 0 t = time.process_time() while data_read: - (data_read, data_written,) = await asyncio.get_event_loop().run_in_executor( + ( + data_read, + data_written, + ) = await asyncio.get_event_loop().run_in_executor( None, _file_chunk_streamer, src_fp, dst_fp ) elapsed_time = time.process_time() - t total_data_written += data_written or 0 await log_publishing_cb( f"{text_prefix}" - f" {100.0 * float(total_data_written or 0)/float(file_size or 1):.1f}%" + f" {100.0 * float(total_data_written or 0) / float(file_size or 1):.1f}%" f" ({ByteSize(total_data_written).human_readable() if total_data_written else 0} / {ByteSize(file_size).human_readable() if file_size else 'NaN'})" - f" [{ByteSize(total_data_written).to('MB')/elapsed_time:.2f} MBytes/s (avg)]", + f" [{ByteSize(total_data_written).to('MB') / elapsed_time:.2f} MBytes/s (avg)]", logging.DEBUG, ) diff --git a/services/storage/requirements/_base.in b/services/storage/requirements/_base.in index 99f71f50afff..cf0ccfdba899 100644 --- a/services/storage/requirements/_base.in +++ b/services/storage/requirements/_base.in @@ -3,7 +3,6 @@ # --constraint ../../../requirements/constraints.txt ---constraint ./constraints.txt --requirement ../../../packages/aws-library/requirements/_base.in diff --git a/services/storage/requirements/_base.txt b/services/storage/requirements/_base.txt index 7ac5068587f9..f5eea0f34d05 100644 --- a/services/storage/requirements/_base.txt +++ b/services/storage/requirements/_base.txt @@ -2,11 +2,11 @@ aio-pika==9.5.4 # via # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in -aioboto3==13.3.0 +aioboto3==14.1.0 # via # -r requirements/../../../packages/aws-library/requirements/_base.in # -r requirements/_base.in -aiobotocore==2.16.0 +aiobotocore==2.21.1 # via aioboto3 aiocache==0.12.3 # via @@ -102,11 +102,10 @@ attrs==25.1.0 # referencing billiard==4.2.1 # via celery -boto3==1.35.81 +boto3==1.37.1 # via aiobotocore -botocore==1.35.81 +botocore==1.37.1 # via - # -c requirements/./constraints.txt # aiobotocore # boto3 # s3transfer @@ -286,6 +285,7 @@ jinja2==3.1.5 # fastapi jmespath==1.0.1 # via + # aiobotocore # boto3 # botocore jsonschema==4.23.0 @@ -337,6 +337,7 @@ mdurl==0.1.2 # via markdown-it-py multidict==6.1.0 # via + # aiobotocore # aiohttp # yarl opentelemetry-api==1.30.0 @@ -636,6 +637,7 @@ pyinstrument==5.0.1 # -r requirements/../../../packages/service-library/requirements/_base.in python-dateutil==2.9.0.post0 # via + # aiobotocore # arrow # botocore # celery @@ -754,7 +756,7 @@ rpds-py==0.22.3 # via # jsonschema # referencing -s3transfer==0.10.4 +s3transfer==0.11.3 # via boto3 sh==2.2.1 # via -r requirements/../../../packages/aws-library/requirements/_base.in diff --git a/services/storage/requirements/_test.txt b/services/storage/requirements/_test.txt index 6dfb56937e35..221cc0b62eec 100644 --- a/services/storage/requirements/_test.txt +++ b/services/storage/requirements/_test.txt @@ -47,12 +47,12 @@ billiard==4.2.1 # celery blinker==1.9.0 # via flask -boto3==1.35.81 +boto3==1.37.1 # via # -c requirements/_base.txt # aws-sam-translator # moto -botocore==1.35.81 +botocore==1.37.1 # via # -c requirements/_base.txt # aws-xray-sdk @@ -362,7 +362,7 @@ rpds-py==0.22.3 # -c requirements/_base.txt # jsonschema # referencing -s3transfer==0.10.4 +s3transfer==0.11.3 # via # -c requirements/_base.txt # boto3 diff --git a/services/storage/requirements/constraints.txt b/services/storage/requirements/constraints.txt deleted file mode 100644 index d8bb7d732246..000000000000 --- a/services/storage/requirements/constraints.txt +++ /dev/null @@ -1,3 +0,0 @@ -# botocore does not always add the checksums to the s3 object metadata leading to issues with CEPH S3 -# see https://github.com/ITISFoundation/osparc-simcore/issues/7585 -botocore<1.36.0