@@ -33,7 +33,9 @@
 )
 from rohmu.object_storage.config import (  # noqa: F401
     calculate_s3_chunk_size as calculate_chunk_size,
-    S3_MULTIPART_CHUNK_SIZE as MULTIPART_CHUNK_SIZE,
+    S3_DEFAULT_MULTIPART_CHUNK_SIZE as MULTIPART_CHUNK_SIZE,
+    S3_MAX_NUM_PARTS_PER_UPLOAD,
+    S3_MAX_PART_SIZE_BYTES,
     S3_READ_BLOCK_SIZE as READ_BLOCK_SIZE,
     S3AddressingStyle,
     S3ObjectStorageConfig as Config,
@@ -156,7 +158,7 @@ def __init__(
         self.read_timeout = read_timeout
         self.aws_session_token = aws_session_token
         self.use_dualstack_endpoint = use_dualstack_endpoint
-        self.multipart_chunk_size = segment_size
+        self.default_multipart_chunk_size = segment_size
         self.encrypted = encrypted
         self.s3_client: Optional[S3Client] = None
         self.location = ""
@@ -486,6 +488,29 @@ def get_file_size(self, key: str) -> int:
             else:
                 raise StorageError(f"File size lookup failed for {path}") from ex
 
+    def calculate_chunks_and_chunk_size(self, size: Optional[int]) -> tuple[int, int]:
+        """Calculate the number of chunks and the chunk size for a multipart upload.
+
+        If a size is provided, self.default_multipart_chunk_size is tried first. If that
+        would require more than S3_MAX_NUM_PARTS_PER_UPLOAD chunks, the chunk size is
+        increased so that the number of chunks stays within the limit, and StorageError
+        is raised if no chunk size within S3_MAX_PART_SIZE_BYTES can achieve that.
+        """
+        if size is None:
+            return 1, self.default_multipart_chunk_size
+        chunks = math.ceil(size / self.default_multipart_chunk_size)
+        chunk_size = self.default_multipart_chunk_size
+
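+        # If the default chunk size would need more parts than S3 allows, grow the
+        # chunk size just enough to fit within S3_MAX_NUM_PARTS_PER_UPLOAD; give up
+        # only when even that chunk size exceeds the per-part maximum.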
+        if chunks > S3_MAX_NUM_PARTS_PER_UPLOAD:
+            chunk_size = math.ceil(size / S3_MAX_NUM_PARTS_PER_UPLOAD)
+            if chunk_size > S3_MAX_PART_SIZE_BYTES:
+                raise StorageError(
+                    f"Cannot upload a file of size {size}. "
+                    f"Chunk size {chunk_size} is too big for each part of a multipart upload."
+                )
+            chunks = math.ceil(size / chunk_size)
+
+        return chunks, chunk_size
+
     def multipart_upload_file_object(
         self,
         *,
@@ -500,11 +525,11 @@ def multipart_upload_file_object(
         start_of_multipart_upload = time.monotonic()
         bytes_sent = 0
 
-        chunks: int = 1
-        if size is not None:
-            chunks = math.ceil(size / self.multipart_chunk_size)
+        chunks, chunk_size = self.calculate_chunks_and_chunk_size(size)
         args, sanitized_metadata, path = self._init_args_for_multipart(key, metadata, mimetype, cache_control)
-        self.log.debug("Starting to upload multipart file: %r, size: %s, chunks: %s", path, size, chunks)
+        self.log.debug(
+            "Starting to upload multipart file: %r, size: %s, chunks: %d (chunk size: %d)", path, size, chunks, chunk_size
+        )
 
         parts: list[CompletedPartTypeDef] = []
         part_number = 1
@@ -518,7 +543,7 @@ def multipart_upload_file_object(
         mp_id = cmu_response["UploadId"]
 
         while True:
-            data = self._read_bytes(fp, self.multipart_chunk_size)
+            data = self._read_bytes(fp, chunk_size)
             if not data:
                 break
 
@@ -635,7 +660,7 @@ def store_file_object(
         upload_progress_fn: IncrementalProgressCallbackType = None,
     ) -> None:
         if not self._should_multipart(
-            fd=fd, chunk_size=self.multipart_chunk_size, default=True, metadata=metadata, multipart=multipart
+            fd=fd, chunk_size=self.default_multipart_chunk_size, default=True, metadata=metadata, multipart=multipart
         ):
             data = fd.read()
             self.store_file_from_memory(key, data, metadata, cache_control=cache_control, mimetype=mimetype)
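
A quick sketch of how the new calculation behaves for an oversized upload, assuming
illustrative values (S3_MAX_NUM_PARTS_PER_UPLOAD = 10000 and a 5 GiB
S3_MAX_PART_SIZE_BYTES, matching the usual S3 limits) and a hypothetical 100 MiB
default chunk size:

import math

S3_MAX_NUM_PARTS_PER_UPLOAD = 10_000  # assumed value: S3's part-count limit
S3_MAX_PART_SIZE_BYTES = 5 * 1024**3  # assumed value: S3's 5 GiB part-size limit
default_chunk_size = 100 * 1024**2    # hypothetical default_multipart_chunk_size

size = 2 * 1024**4  # a 2 TiB upload
chunks = math.ceil(size / default_chunk_size)  # 20972 parts: over the limit
chunk_size = default_chunk_size
if chunks > S3_MAX_NUM_PARTS_PER_UPLOAD:
    chunk_size = math.ceil(size / S3_MAX_NUM_PARTS_PER_UPLOAD)  # ~210 MiB per part
    chunks = math.ceil(size / chunk_size)
print(chunks, chunk_size)  # 10000 219902326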