From 28a8b941d2623e32ef698d36d50e365d61227638 Mon Sep 17 00:00:00 2001
From: Mikhail Berezovskiy
Date: Tue, 10 Sep 2024 07:38:51 -0700
Subject: [PATCH 1/2] change default s3 chunk_size

---
 .../repositories/s3/S3Repository.java              | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
index d75a3e8ad433e..764d967d8e03f 100644
--- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
+++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
@@ -117,6 +117,21 @@ class S3Repository extends MeteredBlobStoreRepository {
      */
     static final ByteSizeValue MAX_FILE_SIZE_USING_MULTIPART = new ByteSizeValue(5, ByteSizeUnit.TB);
 
+    /**
+     * Maximum number of parts that can be uploaded using the Multipart Upload API.
+     * (see http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html)
+     */
+    static final long MAX_PARTS_NUMBER_USING_MULTIPART = 10_000;
+
+    /**
+     * Big files should be broken down into chunks taking into account S3 maximum object size and number of parts.
+     * Chunk size would be in range {@code part_size * max_number_of_parts = [50Gb, 5Tb]}, where {@code part_size = [5Mb, 5Gb]}
+     * and {@code max_number_of_parts = 10_000}
+     */
+    static final ByteSizeValue DEFAULT_CHUNK_SIZE = new ByteSizeValue(
+        DEFAULT_BUFFER_SIZE.getBytes() * MAX_PARTS_NUMBER_USING_MULTIPART, ByteSizeUnit.BYTES
+    );
+
     /**
      * Minimum threshold below which the chunk is uploaded using a single request. Beyond this threshold,
      * the S3 repository will use the AWS Multipart Upload API to split the chunk into several parts, each of buffer_size length, and
@@ -135,7 +150,7 @@ class S3Repository extends MeteredBlobStoreRepository {
      */
     static final Setting<ByteSizeValue> CHUNK_SIZE_SETTING = Setting.byteSizeSetting(
         "chunk_size",
-        MAX_FILE_SIZE_USING_MULTIPART,
+        DEFAULT_CHUNK_SIZE,
         new ByteSizeValue(5, ByteSizeUnit.MB),
         MAX_FILE_SIZE_USING_MULTIPART
     );

From e08aabb5a73b5401c5c9de3fce74a1bf1556f84c Mon Sep 17 00:00:00 2001
From: Mikhail Berezovskiy
Date: Tue, 10 Sep 2024 08:39:27 -0700
Subject: [PATCH 2/2] test

---
 .../elasticsearch/repositories/s3/S3Repository.java |  3 ++-
 .../repositories/s3/S3BlobStoreContainerTests.java  | 11 +++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
index 764d967d8e03f..a02b5026c8c58 100644
--- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
+++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
@@ -129,7 +129,8 @@ class S3Repository extends MeteredBlobStoreRepository {
      * and {@code max_number_of_parts = 10_000}
      */
     static final ByteSizeValue DEFAULT_CHUNK_SIZE = new ByteSizeValue(
-        DEFAULT_BUFFER_SIZE.getBytes() * MAX_PARTS_NUMBER_USING_MULTIPART, ByteSizeUnit.BYTES
+        DEFAULT_BUFFER_SIZE.getBytes() * MAX_PARTS_NUMBER_USING_MULTIPART,
+        ByteSizeUnit.BYTES
     );
 
     /**
diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobStoreContainerTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobStoreContainerTests.java
index fbbcfa475da44..b81430d635eef 100644
--- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobStoreContainerTests.java
+++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobStoreContainerTests.java
@@ -26,6 +26,7 @@
 
 import org.elasticsearch.common.blobstore.BlobPath;
 import org.elasticsearch.common.blobstore.BlobStoreException;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.test.ESTestCase;
@@ -84,6 +85,16 @@ public void testExecuteSingleUploadBlobSizeLargerThanBufferSize() {
         assertEquals("Upload request size [2097152] can't be larger than buffer size", e.getMessage());
     }
 
+    public void testDefaultChunkPartsLimit() {
+        var settings = Settings.EMPTY;
+        var blobSize = S3Repository.CHUNK_SIZE_SETTING.getDefault(settings).getBytes();
+        var partSize = S3Repository.BUFFER_SIZE_SETTING.getDefault(settings).getBytes();
+        var parts = S3BlobContainer.numberOfMultiparts(blobSize, partSize);
+        var gotParts = parts.v1();
+        var maxParts = S3Repository.MAX_PARTS_NUMBER_USING_MULTIPART;
+        assertTrue("number of parts should be less-or-equal to " + maxParts + ", got " + gotParts, gotParts <= maxParts);
+    }
+
     public void testExecuteSingleUpload() throws IOException {
         final String bucketName = randomAlphaOfLengthBetween(1, 10);
         final String blobName = randomAlphaOfLengthBetween(1, 10);
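
Note: the new default chunk_size is derived from the buffer size rather than hard-coded at the 5tb object maximum. A minimal worked example of the arithmetic, assuming buffer_size resolves to its 100mb cap (the real DEFAULT_BUFFER_SIZE is heap-dependent, so smaller heaps produce a proportionally smaller default):

    // Worked example of the new default chunk_size arithmetic.
    // Assumption: buffer_size resolves to its 100mb cap; the actual
    // DEFAULT_BUFFER_SIZE is heap-dependent, so this is illustrative only.
    public class ChunkSizeMath {
        public static void main(String[] args) {
            final long mb = 1024L * 1024L;
            final long partSize = 100 * mb;      // assumed default buffer_size
            final long maxParts = 10_000L;       // S3 multipart part-count limit
            final long chunkSize = partSize * maxParts;
            // 1_000_000 mb, roughly 976gb, comfortably under the 5tb S3 object maximum
            System.out.println("default chunk_size: " + chunkSize / (1024 * mb) + "gb");
            // splitting the chunk back into buffer-sized parts stays at the limit
            final long parts = (chunkSize + partSize - 1) / partSize; // ceil division
            System.out.println("parts per chunk: " + parts + " (max " + maxParts + ")");
        }
    }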
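The new test drives S3BlobContainer.numberOfMultiparts with the default chunk and buffer sizes. A minimal sketch of the semantics the test assumes, where v1() is the part count from a ceiling division and v2() is the size of the final, possibly shorter, part; this illustrates the helper's contract as exercised here, not a verified copy of its implementation:

    import org.elasticsearch.core.Tuple;

    class MultipartMath {
        // Splits totalSize into parts of at most partSize bytes.
        // v1() = number of parts, v2() = size of the last part.
        static Tuple<Long, Long> numberOfMultiparts(long totalSize, long partSize) {
            final long parts = totalSize / partSize;
            final long remaining = totalSize % partSize;
            return remaining == 0 ? Tuple.tuple(parts, partSize) : Tuple.tuple(parts + 1, remaining);
        }
    }

Because the new default chunk_size is defined as DEFAULT_BUFFER_SIZE * MAX_PARTS_NUMBER_USING_MULTIPART, the division is exact and v1() should land exactly on the 10_000 limit (assuming BUFFER_SIZE_SETTING defaults to DEFAULT_BUFFER_SIZE), which is why the test asserts less-or-equal rather than a fixed count.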