diff --git a/docs/changelog/113989.yaml b/docs/changelog/113989.yaml
new file mode 100644
index 0000000000000..7bf50b52d9e07
--- /dev/null
+++ b/docs/changelog/113989.yaml
@@ -0,0 +1,5 @@
+pr: 113989
+summary: Add `max_multipart_parts` setting to S3 repository
+area: Snapshot/Restore
+type: enhancement
+issues: []
diff --git a/docs/reference/snapshot-restore/repository-s3.asciidoc b/docs/reference/snapshot-restore/repository-s3.asciidoc
index 46ce827ea29ad..098e8b4b276f1 100644
--- a/docs/reference/snapshot-restore/repository-s3.asciidoc
+++ b/docs/reference/snapshot-restore/repository-s3.asciidoc
@@ -261,9 +261,11 @@ multiple deployments may share the same bucket.
 
 `chunk_size`::
 
-    (<<byte-units,byte value>>) Big files can be broken down into chunks during snapshotting if needed.
-    Specify the chunk size as a value and unit, for example:
-    `1TB`, `1GB`, `10MB`. Defaults to the maximum size of a blob in the S3 which is `5TB`.
+    (<<byte-units,byte value>>) The maximum size of object that {es} will write to the repository
+    when creating a snapshot. Files which are larger than `chunk_size` will be chunked into several
+    smaller objects. {es} may also split a file across multiple objects to satisfy other constraints
+    such as the `max_multipart_parts` limit. Defaults to `5TB` which is the
+    https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum size of an object in AWS S3].
 
 `compress`::
 
@@ -292,6 +294,14 @@ include::repository-shared-settings.asciidoc[]
     size allowed by S3. Defaults to `100mb` or `5%` of JVM heap, whichever is smaller.
 
+`max_multipart_parts`::
+
+    (integer) The maximum number of parts that {es} will write during a multipart upload of a single
+    object. Files which are larger than `buffer_size × max_multipart_parts` will be chunked into
+    several smaller objects. {es} may also split a file across multiple objects to satisfy other
+    constraints such as the `chunk_size` limit. Defaults to `10000` which is the
+    https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum number of parts in a multipart upload in AWS S3].
+
 `canned_acl`::
 
     The S3 repository supports all
diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
index 1af3b1bc5b342..4597f93d38b92 100644
--- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
+++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java
@@ -141,6 +141,11 @@ class S3Repository extends MeteredBlobStoreRepository {
         MAX_FILE_SIZE_USING_MULTIPART
     );
 
+    /**
+     * Maximum parts number for multipart upload. (see https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html)
+     */
+    static final Setting<Integer> MAX_MULTIPART_PARTS = Setting.intSetting("max_multipart_parts", 10_000, 1, 10_000);
+
     /**
      * Sets the S3 storage class type for the backup files. Values may be standard, reduced_redundancy,
      * standard_ia, onezone_ia and intelligent_tiering. Defaults to standard.
@@ -254,7 +259,9 @@ class S3Repository extends MeteredBlobStoreRepository {
         }
 
         this.bufferSize = BUFFER_SIZE_SETTING.get(metadata.settings());
-        this.chunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
+        var maxChunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
+        var maxPartsNum = MAX_MULTIPART_PARTS.get(metadata.settings());
+        this.chunkSize = objectSizeLimit(maxChunkSize, bufferSize, maxPartsNum);
 
         // We make sure that chunkSize is bigger or equal than/to bufferSize
         if (this.chunkSize.getBytes() < bufferSize.getBytes()) {
@@ -303,6 +310,20 @@ private static Map<String, String> buildLocation(RepositoryMetadata metadata) {
         return Map.of("base_path", BASE_PATH_SETTING.get(metadata.settings()), "bucket", BUCKET_SETTING.get(metadata.settings()));
     }
 
+    /**
+     * Calculates S3 object size limit based on 2 constraints: maximum object (chunk) size
+     * and maximum number of parts for multipart upload.
+     * https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
+     *
+     * @param chunkSize s3 object size
+     * @param bufferSize s3 multipart upload part size
+     * @param maxPartsNum s3 multipart upload max parts number
+     */
+    private static ByteSizeValue objectSizeLimit(ByteSizeValue chunkSize, ByteSizeValue bufferSize, int maxPartsNum) {
+        var bytes = Math.min(chunkSize.getBytes(), bufferSize.getBytes() * maxPartsNum);
+        return ByteSizeValue.ofBytes(bytes);
+    }
+
     /**
      * Holds a reference to delayed repository operation {@link Scheduler.Cancellable} so it can be cancelled should the repository be
      * closed concurrently.
diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java
index 1eab59ebb0eb7..3817af4def888 100644
--- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java
+++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java
@@ -175,4 +175,37 @@ public void testAnalysisFailureDetail() {
         }
     }
 
+    // ensures that chunkSize is limited to chunk_size setting, when buffer_size * parts_num is bigger
+    public void testChunkSizeLimit() {
+        var meta = new RepositoryMetadata(
+            "dummy-repo",
+            "mock",
+            Settings.builder()
+                .put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
+                .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "1GB")
+                .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
+                .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000) // ~1TB
+                .build()
+        );
+        try (var repo = createS3Repo(meta)) {
+            assertEquals(ByteSizeValue.ofGb(1), repo.chunkSize());
+        }
+    }
+
+    // ensures that chunkSize is limited to buffer_size * parts_num, when chunk_size setting is bigger
+    public void testPartsNumLimit() {
+        var meta = new RepositoryMetadata(
+            "dummy-repo",
+            "mock",
+            Settings.builder()
+                .put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
+                .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "5TB")
+                .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
+                .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000)
+                .build()
+        );
+        try (var repo = createS3Repo(meta)) {
+            assertEquals(ByteSizeValue.ofMb(1_000_000), repo.chunkSize());
+        }
+    }
 }
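For reference (not part of the patch), here is a minimal standalone sketch of the rule that objectSizeLimit() implements: the effective chunk size is the smaller of the `chunk_size` setting and `buffer_size * max_multipart_parts`. The class name ObjectSizeLimitExample, the main method, and the plain-long byte arithmetic are illustrative only; the repository code itself works with ByteSizeValue as shown in the diff above.

// Illustration only: mirrors the min(chunk_size, buffer_size * max_multipart_parts) rule with plain longs.
class ObjectSizeLimitExample {

    // Effective object size limit = min(chunk_size, buffer_size * max_multipart_parts).
    static long objectSizeLimit(long chunkSizeBytes, long bufferSizeBytes, int maxPartsNum) {
        return Math.min(chunkSizeBytes, bufferSizeBytes * maxPartsNum);
    }

    public static void main(String[] args) {
        long mb = 1024L * 1024L;
        long gb = 1024L * mb;
        long tb = 1024L * gb;

        // chunk_size = 1GB is the tighter bound (100MB * 10,000 parts would allow ~1TB),
        // matching testChunkSizeLimit above.
        System.out.println(objectSizeLimit(1 * gb, 100 * mb, 10_000));  // 1073741824 bytes = 1GB

        // chunk_size = 5TB exceeds 100MB * 10,000 parts = 1,000,000MB, so the parts limit wins,
        // matching testPartsNumLimit above.
        System.out.println(objectSizeLimit(5 * tb, 100 * mb, 10_000)); // 1048576000000 bytes = 1,000,000MB
    }
}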