5 changes: 5 additions & 0 deletions docs/changelog/113989.yaml
@@ -0,0 +1,5 @@
pr: 113989
summary: Add `parts_number` setting to S3 repository
area: Snapshot/Restore
type: enhancement
issues: []
11 changes: 10 additions & 1 deletion docs/reference/snapshot-restore/repository-s3.asciidoc
@@ -262,7 +262,8 @@ multiple deployments may share the same bucket.
`chunk_size`::

(<<byte-units,byte value>>) Big files can be broken down into chunks during snapshotting if needed.
Specify the chunk size as a value and unit, for example:
When a large file is split into chunks, the chunk size is the smaller of `chunk_size`
and `buffer_size * parts_number`. Specify the chunk size as a value and unit, for example:
`1TB`, `1GB`, `10MB`. Defaults to the maximum size of a blob in the S3 which is `5TB`.

`compress`::
@@ -292,6 +293,14 @@ include::repository-shared-settings.asciidoc[]
size allowed by S3. Defaults to `100mb` or `5%` of JVM heap, whichever is
smaller.

`parts_number`::

(<<number,numeric>>) Maximum number of parts for a multipart upload. When a large file is split into
chunks, the chunk size is the smaller of `chunk_size` and `buffer_size * parts_number`.
Defaults to 10,000; see also the https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[S3 multipart upload limits].
For example, with `buffer_size=100MB` and `parts_number=10,000`, the total size of all parts is about 1TB.
If `chunk_size` is set to `5TB`, the smaller of the two, 1TB, applies.
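To make the interaction concrete, here is a minimal, dependency-free sketch of this rule in plain Java.
The class and method names below are illustrative only; the actual calculation in this change is done by
`S3Repository#objectSizeLimit`, shown further down in this diff.

[source,java]
----
// Minimal sketch, assuming sizes are expressed as raw byte counts.
class EffectiveChunkSize {

    // Effective chunk size = min(chunk_size, buffer_size * parts_number)
    static long effectiveChunkSizeBytes(long chunkSizeBytes, long bufferSizeBytes, int partsNumber) {
        return Math.min(chunkSizeBytes, bufferSizeBytes * (long) partsNumber);
    }

    public static void main(String[] args) {
        long mb = 1024L * 1024L;      // 1 MB in bytes
        long tb = 1024L * 1024L * mb; // 1 TB in bytes
        // buffer_size=100MB and parts_number=10,000 cap the object at 1,000,000 MB (~1TB),
        // which is smaller than chunk_size=5TB, so the effective chunk size is ~1TB.
        long effective = effectiveChunkSizeBytes(5 * tb, 100 * mb, 10_000);
        System.out.println(effective / mb + " MB"); // prints: 1000000 MB
    }
}
----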

`canned_acl`::

The S3 repository supports all
@@ -140,6 +140,11 @@ class S3Repository extends MeteredBlobStoreRepository {
MAX_FILE_SIZE_USING_MULTIPART
);

/**
* Maximum number of parts for a multipart upload (see https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html).
*/
static final Setting<Integer> PARTS_NUMBER = Setting.intSetting("parts_number", 10_000, 1, 10_000);

/**
* Sets the S3 storage class type for the backup files. Values may be standard, reduced_redundancy,
* standard_ia, onezone_ia and intelligent_tiering. Defaults to standard.
@@ -253,7 +258,9 @@ class S3Repository extends MeteredBlobStoreRepository {
}

this.bufferSize = BUFFER_SIZE_SETTING.get(metadata.settings());
this.chunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
var maxChunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
var maxPartsNum = PARTS_NUMBER.get(metadata.settings());
this.chunkSize = objectSizeLimit(maxChunkSize, bufferSize, maxPartsNum);

// We make sure that chunkSize is bigger or equal than/to bufferSize
if (this.chunkSize.getBytes() < bufferSize.getBytes()) {
@@ -302,6 +309,20 @@ private static Map<String, String> buildLocation(RepositoryMetadata metadata) {
return Map.of("base_path", BASE_PATH_SETTING.get(metadata.settings()), "bucket", BUCKET_SETTING.get(metadata.settings()));
}

/**
 * Calculates the S3 object size limit based on two constraints: the maximum object (chunk) size
 * and the maximum number of parts for a multipart upload.
 * See https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
 *
 * @param objectSize object size limit in S3, i.e. the chunk_size setting
 * @param partSize part size in S3, i.e. the buffer_size setting
 * @param partsNum maximum number of parts (buffers)
 */
static ByteSizeValue objectSizeLimit(ByteSizeValue objectSize, ByteSizeValue partSize, int partsNum) {
Reviewer comment (Contributor): Nit: could we align the argument names with the names of the variables passed in? I know they're kinda weird names but we can't change the settings' names for legacy reasons and we're following that naming elsewhere.

var bytes = Math.min(objectSize.getBytes(), partSize.getBytes() * partsNum);
return ByteSizeValue.ofBytes(bytes);
}

/**
* Holds a reference to delayed repository operation {@link Scheduler.Cancellable} so it can be cancelled should the repository be
* closed concurrently.
@@ -175,4 +175,37 @@ public void testAnalysisFailureDetail() {
}
}

// ensures that chunkSize is limited to the chunk_size setting when buffer_size * parts_number is larger
public void testChunkSizeLimit() {
var meta = new RepositoryMetadata(
"dummy-repo",
"mock",
Settings.builder()
.put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
.put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "1GB")
.put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
.put(S3Repository.PARTS_NUMBER.getKey(), 10_000) // ~1TB
.build()
);
try (var repo = createS3Repo(meta)) {
assertEquals(ByteSizeValue.ofGb(1), repo.chunkSize());
}
}

// ensures that chunkSize is limited to buffer_size * parts_number when the chunk_size setting is larger
public void testPartsNumLimit() {
var meta = new RepositoryMetadata(
"dummy-repo",
"mock",
Settings.builder()
.put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
.put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "5TB")
.put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
.put(S3Repository.PARTS_NUMBER.getKey(), 10_000)
.build()
);
try (var repo = createS3Repo(meta)) {
assertEquals(ByteSizeValue.ofMb(1_000_000), repo.chunkSize());
}
}
}