-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Add a new setting for s3 API call timeout #138072
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a92d132
e36b2bd
02101a0
3153b52
3c1a680
a123c80
c38f377
e8745de
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| pr: 138072 | ||
| summary: Add a new setting for s3 API call timeout | ||
| area: Snapshot/Restore | ||
| type: enhancement | ||
| issues: [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,168 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
| package org.elasticsearch.repositories.s3; | ||
|
|
||
| import fixture.s3.S3HttpHandler; | ||
| import software.amazon.awssdk.core.exception.ApiCallTimeoutException; | ||
|
|
||
| import com.sun.net.httpserver.HttpExchange; | ||
| import com.sun.net.httpserver.HttpHandler; | ||
|
|
||
| import org.elasticsearch.ExceptionsHelper; | ||
| import org.elasticsearch.cluster.metadata.ProjectId; | ||
| import org.elasticsearch.common.blobstore.BlobPath; | ||
| import org.elasticsearch.common.blobstore.OperationPurpose; | ||
| import org.elasticsearch.common.bytes.BytesArray; | ||
| import org.elasticsearch.common.settings.MockSecureSettings; | ||
| import org.elasticsearch.common.settings.Settings; | ||
| import org.elasticsearch.common.unit.ByteSizeValue; | ||
| import org.elasticsearch.core.SuppressForbidden; | ||
| import org.elasticsearch.plugins.Plugin; | ||
| import org.elasticsearch.repositories.RepositoriesService; | ||
| import org.elasticsearch.repositories.blobstore.BlobStoreRepository; | ||
| import org.elasticsearch.repositories.blobstore.ESMockAPIBasedRepositoryIntegTestCase; | ||
| import org.elasticsearch.test.ESIntegTestCase; | ||
| import org.elasticsearch.threadpool.ThreadPool; | ||
|
|
||
| import java.io.IOException; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.concurrent.CountDownLatch; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.atomic.AtomicReference; | ||
|
|
||
| import static org.hamcrest.Matchers.containsString; | ||
|
|
||
| @SuppressForbidden(reason = "this test uses a HttpServer to emulate an S3 endpoint") | ||
| // Need to set up a new cluster for each test because cluster settings use randomized authentication settings | ||
| @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST) | ||
| public class S3BlobStoreRepositoryTimeoutTests extends ESMockAPIBasedRepositoryIntegTestCase { | ||
|
|
||
| private S3StallingHttpHandler s3StallingHttpHandler; | ||
|
|
||
| @Override | ||
| public void setUp() throws Exception { | ||
| super.setUp(); | ||
| } | ||
|
|
||
| @Override | ||
| protected Collection<Class<? extends Plugin>> nodePlugins() { | ||
| return List.of(S3RepositoryPlugin.class); | ||
| } | ||
|
|
||
| @Override | ||
| protected String repositoryType() { | ||
| return S3Repository.TYPE; | ||
| } | ||
|
|
||
| @Override | ||
| protected Settings repositorySettings(String repoName) { | ||
| Settings.Builder settingsBuilder = Settings.builder() | ||
| .put(super.repositorySettings(repoName)) | ||
| .put(S3Repository.BUCKET_SETTING.getKey(), "bucket") | ||
| .put(S3Repository.CLIENT_NAME.getKey(), "test"); | ||
| if (randomBoolean()) { | ||
| settingsBuilder.put(S3Repository.BASE_PATH_SETTING.getKey(), randomFrom("test", "test/1")); | ||
| } | ||
| return settingsBuilder.build(); | ||
| } | ||
|
|
||
| @Override | ||
| protected Map<String, HttpHandler> createHttpHandlers() { | ||
| this.s3StallingHttpHandler = new S3StallingHttpHandler("bucket"); | ||
| return Collections.singletonMap("/bucket", this.s3StallingHttpHandler); | ||
| } | ||
|
|
||
| @Override | ||
| protected HttpHandler createErroneousHttpHandler(final HttpHandler delegate) { | ||
| return delegate; | ||
| } | ||
|
|
||
| @Override | ||
| protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { | ||
| final MockSecureSettings secureSettings = new MockSecureSettings(); | ||
| secureSettings.setString(S3ClientSettings.ACCESS_KEY_SETTING.getConcreteSettingForNamespace("test").getKey(), "test_access_key"); | ||
| secureSettings.setString(S3ClientSettings.SECRET_KEY_SETTING.getConcreteSettingForNamespace("test").getKey(), "test_secret_key"); | ||
|
|
||
| final Settings.Builder builder = Settings.builder() | ||
| .put(ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING.getKey(), 0) // We have tests that verify an exact wait time | ||
| .put(S3ClientSettings.ENDPOINT_SETTING.getConcreteSettingForNamespace("test").getKey(), httpServerUrl()) | ||
| .put(S3ClientSettings.READ_TIMEOUT_SETTING.getConcreteSettingForNamespace("test").getKey(), "1s") | ||
| .put(S3ClientSettings.MAX_RETRIES_SETTING.getConcreteSettingForNamespace("test").getKey(), "0") | ||
| .put(S3ClientSettings.API_CALL_TIMEOUT_SETTING.getConcreteSettingForNamespace("test").getKey(), "5s") | ||
| .put(super.nodeSettings(nodeOrdinal, otherSettings)) | ||
| .setSecureSettings(secureSettings); | ||
|
|
||
| return builder.build(); | ||
| } | ||
|
|
||
| public void testWriteTimeout() { | ||
| final String repository = createRepository(randomIdentifier()); | ||
|
|
||
| final var blobStoreRepository = (BlobStoreRepository) internalCluster().getDataNodeInstance(RepositoriesService.class) | ||
| .repository(ProjectId.DEFAULT, repository); | ||
| final var blobContainer = blobStoreRepository.blobStore().blobContainer(BlobPath.EMPTY.add(randomIdentifier())); | ||
|
|
||
| final var latch = new CountDownLatch(1); | ||
| s3StallingHttpHandler.setStallLatchRef(latch); | ||
| try { | ||
| blobContainer.writeBlob( | ||
| randomFrom(OperationPurpose.values()), | ||
| "index-" + randomIdentifier(), | ||
| new BytesArray(randomBytes((int) ByteSizeValue.ofMb(10).getBytes())), | ||
| randomBoolean() | ||
| ); | ||
| fail("should have timed out"); | ||
| } catch (IOException e) { | ||
| final var cause = ExceptionsHelper.unwrap(e, ApiCallTimeoutException.class); | ||
| assertNotNull(cause); | ||
| assertThat(cause.getMessage(), containsString("Client execution did not complete before the specified timeout configuration")); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we assert something about how these outcomes are captured in the metrics? Should we add something specific to |
||
| } finally { | ||
| latch.countDown(); | ||
| } | ||
| } | ||
|
|
||
| @SuppressForbidden(reason = "this test uses a HttpHandler to emulate an S3 endpoint") | ||
| protected class S3StallingHttpHandler extends S3HttpHandler implements BlobStoreHttpHandler { | ||
|
|
||
| private final AtomicReference<CountDownLatch> stallLatchRef = new AtomicReference<>(null); | ||
|
|
||
| S3StallingHttpHandler(final String bucket) { | ||
| super(bucket); | ||
| } | ||
|
|
||
| @Override | ||
| public void handle(final HttpExchange exchange) throws IOException { | ||
| final var latch = stallLatchRef.get(); | ||
| if (latch != null) { | ||
| final String headerDecodedContentLength = exchange.getRequestHeaders().getFirst("x-amz-decoded-content-length"); | ||
| logger.info( | ||
| "--> Simulating server unresponsiveness for request [{} {}] with decoded content length [{}]", | ||
| exchange.getRequestMethod(), | ||
| exchange.getRequestURI(), | ||
| headerDecodedContentLength | ||
| ); | ||
| try { | ||
| final var released = latch.await(60, TimeUnit.SECONDS); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we use |
||
| logger.info("--> Latch released: {}", released); | ||
| } catch (InterruptedException e) { | ||
| Thread.currentThread().interrupt(); | ||
| } | ||
| logger.info("--> Done simulating server unresponsiveness"); | ||
| } | ||
| super.handle(exchange); | ||
| } | ||
|
|
||
| void setStallLatchRef(CountDownLatch latch) { | ||
| stallLatchRef.set(latch); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -142,6 +142,16 @@ final class S3ClientSettings { | |
| key -> Setting.intSetting(key, Defaults.RETRY_COUNT, 0, Property.NodeScope) | ||
| ); | ||
|
|
||
| /** | ||
| * The maximum time for an API operation to complete, covering the whole call rather than a single attempt. See also | ||
| * <a href="https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/timeouts.html">AWS SDK docs on timeout</a> | ||
| */ | ||
| static final Setting.AffixSetting<TimeValue> API_CALL_TIMEOUT_SETTING = Setting.affixKeySetting( | ||
| PREFIX, | ||
| "api_call_timeout", | ||
| key -> Setting.timeSetting(key, Defaults.API_CALL_TIMEOUT, Property.NodeScope) | ||
| ); | ||
|
|
||
| /** Formerly whether retries should be throttled (ie use backoff), now unused. V2 AWS SDK always uses throttling. */ | ||
| @UpdateForV10(owner = UpdateForV10.Owner.DISTRIBUTED_COORDINATION) // no longer used, should be removed in v10 | ||
| static final Setting.AffixSetting<Boolean> UNUSED_USE_THROTTLE_RETRIES_SETTING = Setting.affixKeySetting( | ||
|
|
@@ -232,6 +242,11 @@ final class S3ClientSettings { | |
| /** The number of retries to use for the s3 client. */ | ||
| final int maxRetries; | ||
|
|
||
| /** | ||
| * The maximum time, in milliseconds, for an API operation to complete | ||
| */ | ||
| final int apiCallTimeoutMillis; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we keep this as a |
||
|
|
||
| /** Whether the s3 client should use path style access. */ | ||
| final boolean pathStyleAccess; | ||
|
|
||
|
|
@@ -257,6 +272,7 @@ private S3ClientSettings( | |
| long connectionMaxIdleTimeMillis, | ||
| int maxConnections, | ||
| int maxRetries, | ||
| int apiCallTimeoutMillis, | ||
| boolean pathStyleAccess, | ||
| boolean disableChunkedEncoding, | ||
| boolean addPurposeCustomQueryParameter, | ||
|
|
@@ -274,6 +290,7 @@ private S3ClientSettings( | |
| this.connectionMaxIdleTimeMillis = connectionMaxIdleTimeMillis; | ||
| this.maxConnections = maxConnections; | ||
| this.maxRetries = maxRetries; | ||
| this.apiCallTimeoutMillis = apiCallTimeoutMillis; | ||
| this.pathStyleAccess = pathStyleAccess; | ||
| this.disableChunkedEncoding = disableChunkedEncoding; | ||
| this.addPurposeCustomQueryParameter = addPurposeCustomQueryParameter; | ||
|
|
@@ -303,6 +320,9 @@ S3ClientSettings refine(Settings repositorySettings) { | |
| ); | ||
| final int newMaxConnections = getRepoSettingOrDefault(MAX_CONNECTIONS_SETTING, normalizedSettings, maxConnections); | ||
| final int newMaxRetries = getRepoSettingOrDefault(MAX_RETRIES_SETTING, normalizedSettings, maxRetries); | ||
| final int newApiCallTimeoutMillis = Math.toIntExact( | ||
| getRepoSettingOrDefault(API_CALL_TIMEOUT_SETTING, normalizedSettings, TimeValue.timeValueMillis(apiCallTimeoutMillis)).millis() | ||
| ); | ||
| final boolean newPathStyleAccess = getRepoSettingOrDefault(USE_PATH_STYLE_ACCESS, normalizedSettings, pathStyleAccess); | ||
| final boolean newDisableChunkedEncoding = getRepoSettingOrDefault( | ||
| DISABLE_CHUNKED_ENCODING, | ||
|
|
@@ -355,6 +375,7 @@ S3ClientSettings refine(Settings repositorySettings) { | |
| newConnectionMaxIdleTimeMillis, | ||
| newMaxConnections, | ||
| newMaxRetries, | ||
| newApiCallTimeoutMillis, | ||
| newPathStyleAccess, | ||
| newDisableChunkedEncoding, | ||
| newAddPurposeCustomQueryParameter, | ||
|
|
@@ -464,6 +485,7 @@ static S3ClientSettings getClientSettings(final Settings settings, final String | |
| getConfigValue(settings, clientName, CONNECTION_MAX_IDLE_TIME_SETTING).millis(), | ||
| getConfigValue(settings, clientName, MAX_CONNECTIONS_SETTING), | ||
| getConfigValue(settings, clientName, MAX_RETRIES_SETTING), | ||
| Math.toIntExact(getConfigValue(settings, clientName, API_CALL_TIMEOUT_SETTING).millis()), | ||
| getConfigValue(settings, clientName, USE_PATH_STYLE_ACCESS), | ||
| getConfigValue(settings, clientName, DISABLE_CHUNKED_ENCODING), | ||
| getConfigValue(settings, clientName, ADD_PURPOSE_CUSTOM_QUERY_PARAMETER), | ||
|
|
@@ -486,6 +508,7 @@ public boolean equals(final Object o) { | |
| && Objects.equals(connectionMaxIdleTimeMillis, that.connectionMaxIdleTimeMillis) | ||
| && maxConnections == that.maxConnections | ||
| && maxRetries == that.maxRetries | ||
| && apiCallTimeoutMillis == that.apiCallTimeoutMillis | ||
| && Objects.equals(credentials, that.credentials) | ||
| && Objects.equals(protocol, that.protocol) | ||
| && Objects.equals(endpoint, that.endpoint) | ||
|
|
@@ -512,6 +535,7 @@ public int hashCode() { | |
| readTimeoutMillis, | ||
| connectionMaxIdleTimeMillis, | ||
| maxRetries, | ||
| apiCallTimeoutMillis, | ||
| maxConnections, | ||
| disableChunkedEncoding, | ||
| addPurposeCustomQueryParameter, | ||
|
|
@@ -536,5 +560,6 @@ static final class Defaults { | |
| static final TimeValue CONNECTION_MAX_IDLE_TIME = TimeValue.timeValueSeconds(60); | ||
| static final int MAX_CONNECTIONS = 50; | ||
| static final int RETRY_COUNT = 3; | ||
| static final TimeValue API_CALL_TIMEOUT = TimeValue.ZERO; // default to no API call timeout | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we use
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Turns out zero is invalid here: Still I'd prefer to use |
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we use `expectThrows()` here?