Skip to content

Commit 0ee12fe

Browse files
laramiel authored and copybara-github committed
kvstore/s3 Use conditional write operations.
AWS has added conditional write support for S3. Using conditional writes improves write atomicity in tensorstore, with some caveats: 1/ Not all S3-compatible object stores support if-match; tensorstore will not issue conditional writes except on AWS unless the variable TENSORSTORE_S3_USE_CONDITIONAL_WRITE is set. 2/ DELETE on AWS is not atomic, even when conditional writes are supported, as DELETE only supports if-match for directory buckets, so at present the if-match header is not used. Relevant API docs: https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObject.html Fixes: #211 PiperOrigin-RevId: 721962621 Change-Id: Ia2831aabba645686de98e4f95103a00ae0b30498
1 parent 61a6cee commit 0ee12fe

20 files changed

+820
-422
lines changed

tensorstore/internal/http/BUILD

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,6 @@ tensorstore_cc_test(
159159
],
160160
deps = [
161161
":http",
162-
"//tensorstore/util:status_testutil",
163162
"@com_google_absl//absl/container:flat_hash_set",
164163
"@com_google_absl//absl/status",
165164
"@com_google_googletest//:gtest_main",
@@ -258,11 +257,10 @@ tensorstore_cc_library(
258257
deps = [
259258
":http",
260259
"//tensorstore/util:result",
261-
"@com_google_absl//absl/container:flat_hash_map",
260+
"@com_google_absl//absl/log:absl_log",
262261
"@com_google_absl//absl/status",
263262
"@com_google_absl//absl/strings",
264263
"@com_google_absl//absl/strings:cord",
265264
"@com_google_absl//absl/synchronization",
266-
"@com_google_absl//absl/time",
267265
],
268266
)

tensorstore/internal/http/http_response.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
#include <limits>
2121
#include <optional>
22-
#include <string>
2322
#include <utility>
2423

2524
#include "absl/status/status.h"
@@ -168,6 +167,12 @@ absl::StatusCode HttpResponseCodeToStatusCode(const HttpResponse& response) {
168167
// body.)
169168
return absl::StatusCode::kOutOfRange;
170169

170+
case 409: // Conflict, such as a concurrent request.
171+
return absl::StatusCode::kAborted;
172+
173+
case 501: // Not Implemented
174+
return absl::StatusCode::kUnimplemented;
175+
171176
// UNAVAILABLE indicates a problem that can go away if the request
172177
// is just retried without any modification. 308 return codes are intended
173178
// for write requests that can be retried. See the documentation and the
@@ -177,7 +182,6 @@ absl::StatusCode HttpResponseCodeToStatusCode(const HttpResponse& response) {
177182
// https://cloud.google.com/storage/docs/request-rate
178183
case 308: // Resume Incomplete
179184
case 408: // Request Timeout
180-
case 409: // Conflict
181185
case 429: // Too Many Requests
182186
case 500: // Internal Server Error
183187
case 502: // Bad Gateway

tensorstore/internal/http/http_response_test.cc

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,12 @@
1414

1515
#include "tensorstore/internal/http/http_response.h"
1616

17-
#include <set>
18-
#include <utility>
19-
20-
#include <gmock/gmock.h>
2117
#include <gtest/gtest.h>
2218
#include "absl/container/flat_hash_set.h"
2319
#include "absl/status/status.h"
24-
#include "tensorstore/util/status_testutil.h"
2520

2621
namespace {
2722

28-
using ::tensorstore::IsOkAndHolds;
29-
using ::tensorstore::internal_http::HttpResponse;
30-
31-
3223
TEST(HttpResponseCodeToStatusTest, AllCodes) {
3324
using ::tensorstore::internal_http::HttpResponseCodeToStatus;
3425

@@ -68,12 +59,24 @@ TEST(HttpResponseCodeToStatusTest, AllCodes) {
6859
HttpResponseCodeToStatus({code, {}, {}}).code())
6960
<< code;
7061
}
71-
for (auto code : {308, 408, 409, 429, 500, 502, 503, 504}) {
62+
for (auto code : {308, 408, 429, 500, 502, 503, 504}) {
7263
seen.insert(code);
7364
EXPECT_EQ(absl::StatusCode::kUnavailable,
7465
HttpResponseCodeToStatus({code, {}, {}}).code())
7566
<< code;
7667
}
68+
for (auto code : {409}) {
69+
seen.insert(code);
70+
EXPECT_EQ(absl::StatusCode::kAborted,
71+
HttpResponseCodeToStatus({code, {}, {}}).code())
72+
<< code;
73+
}
74+
for (auto code : {501}) {
75+
seen.insert(code);
76+
EXPECT_EQ(absl::StatusCode::kUnimplemented,
77+
HttpResponseCodeToStatus({code, {}, {}}).code())
78+
<< code;
79+
}
7780

7881
for (int i = 300; i < 600; i++) {
7982
if (seen.count(i) > 0) continue;
@@ -84,5 +87,4 @@ TEST(HttpResponseCodeToStatusTest, AllCodes) {
8487
}
8588
}
8689

87-
8890
} // namespace

tensorstore/internal/http/mock_http_transport.cc

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616

1717
#include <string>
1818
#include <utility>
19+
#include <vector>
1920

20-
#include "absl/container/flat_hash_map.h"
21+
#include "absl/log/absl_log.h"
2122
#include "absl/status/status.h"
2223
#include "absl/strings/cord.h"
2324
#include "absl/strings/str_cat.h"
@@ -70,10 +71,8 @@ void ApplyResponseToHandler(const Result<HttpResponse>& response,
7071
}
7172
}
7273

73-
void DefaultMockHttpTransport::Reset(
74-
absl::flat_hash_map<std::string, internal_http::HttpResponse>
75-
url_to_response,
76-
bool add_headers) {
74+
void DefaultMockHttpTransport::Reset(Responses url_to_response,
75+
bool add_headers) {
7776
if (add_headers) {
7877
// Add additional headers to the response.
7978
for (auto& kv : url_to_response) {
@@ -90,15 +89,21 @@ void DefaultMockHttpTransport::IssueRequestWithHandler(
9089
const HttpRequest& request, IssueRequestOptions options,
9190
HttpResponseHandler* response_handler) {
9291
std::string key = absl::StrCat(request.method, " ", request.url);
92+
ABSL_LOG(INFO) << key;
9393
absl::MutexLock l(&mutex_);
9494
requests_.push_back(request);
95-
if (auto it =
96-
url_to_response_.find(absl::StrCat(request.method, " ", request.url));
97-
it != url_to_response_.end()) {
98-
return ApplyResponseToHandler(it->second, response_handler);
95+
96+
for (auto& kv : url_to_response_) {
97+
if (!kv.first.empty() && kv.first == key) {
98+
ApplyResponseToHandler(kv.second, response_handler);
99+
kv.first.clear();
100+
return;
101+
}
99102
}
103+
104+
ABSL_LOG(INFO) << "Returning 404 for: " << request;
100105
return ApplyResponseToHandler(
101-
internal_http::HttpResponse{404, absl::Cord(), {}}, response_handler);
106+
internal_http::HttpResponse{404, absl::Cord(key), {}}, response_handler);
102107
}
103108

104109
} // namespace internal_http

tensorstore/internal/http/mock_http_transport.h

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,8 @@
1919
#include <utility>
2020
#include <vector>
2121

22-
#include "absl/container/flat_hash_map.h"
2322
#include "absl/status/status.h"
24-
#include "absl/strings/cord.h"
2523
#include "absl/synchronization/mutex.h"
26-
#include "absl/time/time.h"
2724
#include "tensorstore/internal/http/http_request.h"
2825
#include "tensorstore/internal/http/http_response.h"
2926
#include "tensorstore/internal/http/http_transport.h"
@@ -45,20 +42,27 @@ void ApplyResponseToHandler(const Result<HttpResponse>& response,
4542

4643
/// Mocks an HttpTransport by overriding the IssueRequest method to
4744
/// respond with a predefined set of request-response pairs supplied
48-
/// to the constructor
45+
/// to the constructor.
46+
/// The first matching pair will be returned for each call, then expired.
4947
class DefaultMockHttpTransport : public internal_http::HttpTransport {
5048
public:
51-
DefaultMockHttpTransport(
52-
absl::flat_hash_map<std::string, internal_http::HttpResponse>
53-
url_to_response,
54-
bool add_headers = true) {
49+
using Responses =
50+
std::vector<std::pair<std::string, internal_http::HttpResponse>>;
51+
52+
/// Construct a DefaultMockHttpTransport that returns 404 for all requests.
53+
DefaultMockHttpTransport() = default;
54+
55+
explicit DefaultMockHttpTransport(Responses url_to_response) {
56+
Reset(std::move(url_to_response), true);
57+
}
58+
DefaultMockHttpTransport(Responses url_to_response, bool add_headers) {
5559
Reset(std::move(url_to_response), add_headers);
5660
}
5761
virtual ~DefaultMockHttpTransport() = default;
5862

59-
void Reset(absl::flat_hash_map<std::string, internal_http::HttpResponse>
60-
url_to_response,
61-
bool add_headers = true);
63+
/// Initializes the list of request-response pairs.
64+
/// The first matching pair will be returned for each call, then expired.
65+
void Reset(Responses url_to_response, bool add_headers = true);
6266

6367
const std::vector<HttpRequest>& requests() const { return requests_; }
6468

@@ -69,8 +73,7 @@ class DefaultMockHttpTransport : public internal_http::HttpTransport {
6973
private:
7074
absl::Mutex mutex_;
7175
std::vector<HttpRequest> requests_;
72-
absl::flat_hash_map<std::string, internal_http::HttpResponse>
73-
url_to_response_;
76+
Responses url_to_response_;
7477
};
7578

7679
} // namespace internal_http

tensorstore/kvstore/s3/BUILD

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ tensorstore_cc_library(
2323
":s3_request_builder",
2424
":s3_resource",
2525
":s3_uri_utils",
26+
":use_conditional_write",
2627
":validate",
2728
"//tensorstore:context",
2829
"//tensorstore/internal:data_copy_concurrency_resource",
@@ -57,6 +58,7 @@ tensorstore_cc_library(
5758
"//tensorstore/util/execution:any_receiver",
5859
"//tensorstore/util/garbage_collection",
5960
"@com_google_absl//absl/base:core_headers",
61+
"@com_google_absl//absl/log:absl_check",
6062
"@com_google_absl//absl/log:absl_log",
6163
"@com_google_absl//absl/status",
6264
"@com_google_absl//absl/strings",
@@ -140,6 +142,7 @@ tensorstore_cc_test(
140142
":s3",
141143
":s3_metadata",
142144
"//tensorstore/internal/http",
145+
"//tensorstore/kvstore:generation",
143146
"//tensorstore/util:status_testutil",
144147
"@com_google_absl//absl/status",
145148
"@com_google_absl//absl/strings:cord",
@@ -274,9 +277,7 @@ tensorstore_cc_test(
274277
":s3_endpoint",
275278
"//tensorstore/internal/http",
276279
"//tensorstore/internal/http:mock_http_transport",
277-
"//tensorstore/util:future",
278280
"//tensorstore/util:status_testutil",
279-
"@com_google_absl//absl/container:flat_hash_map",
280281
"@com_google_absl//absl/status",
281282
"@com_google_absl//absl/strings:cord",
282283
"@com_google_googletest//:gtest_main",
@@ -312,7 +313,6 @@ tensorstore_cc_test(
312313
"//tensorstore/kvstore:test_util",
313314
"//tensorstore/util:future",
314315
"//tensorstore/util:status_testutil",
315-
"@com_google_absl//absl/container:flat_hash_map",
316316
"@com_google_absl//absl/status",
317317
"@com_google_absl//absl/strings",
318318
"@com_google_absl//absl/strings:cord",
@@ -338,6 +338,7 @@ tensorstore_cc_test(
338338
deps = [
339339
":s3",
340340
":s3_request_builder",
341+
":use_conditional_write",
341342
"//tensorstore:context",
342343
"//tensorstore:json_serialization_options_base",
343344
"//tensorstore/internal:env",
@@ -376,3 +377,24 @@ tensorstore_cc_test(
376377
"@com_google_googletest//:gtest_main",
377378
],
378379
)
380+
381+
tensorstore_cc_library(
382+
name = "use_conditional_write",
383+
srcs = ["use_conditional_write.cc"],
384+
hdrs = ["use_conditional_write.h"],
385+
deps = [
386+
"//tensorstore/internal:env",
387+
"@com_google_absl//absl/flags:flag",
388+
"@com_google_absl//absl/strings",
389+
"@com_google_re2//:re2",
390+
],
391+
)
392+
393+
cc_test(
394+
name = "use_conditional_write_test",
395+
srcs = ["use_conditional_write_test.cc"],
396+
deps = [
397+
":use_conditional_write",
398+
"@com_google_googletest//:gtest_main",
399+
],
400+
)

tensorstore/kvstore/s3/credentials/BUILD

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ tensorstore_cc_library(
103103
hdrs = ["test_utils.h"],
104104
deps = [
105105
"//tensorstore/internal/http",
106-
"@com_google_absl//absl/container:flat_hash_map",
107106
"@com_google_absl//absl/strings:cord",
108107
"@com_google_absl//absl/strings:str_format",
109108
"@com_google_absl//absl/time",
@@ -169,7 +168,6 @@ tensorstore_cc_test(
169168
"//tensorstore/internal/http:mock_http_transport",
170169
"//tensorstore/util:result",
171170
"//tensorstore/util:status_testutil",
172-
"@com_google_absl//absl/container:flat_hash_map",
173171
"@com_google_absl//absl/status",
174172
"@com_google_absl//absl/strings:cord",
175173
"@com_google_absl//absl/time",

tensorstore/kvstore/s3/credentials/default_credential_provider_test.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ class DefaultCredentialProviderTest : public ::testing::Test {
7272
};
7373

7474
TEST_F(DefaultCredentialProviderTest, AnonymousCredentials) {
75-
auto mock_transport = std::make_shared<DefaultMockHttpTransport>(
76-
absl::flat_hash_map<std::string, HttpResponse>());
75+
auto mock_transport = std::make_shared<DefaultMockHttpTransport>();
7776
auto provider = std::make_unique<DefaultAwsCredentialsProvider>(
7877
Options{{}, {}, {}, mock_transport});
7978

@@ -156,7 +155,7 @@ TEST_F(DefaultCredentialProviderTest, ConfigureEC2ProviderFromOptions) {
156155
EXPECT_EQ(credentials.expires_at, expiry - absl::Seconds(60));
157156

158157
/// Force failure on credential retrieval
159-
mock_transport->Reset(absl::flat_hash_map<std::string, HttpResponse>{
158+
mock_transport->Reset({
160159
{"POST http://endpoint/latest/api/token",
161160
HttpResponse{404, absl::Cord{""}}},
162161
});
@@ -182,7 +181,7 @@ TEST_F(DefaultCredentialProviderTest, ConfigureEC2ProviderFromOptions) {
182181
EXPECT_EQ(credentials.expires_at, expiry - absl::Seconds(60));
183182

184183
/// Force failure on credential retrieval
185-
mock_transport->Reset(absl::flat_hash_map<std::string, HttpResponse>{
184+
mock_transport->Reset({
186185
{"POST http://endpoint/latest/api/token",
187186
HttpResponse{404, absl::Cord{""}}},
188187
});

0 commit comments

Comments
 (0)