Skip to content

Commit 2b91250

Browse files
laramielcopybara-github
authored andcommitted
s3: Improve AWS checksum validation
Fixes: #244 PiperOrigin-RevId: 786337455 Change-Id: Ie2e88cb54ddd2d4917cd496aa540dad9f024f508
1 parent 592b597 commit 2b91250

File tree

2 files changed

+65
-20
lines changed

2 files changed

+65
-20
lines changed

tensorstore/kvstore/s3/s3_key_value_store.cc

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -437,8 +437,12 @@ struct ReadTask : public RateLimiterNode,
437437

438438
if (options.byte_range.size() != 0) {
439439
request_builder.MaybeAddRangeHeader(options.byte_range);
440+
if (options.byte_range.IsFull()) {
441+
// Checksum validation is only supported when reading the entire object,
442+
// and when the object was not written with a chunked upload.
443+
request_builder.AddHeader("x-amz-checksum-mode", "ENABLED");
444+
}
440445
}
441-
request_builder.AddHeader("x-amz-checksum-mode", "ENABLED");
442446

443447
const auto& ehr = endpoint_region_.value();
444448
start_time_ = absl::Now();
@@ -515,13 +519,22 @@ struct ReadTask : public RateLimiterNode,
515519
options.generation_conditions.if_not_equal, start_time_});
516520
}
517521

518-
// Validate checksum if present.
519-
if (auto it = httpresponse.headers.find("x-amz-checksum-sha256");
520-
it != httpresponse.headers.end()) {
521-
auto payload_sha256 = PayloadSha256Base64(httpresponse.payload);
522-
if (payload_sha256 != it->second) {
523-
return absl::DataLossError(absl::StrFormat(
524-
"Content hash mismatch: %s vs %s", payload_sha256, it->second));
522+
// Validate checksum if possible.
523+
if (auto checksum_sha256 =
524+
httpresponse.headers.find("x-amz-checksum-sha256");
525+
checksum_sha256 != httpresponse.headers.end()) {
526+
if (auto checksum_type = httpresponse.headers.find("x-amz-checksum-type");
527+
checksum_type == httpresponse.headers.end() ||
528+
checksum_type->second == "FULL_OBJECT") {
529+
// Only validate the payload checksum if the header indicates a full
530+
// object checksum was sent; multi-part uploads need to be validated
531+
// using the etag and multi-part configuration.
532+
auto payload_sha256 = PayloadSha256Base64(httpresponse.payload);
533+
if (payload_sha256 != checksum_sha256->second) {
534+
return absl::DataLossError(
535+
absl::StrFormat("Content hash mismatch: %s vs %s", payload_sha256,
536+
checksum_sha256->second));
537+
}
525538
}
526539
}
527540

tensorstore/kvstore/s3/s3_key_value_store_test.cc

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -155,26 +155,46 @@ TEST(S3KeyValueStoreTest, SimpleMock_VirtualHost) {
155155
HttpResponse{200, absl::Cord(),
156156
HeaderMap{{"x-amz-bucket-region", "us-east-1"}}}},
157157

158-
{"GET https://my-bucket.s3.us-east-1.amazonaws.com/tmp:1/key_read",
158+
{"GET https://my-bucket.s3.us-east-1.amazonaws.com/tmp:1/key_read1",
159159
HttpResponse{
160160
200, absl::Cord("abcd"),
161161
HeaderMap{{"etag", "\"900150983cd24fb0d6963f7d28e17f72\""},
162162
{"x-amz-checksum-sha256",
163163
"iNQmb9TmM40TuEX88olXnSCciXgjuSF9o+Fhk28DFYk="}}}},
164164

165-
{"GET https://my-bucket.s3.us-east-1.amazonaws.com/tmp:1/empty_read",
165+
{"GET https://my-bucket.s3.us-east-1.amazonaws.com/tmp:1/key_read2",
166+
HttpResponse{200, absl::Cord("abcd"),
167+
HeaderMap{
168+
{"etag", "\"900150983cd24fb0d6963f7d28e17f72\""},
169+
{"x-amz-checksum-sha256",
170+
"iNQmb9TmM40TuEX88olXnSCciXgjuSF9o+Fhk28DFYk="},
171+
{"x-amz-checksum-type", "FULL_OBJECT"},
172+
}}},
173+
174+
{"GET https://my-bucket.s3.us-east-1.amazonaws.com/tmp:1/key_read3",
166175
HttpResponse{
167-
200, absl::Cord(),
168-
HeaderMap{{"etag", "\"900150983cd24fb0d6963f7d28e17f73\""},
169-
{"x-amz-checksum-sha256",
170-
"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="}}}},
176+
200, absl::Cord("abcd"),
177+
HeaderMap{
178+
{"etag", "\"900150983cd24fb0d6963f7d28e17f72\""},
179+
{"x-amz-checksum-sha256",
180+
"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="}, // mismatch
181+
{"x-amz-checksum-type", "COMPOSITE"}, // not FULL_OBJECT
182+
}}},
183+
184+
{"GET https://my-bucket.s3.us-east-1.amazonaws.com/tmp:1/empty_read",
185+
HttpResponse{200, absl::Cord(),
186+
HeaderMap{
187+
{"etag", "\"900150983cd24fb0d6963f7d28e17f73\""},
188+
{"x-amz-checksum-sha256",
189+
"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="},
190+
}}},
171191

172192
{"GET https://my-bucket.s3.us-east-1.amazonaws.com/tmp:1/sha_mismatch",
173-
HttpResponse{
174-
200, absl::Cord("xyz"),
175-
HeaderMap{{"etag", "\"900150983cd24fb0d6963f7d28e17f73\""},
176-
{"x-amz-checksum-sha256",
177-
"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="}}}},
193+
HttpResponse{200, absl::Cord("xyz"),
194+
HeaderMap{{"etag", "\"900150983cd24fb0d6963f7d28e17f73\""},
195+
{"x-amz-checksum-sha256",
196+
"47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="},
197+
{"x-amz-checksum-type", "FULL_OBJECT"}}}},
178198

179199
{"PUT https://my-bucket.s3.us-east-1.amazonaws.com/tmp:1/key_write",
180200
HttpResponse{
@@ -197,7 +217,19 @@ TEST(S3KeyValueStoreTest, SimpleMock_VirtualHost) {
197217
.result());
198218

199219
EXPECT_THAT(
200-
kvstore::Read(store, "key_read").result(),
220+
kvstore::Read(store, "key_read1").result(),
221+
MatchesKvsReadResult(absl::Cord("abcd"),
222+
StorageGeneration::FromString(
223+
"\"900150983cd24fb0d6963f7d28e17f72\"")));
224+
225+
EXPECT_THAT(
226+
kvstore::Read(store, "key_read2").result(),
227+
MatchesKvsReadResult(absl::Cord("abcd"),
228+
StorageGeneration::FromString(
229+
"\"900150983cd24fb0d6963f7d28e17f72\"")));
230+
231+
EXPECT_THAT(
232+
kvstore::Read(store, "key_read3").result(),
201233
MatchesKvsReadResult(absl::Cord("abcd"),
202234
StorageGeneration::FromString(
203235
"\"900150983cd24fb0d6963f7d28e17f72\"")));

0 commit comments

Comments
 (0)