Skip to content

Commit f314650

Browse files
author
Duc Hieu Pham
committed
Fix a bug in checking CopyObject response
Summary: According to AWS docs: ``` A copy request might return an error when Amazon S3 receives the copy request or while Amazon S3 is copying the files. If the error occurs before the copy operation starts, you receive a standard Amazon S3 error. If the error occurs during the copy operation, the error response is embedded in the 200 OK response. This means that a 200 OK response can contain either a success or an error. Design your application to parse the contents of the response and handle it appropriately. ``` That means even if we receive 200, it still might fail. According to another doc: https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_s3_1_1_model_1_1_copy_object_result_details.html ``` The source and destination ETag is identical for a successfully copied object ``` That means we need to compare the 2 ETags to be sure. Test Plan: unit test. also deploy in my namespace. Reviewers: dhruba, igor Reviewed By: dhruba Differential Revision: https://rockset.phacility.com/D6325
1 parent 77e0d58 commit f314650

File tree

2 files changed

+101
-11
lines changed

2 files changed

+101
-11
lines changed

cloud/aws/aws_s3.cc

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "util/string_util.h"
5353

5454
namespace rocksdb {
55+
5556
#ifdef USE_AWS
5657
class CloudRequestCallbackGuard {
5758
public:
@@ -159,12 +160,25 @@ class AwsS3ClientWrapper {
159160
return outcome;
160161
}
161162

163+
// The copy RPC is only successful iff outcome is success and etags between
164+
// src and dst match.
165+
static bool isCopyCloudObjectSuccess(
166+
Aws::S3::Model::CopyObjectOutcome& outcome, const Aws::String& src_etag) {
167+
bool success = false;
168+
if (outcome.IsSuccess()) {
169+
const auto& detail = outcome.GetResult().GetCopyObjectResultDetails();
170+
success = (detail.ETagHasBeenSet() && detail.GetETag() == src_etag);
171+
}
172+
return success;
173+
}
174+
162175
Aws::S3::Model::CopyObjectOutcome CopyCloudObject(
163-
const Aws::S3::Model::CopyObjectRequest& request) {
176+
const Aws::S3::Model::CopyObjectRequest& request,
177+
const Aws::String& src_etag) {
164178
CloudRequestCallbackGuard t(cloud_request_callback_.get(),
165179
CloudRequestOpType::kCopyOp);
166180
auto outcome = client_->CopyObject(request);
167-
t.SetSuccess(outcome.IsSuccess());
181+
t.SetSuccess(isCopyCloudObjectSuccess(outcome, src_etag));
168182
return outcome;
169183
}
170184

@@ -746,6 +760,25 @@ Status S3StorageProvider::CopyCloudObject(const std::string& bucket_name_src,
746760

747761
Aws::String src_url = src_bucket + src_object;
748762

763+
// Get the metadata of the source object. We need the metadata so that we can
764+
// compare the ETag between the source object and dest object. Only when these
765+
// ETags match can we conclude this CopyObject operations succeed.
766+
//
767+
// More details:
768+
// https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_s3_1_1_model_1_1_copy_object_result_details.html
769+
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html
770+
Aws::S3::Model::HeadObjectRequest head_request;
771+
head_request.SetBucket(ToAwsString(bucket_name_src));
772+
head_request.SetKey(ToAwsString(object_path_src));
773+
774+
auto head_outcome = s3client_->HeadObject(head_request);
775+
if (!head_outcome.IsSuccess()) {
776+
return Status::NotFound(
777+
"[s3] CopyCloudObject: Fail to get metadata of src object: %s",
778+
head_outcome.GetError().GetMessage().c_str());
779+
}
780+
const auto& src_etag = head_outcome.GetResult().GetETag();
781+
749782
// create copy request
750783
Aws::S3::Model::CopyObjectRequest request;
751784
request.SetCopySource(src_url);
@@ -755,15 +788,29 @@ Status S3StorageProvider::CopyCloudObject(const std::string& bucket_name_src,
755788

756789
// execute request
757790
Aws::S3::Model::CopyObjectOutcome outcome =
758-
s3client_->CopyCloudObject(request);
759-
bool isSuccess = outcome.IsSuccess();
791+
s3client_->CopyCloudObject(request, src_etag);
792+
bool isSuccess =
793+
AwsS3ClientWrapper::isCopyCloudObjectSuccess(outcome, src_etag);
760794
if (!isSuccess) {
761-
const Aws::Client::AWSError<Aws::S3::S3Errors>& error = outcome.GetError();
762-
std::string errmsg(error.GetMessage().c_str());
795+
// S3 CopyObject RPC can report errors in 2 ways:
796+
// 1. outcome.IsSuccess() = false
797+
// 2. outcome.IsSuccess() = true but the outcome's ETag doesn't match the
798+
// source's ETag.
799+
if (!outcome.IsSuccess()) {
800+
const Aws::Client::AWSError<Aws::S3::S3Errors>& error =
801+
outcome.GetError();
802+
std::string errmsg(error.GetMessage().c_str());
803+
Log(InfoLogLevel::ERROR_LEVEL, env_->info_log_,
804+
"[s3] S3WritableFile src path %s error in copying to %s %s",
805+
src_url.c_str(), dest_object.c_str(), errmsg.c_str());
806+
return Status::IOError(dest_object.c_str(), errmsg.c_str());
807+
}
808+
763809
Log(InfoLogLevel::ERROR_LEVEL, env_->info_log_,
764-
"[s3] S3WritableFile src path %s error in copying to %s %s",
765-
src_url.c_str(), dest_object.c_str(), errmsg.c_str());
766-
return Status::IOError(dest_object.c_str(), errmsg.c_str());
810+
"[s3] S3WritableFile src path %s error in copying to %s: CopyObject "
811+
"etags don't match",
812+
src_url.c_str(), dest_object.c_str());
813+
return Status::IOError("CopyObject S3 etags don't match");
767814
}
768815
Log(InfoLogLevel::INFO_LEVEL, env_->info_log_,
769816
"[s3] S3WritableFile src path %s copied to %s %s", src_url.c_str(),

cloud/db_cloud_test.cc

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// Copyright (c) 2017 Rockset
22

3+
#include <gtest/gtest.h>
4+
35
#ifndef ROCKSDB_LITE
46

57
#ifdef USE_AWS
68

7-
#include "rocksdb/cloud/db_cloud.h"
8-
99
#include <algorithm>
1010
#include <chrono>
1111
#include <cinttypes>
@@ -18,6 +18,8 @@
1818
#include "file/filename.h"
1919
#include "logging/logging.h"
2020
#include "rocksdb/cloud/cloud_storage_provider.h"
21+
#include "rocksdb/cloud/db_cloud.h"
22+
#include "rocksdb/env.h"
2123
#include "rocksdb/options.h"
2224
#include "rocksdb/status.h"
2325
#include "rocksdb/table.h"
@@ -1419,6 +1421,47 @@ TEST_F(CloudTest, CheckpointToCloud) {
14191421
checkpoint_bucket.GetBucketName(), checkpoint_bucket.GetObjectPath());
14201422
}
14211423

1424+
// Basic test to copy object within S3.
1425+
TEST_F(CloudTest, CopyObjectTest) {
1426+
CreateAwsEnv();
1427+
1428+
// We need to open an empty DB in order for epoch to work.
1429+
OpenDB();
1430+
1431+
std::string content = "This is a test file";
1432+
std::string fname = dbname_ + "/100000.sst";
1433+
std::string dst_fname = dbname_ + "/200000.sst";
1434+
1435+
{
1436+
std::unique_ptr<WritableFile> writableFile;
1437+
aenv_->NewWritableFile(fname, &writableFile, EnvOptions());
1438+
writableFile->Append(content);
1439+
writableFile->Fsync();
1440+
}
1441+
1442+
Status st = aenv_->GetCloudEnvOptions().storage_provider->CopyCloudObject(
1443+
aenv_->GetSrcBucketName(), aenv_->RemapFilename(fname),
1444+
aenv_->GetSrcBucketName(), dst_fname);
1445+
ASSERT_OK(st);
1446+
1447+
{
1448+
std::unique_ptr<CloudStorageReadableFile> readableFile;
1449+
st = aenv_->GetCloudEnvOptions().storage_provider->NewCloudReadableFile(
1450+
aenv_->GetSrcBucketName(), dst_fname, &readableFile, EnvOptions());
1451+
ASSERT_OK(st);
1452+
1453+
char scratch[100];
1454+
Slice result;
1455+
st = dynamic_cast<SequentialFile*>(readableFile.get())
1456+
->Read(100, &result, scratch);
1457+
ASSERT_OK(st);
1458+
ASSERT_EQ(19, result.size());
1459+
ASSERT_EQ(result, Slice(content));
1460+
}
1461+
1462+
CloseDB();
1463+
}
1464+
14221465
#ifdef AWS_DO_NOT_RUN
14231466
//
14241467
// Verify that we can cache data from S3 in persistent cache.

0 commit comments

Comments
 (0)