Skip to content

Commit d7a1525

Browse files
committed
Merge branch 'master' into Merge-6.8.1
2 parents b541f9f + 10d2947 commit d7a1525

File tree

8 files changed

+121
-51
lines changed

8 files changed

+121
-51
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,7 @@ fbcode
8383
travis-build/
8484
buckifier/*.pyc
8585
buckifier/__pycache__
86+
87+
compile_commands.json
88+
.vscode
89+
.clangd

cloud/aws/aws_env.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -469,9 +469,9 @@ Status AwsEnv::GetPathForDbid(const std::string& bucket,
469469
Log(InfoLogLevel::DEBUG_LEVEL, info_log_,
470470
"[s3] Bucket %s GetPathForDbid dbid %s", bucket.c_str(), dbid.c_str());
471471

472-
std::unordered_map<std::string, std::string> metadata;
472+
CloudObjectInformation info;
473473
Status st = cloud_env_options.storage_provider->GetCloudObjectMetadata(
474-
bucket, dbidkey, &metadata);
474+
bucket, dbidkey, &info);
475475
if (!st.ok()) {
476476
if (st.IsNotFound()) {
477477
Log(InfoLogLevel::ERROR_LEVEL, info_log_,
@@ -487,8 +487,8 @@ Status AwsEnv::GetPathForDbid(const std::string& bucket,
487487

488488
// Find "dirname" metadata that stores the pathname of the db
489489
const char* kDirnameTag = "dirname";
490-
auto it = metadata.find(kDirnameTag);
491-
if (it != metadata.end()) {
490+
auto it = info.metadata.find(kDirnameTag);
491+
if (it != info.metadata.end()) {
492492
*dirname = it->second;
493493
} else {
494494
st = Status::NotFound("GetPathForDbid");

cloud/aws/aws_s3.cc

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -258,12 +258,23 @@ class S3ReadableFile : public CloudStorageReadableFileImpl {
258258
S3ReadableFile(const std::shared_ptr<AwsS3ClientWrapper>& s3client,
259259
const std::shared_ptr<Logger>& info_log,
260260
const std::string& bucket, const std::string& fname,
261-
uint64_t size)
261+
uint64_t size, std::string content_hash)
262262
: CloudStorageReadableFileImpl(info_log, bucket, fname, size),
263-
s3client_(s3client) {}
263+
s3client_(s3client),
264+
content_hash_(std::move(content_hash)) {}
264265

265266
virtual const char* Type() const { return "s3"; }
266267

268+
virtual size_t GetUniqueId(char* id, size_t max_size) const override {
269+
if (content_hash_.empty()) {
270+
return 0;
271+
}
272+
273+
max_size = std::min(content_hash_.size(), max_size);
274+
memcpy(id, content_hash_.c_str(), max_size);
275+
return max_size;
276+
}
277+
267278
// random access, read data from specified offset in file
268279
Status DoCloudRead(uint64_t offset, size_t n, char* scratch,
269280
uint64_t* bytes_read) const override {
@@ -328,6 +339,7 @@ class S3ReadableFile : public CloudStorageReadableFileImpl {
328339

329340
private:
330341
std::shared_ptr<AwsS3ClientWrapper> s3client_;
342+
std::string content_hash_;
331343
}; // End class S3ReadableFile
332344

333345
/******************** Writablefile ******************/
@@ -371,9 +383,10 @@ class S3StorageProvider : public CloudStorageProviderImpl {
371383
uint64_t* time) override;
372384

373385
// Get the metadata of the object in cloud storage
374-
Status GetCloudObjectMetadata(
375-
const std::string& bucket_name, const std::string& object_path,
376-
std::unordered_map<std::string, std::string>* metadata) override;
386+
Status GetCloudObjectMetadata(const std::string& bucket_name,
387+
const std::string& object_path,
388+
CloudObjectInformation* info) override;
389+
377390
Status PutCloudObjectMetadata(
378391
const std::string& bucket_name, const std::string& object_path,
379392
const std::unordered_map<std::string, std::string>& metadata) override;
@@ -383,6 +396,7 @@ class S3StorageProvider : public CloudStorageProviderImpl {
383396
const std::string& object_path_dest) override;
384397
Status DoNewCloudReadableFile(
385398
const std::string& bucket, const std::string& fname, uint64_t fsize,
399+
const std::string& content_hash,
386400
std::unique_ptr<CloudStorageReadableFile>* result,
387401
const EnvOptions& options) override;
388402
Status NewCloudWritableFile(const std::string& local_path,
@@ -403,11 +417,13 @@ class S3StorageProvider : public CloudStorageProviderImpl {
403417
uint64_t file_size) override;
404418

405419
private:
406-
// If metadata, size or modtime is non-nullptr, returns requested data
420+
// If metadata, size modtime or etag is non-nullptr, returns requested data
407421
Status HeadObject(
408422
const std::string& bucket, const std::string& path,
409423
std::unordered_map<std::string, std::string>* metadata = nullptr,
410-
uint64_t* size = nullptr, uint64_t* modtime = nullptr);
424+
uint64_t* size = nullptr, uint64_t* modtime = nullptr,
425+
std::string* etag = nullptr);
426+
411427
// The S3 client
412428
std::shared_ptr<AwsS3ClientWrapper> s3client_;
413429
};
@@ -646,10 +662,12 @@ Status S3StorageProvider::GetCloudObjectModificationTime(
646662
return HeadObject(bucket_name, object_path, nullptr, nullptr, time);
647663
}
648664

649-
Status S3StorageProvider::GetCloudObjectMetadata(
650-
const std::string& bucket_name, const std::string& object_path,
651-
std::unordered_map<std::string, std::string>* metadata) {
652-
return HeadObject(bucket_name, object_path, metadata, nullptr, nullptr);
665+
Status S3StorageProvider::GetCloudObjectMetadata(const std::string& bucket_name,
666+
const std::string& object_path,
667+
CloudObjectInformation* info) {
668+
assert(info != nullptr);
669+
return HeadObject(bucket_name, object_path, &info->metadata, &info->size,
670+
&info->modification_time, &info->content_hash);
653671
}
654672

655673
Status S3StorageProvider::PutCloudObjectMetadata(
@@ -680,10 +698,11 @@ Status S3StorageProvider::PutCloudObjectMetadata(
680698

681699
Status S3StorageProvider::DoNewCloudReadableFile(
682700
const std::string& bucket, const std::string& fname, uint64_t fsize,
701+
const std::string& content_hash,
683702
std::unique_ptr<CloudStorageReadableFile>* result,
684703
const EnvOptions& /*options*/) {
685-
result->reset(
686-
new S3ReadableFile(s3client_, env_->info_log_, bucket, fname, fsize));
704+
result->reset(new S3ReadableFile(s3client_, env_->info_log_, bucket, fname,
705+
fsize, content_hash));
687706
return Status::OK();
688707
}
689708

@@ -700,7 +719,7 @@ Status S3StorageProvider::NewCloudWritableFile(
700719
Status S3StorageProvider::HeadObject(
701720
const std::string& bucket_name, const std::string& object_path,
702721
std::unordered_map<std::string, std::string>* metadata, uint64_t* size,
703-
uint64_t* modtime) {
722+
uint64_t* modtime, std::string* etag) {
704723
Aws::S3::Model::HeadObjectRequest request;
705724
request.SetBucket(ToAwsString(bucket_name));
706725
request.SetKey(ToAwsString(object_path));
@@ -727,6 +746,9 @@ Status S3StorageProvider::HeadObject(
727746
if (modtime != nullptr) {
728747
*modtime = res.GetLastModified().Millis();
729748
}
749+
if (etag != nullptr) {
750+
*etag = std::string(res.GetETag().data(), res.GetETag().length());
751+
}
730752
return Status::OK();
731753
}
732754

cloud/cloud_env_wrapper.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ class MockStorageProvider : public CloudStorageProvider {
5151
uint64_t* /*time*/) override {
5252
return notsup_;
5353
}
54-
Status GetCloudObjectMetadata(
55-
const std::string& /*bucket_name*/, const std::string& /*object_path*/,
56-
std::unordered_map<std::string, std::string>* /*metadata*/) override {
54+
Status GetCloudObjectMetadata(const std::string& /*bucket_name*/,
55+
const std::string& /*object_path*/,
56+
CloudObjectInformation* /* info */) override {
5757
return notsup_;
5858
}
5959
Status CopyCloudObject(const std::string& /*bucket_name_src*/,

cloud/cloud_storage_provider.cc

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -99,24 +99,6 @@ Status CloudStorageReadableFileImpl::Skip(uint64_t n) {
9999
return Status::OK();
100100
}
101101

102-
size_t CloudStorageReadableFileImpl::GetUniqueId(char* id,
103-
size_t max_size) const {
104-
// If this is an SST file name, then it can part of the persistent cache.
105-
// We need to generate a unique id for the cache.
106-
// If it is not a sst file, then nobody should be using this id.
107-
uint64_t file_number;
108-
FileType file_type;
109-
WalFileType log_type;
110-
ParseFileName(RemoveEpoch(basename(fname_)), &file_number, &file_type,
111-
&log_type);
112-
if (max_size >= kMaxVarint64Length && file_number > 0) {
113-
char* rid = id;
114-
rid = EncodeVarint64(rid, file_number);
115-
return static_cast<size_t>(rid - id);
116-
}
117-
return 0;
118-
}
119-
120102
/******************** Writablefile ******************/
121103

122104
CloudStorageWritableFileImpl::CloudStorageWritableFileImpl(
@@ -306,14 +288,14 @@ Status CloudStorageProviderImpl::NewCloudReadableFile(
306288
const std::string& bucket, const std::string& fname,
307289
std::unique_ptr<CloudStorageReadableFile>* result,
308290
const EnvOptions& options) {
309-
// First, check if the file exists and also find its size. We use size in
310-
// CloudReadableFile to make sure we always read the valid ranges of the file
311-
uint64_t size;
312-
Status st = GetCloudObjectSize(bucket, fname, &size);
291+
CloudObjectInformation info;
292+
Status st = GetCloudObjectMetadata(bucket, fname, &info);
293+
313294
if (!st.ok()) {
314295
return st;
315296
}
316-
return DoNewCloudReadableFile(bucket, fname, size, result, options);
297+
return DoNewCloudReadableFile(bucket, fname, info.size, info.content_hash,
298+
result, options);
317299
}
318300

319301
Status CloudStorageProviderImpl::GetCloudObject(

cloud/cloud_storage_provider_impl.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ class CloudStorageReadableFileImpl : public CloudStorageReadableFile {
2121

2222
virtual Status Skip(uint64_t n) override;
2323

24-
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
25-
2624
protected:
2725
virtual Status DoCloudRead(uint64_t offset, size_t n, char* scratch,
2826
uint64_t* bytes_read) const = 0;
@@ -124,6 +122,7 @@ class CloudStorageProviderImpl : public CloudStorageProvider {
124122

125123
virtual Status DoNewCloudReadableFile(
126124
const std::string& bucket, const std::string& fname, uint64_t fsize,
125+
const std::string& content_hash,
127126
std::unique_ptr<CloudStorageReadableFile>* result,
128127
const EnvOptions& options) = 0;
129128

cloud/db_cloud_test.cc

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,8 @@ class CloudTest : public testing::Test {
161161
const std::string& dest_bucket_name,
162162
const std::string& dest_object_path,
163163
std::unique_ptr<DBCloud>* cloud_db,
164-
std::unique_ptr<CloudEnv>* cloud_env) {
164+
std::unique_ptr<CloudEnv>* cloud_env,
165+
bool force_keep_local_on_invalid_dest_bucket = true) {
165166
// The local directory where the clone resides
166167
std::string cname = clone_dir_ + "/" + clone_name;
167168

@@ -177,7 +178,8 @@ class CloudTest : public testing::Test {
177178
copt.dest_bucket.SetBucketName(dest_bucket_name);
178179
}
179180
copt.dest_bucket.SetObjectPath(dest_object_path);
180-
if (!copt.dest_bucket.IsValid()) {
181+
if (!copt.dest_bucket.IsValid() &&
182+
force_keep_local_on_invalid_dest_bucket) {
181183
copt.keep_local_sst_files = true;
182184
}
183185
// Create new AWS env
@@ -1483,6 +1485,56 @@ TEST_F(CloudTest, PersistentCache) {
14831485
CloseDB();
14841486
}
14851487

1488+
// This test create 2 DBs that shares a block cache. Ensure that reads from one
1489+
// DB do not get the values from the other DB.
1490+
TEST_F(CloudTest, SharedBlockCache) {
1491+
cloud_env_options_.keep_local_sst_files = false;
1492+
1493+
// Share the block cache.
1494+
rocksdb::BlockBasedTableOptions bbto;
1495+
bbto.block_cache = NewLRUCache(10 * 1024 * 1024);
1496+
bbto.format_version = 4;
1497+
options_.table_factory.reset(NewBlockBasedTableFactory(bbto));
1498+
1499+
OpenDB();
1500+
1501+
std::unique_ptr<CloudEnv> clone_env;
1502+
std::unique_ptr<DBCloud> clone_db;
1503+
CloneDB("newdb1", cloud_env_options_.src_bucket.GetBucketName(),
1504+
cloud_env_options_.src_bucket.GetObjectPath() + "-clone", &clone_db,
1505+
&clone_env, false /* force_keep_local_on_invalid_dest_bucket */);
1506+
1507+
// Flush the first DB.
1508+
db_->Put(WriteOptions(), "db", "original");
1509+
db_->Flush(FlushOptions());
1510+
1511+
// Flush the second DB.
1512+
clone_db->Put(WriteOptions(), "db", "clone");
1513+
clone_db->Flush(FlushOptions());
1514+
1515+
std::vector<LiveFileMetaData> file_metadatas;
1516+
db_->GetLiveFilesMetaData(&file_metadatas);
1517+
ASSERT_EQ(1, file_metadatas.size());
1518+
1519+
file_metadatas.clear();
1520+
clone_db->GetLiveFilesMetaData(&file_metadatas);
1521+
ASSERT_EQ(1, file_metadatas.size());
1522+
1523+
std::string value;
1524+
clone_db->Get(ReadOptions(), "db", &value);
1525+
ASSERT_EQ("clone", value);
1526+
1527+
db_->Get(ReadOptions(), "db", &value);
1528+
ASSERT_EQ("original", value);
1529+
1530+
// Cleanup
1531+
clone_db->Close();
1532+
CloseDB();
1533+
clone_env->GetCloudEnvOptions().storage_provider->EmptyBucket(
1534+
cloud_env_options_.src_bucket.GetBucketName(),
1535+
cloud_env_options_.src_bucket.GetObjectPath() + "-clone");
1536+
}
1537+
14861538
} // namespace ROCKSDB_NAMESPACE
14871539

14881540
// A black-box test for the cloud wrapper around rocksdb

include/rocksdb/cloud/cloud_storage_provider.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,17 @@ class CloudStorageWritableFile : public WritableFile {
2929
virtual const char* Name() const { return "cloud"; }
3030
};
3131

32+
// Generic information of the object in the cloud. Some information might be
33+
// vendor-dependent.
34+
struct CloudObjectInformation {
35+
uint64_t size;
36+
uint64_t modification_time;
37+
38+
// Cloud-vendor dependent. In S3, we will provide ETag of the object.
39+
std::string content_hash;
40+
std::unordered_map<std::string, std::string> metadata;
41+
};
42+
3243
// A CloudStorageProvider provides the interface to the cloud object
3344
// store. Methods can create and empty buckets, as well as other
3445
// standard bucket object operations get/put/list/delete
@@ -70,9 +81,9 @@ class CloudStorageProvider {
7081
uint64_t* time) = 0;
7182

7283
// Get the metadata of the object in cloud storage
73-
virtual Status GetCloudObjectMetadata(
74-
const std::string& bucket_name, const std::string& object_path,
75-
std::unordered_map<std::string, std::string>* metadata) = 0;
84+
virtual Status GetCloudObjectMetadata(const std::string& bucket_name,
85+
const std::string& object_path,
86+
CloudObjectInformation* info) = 0;
7687

7788
// Copy the specified cloud object from one location in the cloud
7889
// storage to another location in cloud storage

0 commit comments

Comments
 (0)