From 534483dc699bb0938c7b576460b1a1d8b5e15a2b Mon Sep 17 00:00:00 2001
From: githubzilla
Date: Thu, 28 Sep 2023 09:03:13 +0000
Subject: [PATCH 01/16] Google Cloud Storage support

@githubzilla @cmhms
---
 .clang-format                               |   53 +-
 .gitignore                                  |    4 +
 CMakeLists.txt                              |    2 +-
 Makefile                                    |    8 +-
 TARGETS                                     |   10 +-
 build_tools/build_detect_platform           |   12 +-
 cloud/aws/aws_s3.cc                         |    4 +-
 cloud/cloud_file_system.cc                  |  154 +-
 cloud/cloud_file_system_test.cc             |    3 +
 cloud/db_cloud_test.cc                      |   10 +-
 cloud/filename.h                            |   28 +-
 cloud/gcp/gcp_cs.cc                         |  724 ++++
 cloud/gcp/gcp_db_cloud_test.cc              | 3335 +++++++++++++++++
 cloud/gcp/gcp_file_system.cc                |  111 +
 cloud/gcp/gcp_file_system.h                 |   39 +
 cloud/gcp/gcp_file_system_test.cc           |  248 ++
 cloud/gcp/gcp_retry.cc                      |  123 +
 include/rocksdb/cloud/cloud_file_system.h   |  181 +-
 .../rocksdb/cloud/cloud_file_system_impl.h  |    4 +-
 .../cloud/cloud_storage_provider_impl.h     |   81 +-
 src.mk                                      |    7 +-
 21 files changed, 4978 insertions(+), 163 deletions(-)
 create mode 100644 cloud/gcp/gcp_cs.cc
 create mode 100644 cloud/gcp/gcp_db_cloud_test.cc
 create mode 100644 cloud/gcp/gcp_file_system.cc
 create mode 100644 cloud/gcp/gcp_file_system.h
 create mode 100644 cloud/gcp/gcp_file_system_test.cc
 create mode 100644 cloud/gcp/gcp_retry.cc

diff --git a/.clang-format b/.clang-format
index 7c279811ac1..7d9b39f7fe0 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,5 +1,50 @@
-# Complete list of style options can be found at:
-# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
----
+# Use the Google style in this project.
 BasedOnStyle: Google
-...
+
+# Some folks prefer to write "int& foo" while others prefer "int &foo". The
+# Google Style Guide only asks for consistency within a project; we chose
+# "int& foo" for this project:
+DerivePointerAlignment: false
+PointerAlignment: Left
+
+# The Google Style Guide only asks for consistency w.r.t. "east const" vs.
+# "west const" alignment of cv-qualifiers. In this project we use "east const".
+QualifierAlignment: Right
+
+IncludeBlocks: Merge
+IncludeCategories:
+# Matches common headers first, but sorts them after project includes
+- Regex: '^\"google/cloud/internal/disable_deprecation_warnings.inc\"$'
+  Priority: -1
+- Regex: '^\"google/cloud/(internal/|grpc_utils/|testing_util/|[^/]+\.h)'
+  Priority: 1000
+- Regex: '^\"google/cloud/' # project includes should sort first
+  Priority: 500
+- Regex: '^\"generator/' # project includes should sort first
+  Priority: 500
+- Regex: '^\"generator/internal/' # project internals second
+  Priority: 1000
+- Regex: '^\"generator/testing/' # testing helpers third
+  Priority: 1100
+- Regex: '^\"' # And then includes from other projects or the system
+  Priority: 1500
+- Regex: '^'
+  Priority: 4000
+- Regex: '^<[^/]*>'
+  Priority: 5000
+
+# Format raw string literals with a `pb` or `proto` tag as proto.
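+# (e.g. a literal written as R"pb(key: "value")pb" is reformatted using the
+# TextProto rules below; this mirrors clang-format's documented behavior.)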
+RawStringFormats: +- Language: TextProto + Delimiters: + - 'pb' + - 'proto' + BasedOnStyle: Google + +CommentPragmas: '(@copydoc|@copybrief|@see|@overload|@snippet)' \ No newline at end of file diff --git a/.gitignore b/.gitignore index d884f9aa5dc..44b5b01fa6c 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,7 @@ cmake-build-* third-party/folly/ .cache *.sublime-* + +.clang-format +.editorconfig +*.vim diff --git a/CMakeLists.txt b/CMakeLists.txt index e088e94fdc8..eb447b32f06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1651,4 +1651,4 @@ endif() option(WITH_BENCHMARK "build benchmark tests" OFF) if(WITH_BENCHMARK) add_subdirectory(${PROJECT_SOURCE_DIR}/microbench/) -endif() +endif() \ No newline at end of file diff --git a/Makefile b/Makefile index 8a9c884cd50..59cd6db51c0 100644 --- a/Makefile +++ b/Makefile @@ -1886,6 +1886,12 @@ replication_test: cloud/replication_test.o $(TEST_LIBRARY) $(LIBRARY) cloud_file_system_test: cloud/cloud_file_system_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) +gcp_file_system_test: cloud/gcp/gcp_file_system_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +gcp_db_cloud_test: cloud/gcp/gcp_db_cloud_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + cloud_manifest_test: cloud/cloud_manifest_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) @@ -2618,4 +2624,4 @@ list_all_tests: ROCKS_DEP_RULES=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean jtest package analyze tags rocksdbjavastatic% unity.% unity_test checkout_folly, $(MAKECMDGOALS)) ifneq ("$(ROCKS_DEP_RULES)", "") -include $(DEPFILES) -endif +endif \ No newline at end of file diff --git a/TARGETS b/TARGETS index 77a7c16fd7e..d456e95034d 100644 --- a/TARGETS +++ b/TARGETS @@ -375,7 +375,6 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "//folly/synchronization:distributed_mutex", ], headers=None, link_whole=False, extra_test_libs=False) -<<<<<<< HEAD cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[ "cache/cache.cc", "cache/cache_entry_roles.cc", @@ -4983,6 +4982,15 @@ cpp_unittest_wrapper(name="cloud_file_system_test", deps=[":rocksdb_test_lib"], extra_compiler_flags=[]) +cpp_unittest_wrapper(name="gcp_file_system_test", + srcs=["cloud/gcp/gcp_file_system_test.cc"], + deps=[":rocksdb_test_lib"], + extra_compiler_flags=[]) + +cpp_unittest_wrapper(name="gcp_db_cloud_test", + srcs=["cloud/gcp/gcp_db_cloud_test.cc"], + deps=[":rocksdb_test_lib"], + extra_compiler_flags=[]) cpp_unittest_wrapper(name="cloud_manifest_test", srcs=["cloud/cloud_manifest_test.cc"], diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index b56e298554b..9dfa4396431 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -641,6 +641,16 @@ if [ "${USE_AWS}XXX" = "1XXX" ]; then COMMON_FLAGS="$COMMON_FLAGS $S3_CCFLAGS" PLATFORM_LDFLAGS="$S3_LDFLAGS $PLATFORM_LDFLAGS" fi + +if [ "${USE_GCP}XXX" = "1XXX" ]; then + GCP_SDK=/usr/local + GCI=${GCP_SDK}/include/ + GCS_CCFLAGS="$GCS_CCFLAGS -I$GCI -DUSE_GCP" + GCS_LDFLAGS="$GCS_LDFLAGS -lgoogle_cloud_cpp_common -lgoogle_cloud_cpp_storage" + COMMON_FLAGS="$COMMON_FLAGS $GCS_CCFLAGS" + PLATFORM_LDFLAGS="$GCS_LDFLAGS $PLATFORM_LDFLAGS" +fi + # # Support the Kafka WAL storing if the env variable named USE_KAFKA # is set to 1. 
Setting it to any other value or not setting it at all means @@ -832,4 +842,4 @@ if test -n "$USE_FOLLY"; then fi if test -n "$PPC_LIBC_IS_GNU"; then echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU" >> "$OUTPUT" -fi +fi \ No newline at end of file diff --git a/cloud/aws/aws_s3.cc b/cloud/aws/aws_s3.cc index ce9cda635cd..300eda53c36 100644 --- a/cloud/aws/aws_s3.cc +++ b/cloud/aws/aws_s3.cc @@ -688,7 +688,7 @@ IOStatus S3StorageProvider::ExistsCloudObject(const std::string& bucket_name, IOStatus S3StorageProvider::GetCloudObjectSize(const std::string& bucket_name, const std::string& object_path, uint64_t* filesize) { - HeadObjectResult result; + HeadObjectResult result; result.size = filesize; return HeadObject(bucket_name, object_path, &result); } @@ -1064,4 +1064,4 @@ Status CloudStorageProviderImpl::CreateS3Provider( #endif /* USE_AWS */ } } // namespace ROCKSDB_NAMESPACE -#endif // ROCKSDB_LITE +#endif // ROCKSDB_LITE \ No newline at end of file diff --git a/cloud/cloud_file_system.cc b/cloud/cloud_file_system.cc index 4a660830df1..584481a333f 100644 --- a/cloud/cloud_file_system.cc +++ b/cloud/cloud_file_system.cc @@ -7,9 +7,9 @@ #else #include #endif -#include - #include "cloud/aws/aws_file_system.h" +#include "cloud/gcp/gcp_file_system.h" +#include "cloud/cloud_file_system_impl.h" #include "cloud/cloud_log_controller_impl.h" #include "cloud/cloud_manifest.h" #include "cloud/db_cloud_impl.h" @@ -28,6 +28,7 @@ #include "rocksdb/utilities/object_registry.h" #include "rocksdb/utilities/options_type.h" #include "util/string_util.h" +#include namespace ROCKSDB_NAMESPACE { @@ -86,8 +87,8 @@ void CloudFileSystemOptions::Dump(Logger* log) const { } } -bool CloudFileSystemOptions::GetNameFromEnvironment(const char* name, - const char* alt, +bool CloudFileSystemOptions::GetNameFromEnvironment(char const* name, + char const* alt, std::string* result) { char* value = getenv(name); // See if name is set in the environment if (value == nullptr && @@ -101,9 +102,9 @@ bool CloudFileSystemOptions::GetNameFromEnvironment(const char* name, return false; // No, return not found } } -void CloudFileSystemOptions::TEST_Initialize(const std::string& bucket, - const std::string& object, - const std::string& region) { +void CloudFileSystemOptions::TEST_Initialize(std::string const& bucket, + std::string const& object, + std::string const& region) { src_bucket.TEST_Initialize(bucket, object, region); dest_bucket = src_bucket; } @@ -150,9 +151,9 @@ void BucketOptions::SetBucketPrefix(std::string prefix) { // Initializes the bucket properties -void BucketOptions::TEST_Initialize(const std::string& bucket, - const std::string& object, - const std::string& region) { +void BucketOptions::TEST_Initialize(std::string const& bucket, + std::string const& object, + std::string const& region) { std::string prefix; // If the bucket name is not set, then the bucket name is not set, // Set it to either the value of the environment variable or geteuid @@ -188,20 +189,20 @@ static std::unordered_map {"object", {0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const std::string& value, void* addr) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + std::string const& value, void* addr) { auto bucket = static_cast(addr); bucket->SetObjectPath(value); return Status::OK(); }, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const void* addr, std::string* value) { + [](ConfigOptions const& /*opts*/, 
std::string const& /*name*/, + void const* addr, std::string* value) { auto bucket = static_cast(addr); *value = bucket->GetObjectPath(); return Status::OK(); }, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const void* addr1, const void* addr2, std::string* /*mismatch*/) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + void const* addr1, void const* addr2, std::string* /*mismatch*/) { auto bucket1 = static_cast(addr1); auto bucket2 = static_cast(addr2); return bucket1->GetObjectPath() == bucket2->GetObjectPath(); @@ -209,20 +210,20 @@ static std::unordered_map {"region", {0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const std::string& value, void* addr) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + std::string const& value, void* addr) { auto bucket = static_cast(addr); bucket->SetRegion(value); return Status::OK(); }, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const void* addr, std::string* value) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + void const* addr, std::string* value) { auto bucket = static_cast(addr); *value = bucket->GetRegion(); return Status::OK(); }, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const void* addr1, const void* addr2, std::string* /*mismatch*/) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + void const* addr1, void const* addr2, std::string* /*mismatch*/) { auto bucket1 = static_cast(addr1); auto bucket2 = static_cast(addr2); return bucket1->GetRegion() == bucket2->GetRegion(); @@ -230,20 +231,20 @@ static std::unordered_map {"prefix", {0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kNone, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const std::string& value, void* addr) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + std::string const& value, void* addr) { auto bucket = static_cast(addr); bucket->SetBucketName(bucket->GetBucketName(false), value); return Status::OK(); }, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const void* addr, std::string* value) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + void const* addr, std::string* value) { auto bucket = static_cast(addr); *value = bucket->GetBucketPrefix(); return Status::OK(); }, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const void* addr1, const void* addr2, std::string* /*mismatch*/) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + void const* addr1, void const* addr2, std::string* /*mismatch*/) { auto bucket1 = static_cast(addr1); auto bucket2 = static_cast(addr2); return bucket1->GetBucketPrefix() == bucket2->GetBucketPrefix(); @@ -251,20 +252,20 @@ static std::unordered_map {"bucket", {0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kNone, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const std::string& value, void* addr) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + std::string const& value, void* addr) { auto bucket = static_cast(addr); bucket->SetBucketName(value); return Status::OK(); }, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const void* addr, std::string* value) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + void const* addr, std::string* value) { auto bucket = 
static_cast(addr); *value = bucket->GetBucketName(false); return Status::OK(); }, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const void* addr1, const void* addr2, std::string* /*mismatch*/) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + void const* addr1, void const* addr2, std::string* /*mismatch*/) { auto bucket1 = static_cast(addr1); auto bucket2 = static_cast(addr2); return bucket1->GetBucketName(false) == @@ -273,8 +274,8 @@ static std::unordered_map {"TEST", {0, OptionType::kUnknown, OptionVerificationType::kAlias, OptionTypeFlags::kNone, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const std::string& value, void* addr) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + std::string const& value, void* addr) { auto bucket = static_cast(addr); std::string name = value; std::string path; @@ -339,8 +340,8 @@ const std::unordered_map OptionType::kConfigurable, OptionVerificationType::kByNameAllowNull, (OptionTypeFlags::kShared | OptionTypeFlags::kCompareLoose | OptionTypeFlags::kCompareNever | OptionTypeFlags::kAllowNull), - [](const ConfigOptions& opts, const std::string& /*name*/, - const std::string& value, void* addr) { + [](ConfigOptions const& opts, std::string const& /*name*/, + std::string const& value, void* addr) { auto provider = static_cast*>(addr); return CloudStorageProvider::CreateFromString(opts, value, @@ -352,8 +353,8 @@ const std::unordered_map (OptionTypeFlags::kShared | OptionTypeFlags::kCompareLoose | OptionTypeFlags::kCompareNever | OptionTypeFlags::kAllowNull), // Creates a new TableFactory based on value - [](const ConfigOptions& opts, const std::string& /*name*/, - const std::string& value, void* addr) { + [](ConfigOptions const& opts, std::string const& /*name*/, + std::string const& value, void* addr) { auto controller = static_cast*>(addr); Status s = @@ -371,8 +372,8 @@ const std::unordered_map {"TEST", {0, OptionType::kUnknown, OptionVerificationType::kAlias, OptionTypeFlags::kNone, - [](const ConfigOptions& /*opts*/, const std::string& /*name*/, - const std::string& value, void* addr) { + [](ConfigOptions const& /*opts*/, std::string const& /*name*/, + std::string const& value, void* addr) { auto copts = static_cast(addr); std::string name; std::string path; @@ -393,8 +394,8 @@ const std::unordered_map }}}, }; -Status CloudFileSystemOptions::Configure(const ConfigOptions& config_options, - const std::string& opts_str) { +Status CloudFileSystemOptions::Configure(ConfigOptions const& config_options, + std::string const& opts_str) { std::string current; Status s; if (!config_options.ignore_unknown_options) { @@ -418,12 +419,12 @@ Status CloudFileSystemOptions::Configure(const ConfigOptions& config_options, return s; } -Status CloudFileSystemOptions::Serialize(const ConfigOptions& config_options, +Status CloudFileSystemOptions::Serialize(ConfigOptions const& config_options, std::string* value) const { return OptionTypeInfo::SerializeStruct( config_options, CloudFileSystemOptions::kName(), &cloud_fs_option_type_info, CloudFileSystemOptions::kName(), - reinterpret_cast(this), value); + reinterpret_cast(this), value); } Status CloudFileSystemEnv::NewAwsFileSystem( @@ -448,12 +449,34 @@ Status CloudFileSystemEnv::NewAwsFileSystem( return NewAwsFileSystem(base_fs, options, logger, cfs); } -int DoRegisterCloudObjects(ObjectLibrary& library, const std::string& arg) { +Status CloudFileSystem::NewGcpFileSystem( + std::shared_ptr const& base_fs, + std::string const& src_cloud_bucket, 
std::string const& src_cloud_object, + std::string const& src_cloud_region, std::string const& dest_cloud_bucket, + std::string const& dest_cloud_object, std::string const& dest_cloud_region, + CloudFileSystemOptions const& cloud_options, + std::shared_ptr const& logger, CloudFileSystem** cfs) { + CloudFileSystemOptions options = cloud_options; + if (!src_cloud_bucket.empty()) + options.src_bucket.SetBucketName(src_cloud_bucket); + if (!src_cloud_object.empty()) + options.src_bucket.SetObjectPath(src_cloud_object); + if (!src_cloud_region.empty()) options.src_bucket.SetRegion(src_cloud_region); + if (!dest_cloud_bucket.empty()) + options.dest_bucket.SetBucketName(dest_cloud_bucket); + if (!dest_cloud_object.empty()) + options.dest_bucket.SetObjectPath(dest_cloud_object); + if (!dest_cloud_region.empty()) + options.dest_bucket.SetRegion(dest_cloud_region); + return NewGcpFileSystem(base_fs, options, logger, cfs); +} + +int DoRegisterCloudObjects(ObjectLibrary& library, std::string const& arg) { int count = 0; // Register the FileSystem types library.AddFactory( CloudFileSystemImpl::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, + [](std::string const& /*uri*/, std::unique_ptr* guard, std::string* /*errmsg*/) { guard->reset(new CloudFileSystemImpl(CloudFileSystemOptions(), FileSystem::Default(), @@ -463,12 +486,13 @@ int DoRegisterCloudObjects(ObjectLibrary& library, const std::string& arg) { count++; count += CloudFileSystemImpl::RegisterAwsObjects(library, arg); + count += CloudFileSystemImpl::RegisterGcpObjects(library, arg); // Register the Cloud Log Controllers library.AddFactory( CloudLogControllerImpl::kKafka(), - [](const std::string& /*uri*/, std::unique_ptr* guard, + [](std::string const& /*uri*/, std::unique_ptr* guard, std::string* errmsg) { Status s = CloudLogControllerImpl::CreateKafkaController(guard); if (!s.ok()) { @@ -638,6 +662,38 @@ Status CloudFileSystemEnv::NewAwsFileSystem( } #endif +#ifndef USE_GCP +Status CloudFileSystem::NewGcpFileSystem( + std::shared_ptr const& /*base_fs*/, + CloudFileSystemOptions const& /*options*/, + std::shared_ptr const& /*logger*/, CloudFileSystem** /*cfs*/) { + return Status::NotSupported("RocksDB Cloud not compiled with GCP support"); +} +#else +Status CloudFileSystem::NewGcpFileSystem( + std::shared_ptr const& base_fs, + CloudFileSystemOptions const& options, + std::shared_ptr const& logger, CloudFileSystem** cfs) { + CloudFileSystem::RegisterCloudObjects(); + //Dump out cloud fs options + options.Dump(logger.get()); + + Status st = GcpFileSystem::NewGcpFileSystem(base_fs, options, logger, cfs); + if(st.ok()) { + //store a copy to the logger + auto* cloud = static_cast(*cfs); + cloud->info_log_ = logger; + + //start the purge thread only if there is a destination bucket + if(options.dest_bucket.IsValid() && options.run_purger) { + cloud->purge_thread_ = std::thread([cloud] { cloud->Purger(); }); + } + } + + return st; +} +#endif + std::unique_ptr CloudFileSystemEnv::NewCompositeEnv( Env* env, const std::shared_ptr& fs) { return std::make_unique(env, fs); diff --git a/cloud/cloud_file_system_test.cc b/cloud/cloud_file_system_test.cc index d397b0670dd..89d106908d3 100644 --- a/cloud/cloud_file_system_test.cc +++ b/cloud/cloud_file_system_test.cc @@ -11,6 +11,8 @@ #include "test_util/testharness.h" #include "util/string_util.h" +#include + namespace ROCKSDB_NAMESPACE { TEST(CloudFileSystemTest, TestBucket) { @@ -242,5 +244,6 @@ TEST(CloudFileSystemTest, ConfigureKafkaController) { int main(int argc, char** argv) { 
::testing::InitGoogleTest(&argc, argv); + Aws::InitAPI(Aws::SDKOptions()); return RUN_ALL_TESTS(); } diff --git a/cloud/db_cloud_test.cc b/cloud/db_cloud_test.cc index 0cf5cbfeac9..4fea66af0e1 100644 --- a/cloud/db_cloud_test.cc +++ b/cloud/db_cloud_test.cc @@ -1766,8 +1766,13 @@ TEST_F(CloudTest, CheckpointToCloud) { auto checkpoint_bucket = cloud_fs_options_.dest_bucket; + std::string ckpt_from_object_path = + cloud_fs_options_.dest_bucket.GetObjectPath(); + ckpt_from_object_path += "_from"; cloud_fs_options_.src_bucket = BucketOptions(); + cloud_fs_options_.src_bucket.SetObjectPath(ckpt_from_object_path); cloud_fs_options_.dest_bucket = BucketOptions(); + cloud_fs_options_.dest_bucket.SetObjectPath(ckpt_from_object_path); // Create a DB with two files OpenDB(); @@ -1783,6 +1788,9 @@ TEST_F(CloudTest, CheckpointToCloud) { CloseDB(); DestroyDir(dbname_); + GetCloudFileSystem()->GetStorageProvider()->EmptyBucket( + checkpoint_bucket.GetBucketName(), + cloud_fs_options_.dest_bucket.GetObjectPath()); cloud_fs_options_.src_bucket = checkpoint_bucket; @@ -3251,4 +3259,4 @@ int main(int, char**) { return 0; } -#endif // !ROCKSDB_LITE +#endif // !ROCKSDB_LITE \ No newline at end of file diff --git a/cloud/filename.h b/cloud/filename.h index 887594b8a66..aa68423d883 100644 --- a/cloud/filename.h +++ b/cloud/filename.h @@ -198,6 +198,32 @@ inline bool IsCloudManifestFile(const std::string& pathname) { return false; } +inline std::string ReduceSlashes(const std::string& pathname) +{ + std::string result; + const char slash = '/'; + + bool previous_was_slash = false; + for (char c : pathname) + { + if (c == slash) + { + if (!previous_was_slash) + { + result += c; + previous_was_slash = true; + } + } + else + { + result += c; + previous_was_slash = false; + } + } + + return result; +} + enum class RocksDBFileType { kSstFile, kLogFile, @@ -228,4 +254,4 @@ inline RocksDBFileType GetFileType(const std::string& fname_with_epoch) { return RocksDBFileType::kUnknown; } -} // namespace +} // namespace \ No newline at end of file diff --git a/cloud/gcp/gcp_cs.cc b/cloud/gcp/gcp_cs.cc new file mode 100644 index 00000000000..e0420f700f6 --- /dev/null +++ b/cloud/gcp/gcp_cs.cc @@ -0,0 +1,724 @@ +#ifndef ROCKSDB_LITE +#ifdef USE_GCP +#include "google/cloud/storage/bucket_metadata.h" +#include "google/cloud/storage/client.h" + +namespace gcs = ::google::cloud::storage; +namespace gcp = ::google::cloud; +#endif + +#include "cloud/cloud_storage_provider_impl.h" +#include "cloud/filename.h" +#include "cloud/gcp/gcp_file_system.h" +#include "rocksdb/cloud/cloud_file_system.h" +#include "rocksdb/convenience.h" +#include + +#ifdef _WIN32_WINNT +#undef GetMessage +#endif + +namespace ROCKSDB_NAMESPACE { +#ifdef USE_GCP + +static bool IsNotFound(gcp::Status const& status) { + return (status.code() == gcp::StatusCode::kNotFound); +} + +// AWS handle successive slashes in a path as a single slash, but GCS does not. +// So, we make it consistent by reducing multiple slashes to a single slash. 
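+//
+// Illustrative example (not part of the API): ReduceSlashes("//a///b/c")
+// yields "/a/b/c", and the leading '/' is then trimmed below, so the object
+// name handed to GCS is "a/b/c" with no empty path segments.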
+inline std::string normalzie_object_path(std::string const& object_path) { + std::string path = ReduceSlashes(object_path); + return ltrim_if(path, '/'); +} + +class CloudRequestCallbackGuard { + public: + CloudRequestCallbackGuard(CloudRequestCallback* callback, + CloudRequestOpType type, uint64_t size = 0) + : callback_(callback), type_(type), size_(size), start_(now()) {} + + ~CloudRequestCallbackGuard() { + if (callback_) { + (*callback_)(type_, size_, now() - start_, success_); + } + } + + void SetSize(uint64_t size) { size_ = size; } + void SetSuccess(bool success) { success_ = success; } + + private: + uint64_t now() { + return std::chrono::duration_cast( + std::chrono::system_clock::now() - + std::chrono::system_clock::from_time_t(0)) + .count(); + } + CloudRequestCallback* callback_; + CloudRequestOpType type_; + uint64_t size_; + bool success_{false}; + uint64_t start_; +}; + +/******************** GCSClientWrapper ******************/ + +class GCSClientWrapper { + public: + explicit GCSClientWrapper(CloudFileSystemOptions const& cloud_options, + gcp::Options gcp_options) + : cloud_request_callback_(cloud_options.cloud_request_callback) { + if (cloud_options.gcs_client_factory) { + client_ = cloud_options.gcs_client_factory(gcp_options); + } else { + client_ = std::make_shared(gcp_options); + } + } + + gcp::StatusOr CreateBucket( + std::string bucket_name, gcs::BucketMetadata metadata) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kCreateOp); + gcp::StatusOr bucket_metadata = + client_->CreateBucket(bucket_name, metadata); + t.SetSuccess(bucket_metadata.ok()); + return bucket_metadata; + } + + gcp::StatusOr ListCloudObjects( + std::string bucket_name, std::string prefix, int /*maxium*/) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kListOp); + gcp::StatusOr objects = client_->ListObjects( + bucket_name, gcs::Prefix(prefix) /*, gcs::MaxResults(maxium)*/); + t.SetSuccess(objects.ok()); + return objects; + } + + gcp::StatusOr HeadBucket(std::string bucket_name) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kInfoOp); + gcp::StatusOr bucket_metadata = + client_->GetBucketMetadata(bucket_name); + t.SetSuccess(bucket_metadata.ok()); + return bucket_metadata; + } + + gcp::Status DeleteCloudObject(std::string bucket_name, + std::string object_path) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kDeleteOp); + gcp::Status del = client_->DeleteObject(bucket_name, object_path); + t.SetSuccess(del.ok()); + return del; + } + + gcp::StatusOr CopyCloudObject( + std::string src_bucketname, std::string src_objectpath, + std::string dst_bucketname, std::string dst_objectpath) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kCopyOp); + gcp::StatusOr object_metadata = client_->CopyObject( + src_bucketname, src_objectpath, dst_bucketname, dst_objectpath); + t.SetSuccess(object_metadata.ok()); + return object_metadata; + } + + gcp::Status GetCloudObject(std::string bucket, std::string object, + int64_t start, size_t n, char* buf, + uint64_t* bytes_read) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kReadOp); + // create a range read request + // Ranges are inclusive, so we can't read 0 bytes; read 1 instead and + // drop it later + size_t rangeLen = (n != 0 ? 
n : 1); + uint64_t end = start + rangeLen; + *bytes_read = 0; + + gcs::ObjectReadStream obj = + client_->ReadObject(bucket, object, gcs::ReadRange(start, end)); + if (obj.bad()) { + return obj.status(); + } + + if (n != 0) { + obj.read(buf, n); + *bytes_read = obj.gcount(); + assert(*bytes_read <= n); + } + + t.SetSize(*bytes_read); + t.SetSuccess(true); + + return obj.status(); + } + + gcp::Status DownloadFile(std::string bucket_name, std::string object_path, + std::string dst_file, uint64_t* file_size) { + CloudRequestCallbackGuard guard(cloud_request_callback_.get(), + CloudRequestOpType::kReadOp); + + gcs::ObjectReadStream os = client_->ReadObject(bucket_name, object_path); + if (os.bad()) { + guard.SetSize(0); + guard.SetSuccess(false); + return os.status(); + } + + std::ofstream ofs(dst_file, std::ofstream::binary); + // if ofs is not open, return error with dst_file name in message + if (!ofs.is_open()) { + guard.SetSize(0); + guard.SetSuccess(false); + std::string errmsg("Unable to open dest file "); + errmsg.append(dst_file); + return gcp::Status(gcp::StatusCode::kInternal, errmsg); + } + + // Read stream for os and write to dst_file, then set the file size for + // guard + ofs << os.rdbuf(); + ofs.close(); + *file_size = os.size().value(); + guard.SetSize(*file_size); + guard.SetSuccess(true); + return gcp::Status(gcp::StatusCode::kOk, "OK"); + } + + // update object metadata + gcp::StatusOr PutCloudObject( + std::string bucket_name, std::string object_path, + std::unordered_map metadata, + uint64_t size_hint = 0) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kWriteOp, size_hint); + gcp::StatusOr object_meta = + client_->InsertObject(bucket_name, object_path, ""); + if (!object_meta.ok()) { + t.SetSuccess(false); + return object_meta; + } + gcs::ObjectMetadata new_object_meta = object_meta.value(); + for (auto kv : metadata) { + new_object_meta.mutable_metadata().emplace(kv.first, kv.second); + } + auto update_meta = + client_->UpdateObject(bucket_name, object_path, new_object_meta); + return update_meta; + } + + gcp::StatusOr UploadFile(std::string bucket_name, + std::string object_path, + std::string loc_file) { + CloudRequestCallbackGuard guard(cloud_request_callback_.get(), + CloudRequestOpType::kWriteOp); + + gcp::StatusOr object_meta = + client_->UploadFile(loc_file, bucket_name, object_path); + + if (!object_meta.ok()) { + guard.SetSize(0); + guard.SetSuccess(false); + return object_meta; + } + + guard.SetSize(object_meta.value().size()); + guard.SetSuccess(true); + + return object_meta; + } + + gcp::StatusOr HeadObject(std::string bucket_name, + std::string object_path) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kInfoOp); + gcp::StatusOr object_metadata = + client_->GetObjectMetadata(bucket_name, object_path); + t.SetSuccess(object_metadata.ok()); + return object_metadata; + } + + CloudRequestCallback* GetRequestCallback() { + return cloud_request_callback_.get(); + } + + private: + std::shared_ptr client_; + std::shared_ptr cloud_request_callback_; +}; + +/******************** GcsReadableFile ******************/ +class GcsReadableFile : public CloudStorageReadableFileImpl { + public: + GcsReadableFile(std::shared_ptr const& gcs_client, + Logger* info_log, std::string const& bucket, + std::string const& fname, uint64_t size, + std::string content_hash) + : CloudStorageReadableFileImpl(info_log, bucket, fname, size), + gcs_client_(gcs_client), + content_hash_(std::move(content_hash)) {} + + 
virtual char const* Type() const { return "gcs"; } + + size_t GetUniqueId(char* id, size_t max_size) const override { + if (content_hash_.empty()) { + return 0; + } + + max_size = std::min(content_hash_.size(), max_size); + memcpy(id, content_hash_.c_str(), max_size); + return max_size; + } + + // random access, read data from specified offset in file + IOStatus DoCloudRead(uint64_t offset, size_t n, IOOptions const& /*options*/, + char* scratch, uint64_t* bytes_read, + IODebugContext* /*dbg*/) const override { + // read the range + auto status = gcs_client_->GetCloudObject(bucket_, fname_, offset, n, + scratch, bytes_read); + if (!status.ok()) { + if (IsNotFound(status)) { + Log(InfoLogLevel::ERROR_LEVEL, info_log_, + "[gcs] GcsReadableFile ReadObject Not Found %s \n", fname_.c_str()); + return IOStatus::NotFound(); + } else { + Log(InfoLogLevel::ERROR_LEVEL, info_log_, + "[gcs] GcsReadableFile ReadObject error %s offset %" PRIu64 + " rangelen %" ROCKSDB_PRIszt ", message: %s\n", + fname_.c_str(), offset, n, status.message().c_str()); + return IOStatus::IOError(fname_.c_str(), status.message().c_str()); + } + } + + return IOStatus::OK(); + } + + private: + std::shared_ptr const& gcs_client_; + std::string content_hash_; +}; // End class GcsReadableFile + +/******************** Writablefile ******************/ + +class GcsWritableFile : public CloudStorageWritableFileImpl { + public: + GcsWritableFile(CloudFileSystem* fs, std::string const& local_fname, + std::string const& bucket, std::string const& cloud_fname, + FileOptions const& options) + : CloudStorageWritableFileImpl(fs, local_fname, bucket, cloud_fname, + options) {} + virtual char const* Name() const override { + return CloudStorageProviderImpl::kGcs(); + } +}; // End class GcsWritableFile + +/******************** GcsStorageProvider ******************/ +class GcsStorageProvider : public CloudStorageProviderImpl { + public: + ~GcsStorageProvider() override {} + virtual char const* Name() const override { return kGcs(); } + IOStatus CreateBucket(std::string const& bucket) override; + IOStatus ExistsBucket(std::string const& bucket) override; + IOStatus EmptyBucket(std::string const& bucket_name, + std::string const& object_path) override; + IOStatus DeleteCloudObject(std::string const& bucket_name, + std::string const& object_path) override; + IOStatus ListCloudObjects(std::string const& bucket_name, + std::string const& object_path, + std::vector* result) override; + IOStatus ExistsCloudObject(std::string const& bucket_name, + std::string const& object_path) override; + IOStatus GetCloudObjectSize(std::string const& bucket_name, + std::string const& object_path, + uint64_t* filesize) override; + IOStatus GetCloudObjectModificationTime(std::string const& bucket_name, + std::string const& object_path, + uint64_t* time) override; + IOStatus GetCloudObjectMetadata(std::string const& bucket_name, + std::string const& object_path, + CloudObjectInformation* info) override; + IOStatus PutCloudObjectMetadata( + std::string const& bucket_name, std::string const& object_path, + std::unordered_map const& metadata) override; + IOStatus CopyCloudObject(std::string const& bucket_name_src, + std::string const& object_path_src, + std::string const& bucket_name_dest, + std::string const& object_path_dest) override; + IOStatus DoNewCloudReadableFile( + std::string const& bucket, std::string const& fname, uint64_t fsize, + std::string const& content_hash, FileOptions const& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + 
IOStatus NewCloudWritableFile( + std::string const& local_path, std::string const& bucket_name, + std::string const& object_path, FileOptions const& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + Status PrepareOptions(ConfigOptions const& options) override; + + protected: + IOStatus DoGetCloudObject(std::string const& bucket_name, + std::string const& object_path, + std::string const& destination, + uint64_t* remote_size) override; + IOStatus DoPutCloudObject(std::string const& local_file, + std::string const& bucket_name, + std::string const& object_path, + uint64_t file_size) override; + + private: + struct HeadObjectResult { + // If any of the field is non-nullptr, returns requested data + std::unordered_map* metadata = nullptr; + uint64_t* size = nullptr; + uint64_t* modtime = nullptr; + std::string* etag = nullptr; + }; + + // Retrieves metadata from an object + IOStatus HeadObject(std::string const& bucket, std::string const& path, + HeadObjectResult* result); + + // The Gcs client + std::shared_ptr gcs_client_; +}; // End class GcsStorageProvider + +/******************** GcsFileSystem ******************/ +IOStatus GcsStorageProvider::CreateBucket(std::string const& bucket) { + std::string bucket_location = + cfs_->GetCloudFileSystemOptions().dest_bucket.GetRegion(); + // storage_class: https://cloud.google.com/storage/docs/storage-classes + // default storage_class = STANDARD + std::string sc("STANDARD"); + auto bucket_metadata = gcs_client_->CreateBucket( + bucket, gcs::BucketMetadata().set_storage_class(sc).set_location( + bucket_location)); + if (!bucket_metadata.ok()) { + // Bucket already exists is not an error + if (gcp::StatusCode::kAlreadyExists != bucket_metadata.status().code()) { + std::string errmsg(bucket_metadata.status().message()); + return IOStatus::IOError(bucket.c_str(), errmsg.c_str()); + } + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::ExistsBucket(std::string const& bucket) { + gcp::StatusOr bucketmetadata = + gcs_client_->HeadBucket(bucket); + if (IsNotFound(bucketmetadata.status())) { + return IOStatus::NotFound(); + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::EmptyBucket(std::string const& bucket_name, + std::string const& object_path) { + std::vector results; + auto st = ListCloudObjects(bucket_name, object_path, &results); + if (!st.ok()) { + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] EmptyBucket unable to find objects in bucket %s %s", + bucket_name.c_str(), st.ToString().c_str()); + return st; + } + Log(InfoLogLevel::DEBUG_LEVEL, cfs_->GetLogger(), + "[Gcs] EmptyBucket going to delete %" ROCKSDB_PRIszt + " objects in bucket %s", + results.size(), bucket_name.c_str()); + + // Delete all objects from bucket + for (auto const& path : results) { + st = DeleteCloudObject(bucket_name, object_path + "/" + path); + if (!st.ok()) { + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] EmptyBucket Unable to delete %s in bucket %s %s", path.c_str(), + bucket_name.c_str(), st.ToString().c_str()); + return st; + } + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::DeleteCloudObject(std::string const& bucket_name, + std::string const& object_path) { + auto normalized_path = normalzie_object_path(object_path); + auto st = gcs_client_->DeleteCloudObject(bucket_name, normalized_path); + if (!st.ok()) { + if (IsNotFound(st)) { + return IOStatus::NotFound(object_path, st.message().c_str()); + } else { + return IOStatus::IOError(object_path, st.message().c_str()); + } + } + 
Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(), + "[Gcs] DeleteFromGcs %s/%s, status %s", bucket_name.c_str(), + object_path.c_str(), st.message().c_str()); + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::ListCloudObjects( + std::string const& bucket_name, std::string const& object_path, + std::vector* result) { + // follow with aws_s3 + auto prefix = normalzie_object_path(object_path); + prefix = ensure_ends_with_pathsep(prefix); + // MaxResults is about page limits + // https://stackoverflow.com/questions/77069696/how-to-limit-number-of-objects-returned-from-listobjects + auto objects = gcs_client_->ListCloudObjects( + bucket_name, prefix, + cfs_->GetCloudFileSystemOptions().number_objects_listed_in_one_iteration); + if (!objects.ok()) { + std::string errmsg(objects.status().message()); + if (IsNotFound(objects.status())) { + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] GetChildren dir %s does not exist: %s", object_path.c_str(), + errmsg.c_str()); + return IOStatus::NotFound(object_path, errmsg.c_str()); + } + return IOStatus::IOError(object_path, errmsg.c_str()); + } + for (auto const& obj : objects.value()) { + // Our path should be a prefix of the fetched value + std::string name = obj.value().name(); + if (name.find(prefix) != 0) { // npos or other value + return IOStatus::IOError("Unexpected result from Gcs: " + name); + } + auto fname = name.substr(prefix.size()); + result->push_back(std::move(fname)); + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::ExistsCloudObject(std::string const& bucket_name, + std::string const& object_path) { + HeadObjectResult result; + return HeadObject(bucket_name, object_path, &result); +} + +IOStatus GcsStorageProvider::GetCloudObjectSize(std::string const& bucket_name, + std::string const& object_path, + uint64_t* filesize) { + HeadObjectResult result; + result.size = filesize; + return HeadObject(bucket_name, object_path, &result); +} + +IOStatus GcsStorageProvider::GetCloudObjectModificationTime( + std::string const& bucket_name, std::string const& object_path, + uint64_t* time) { + HeadObjectResult result; + result.modtime = time; + return HeadObject(bucket_name, object_path, &result); +} + +IOStatus GcsStorageProvider::GetCloudObjectMetadata( + std::string const& bucket_name, std::string const& object_path, + CloudObjectInformation* info) { + assert(info != nullptr); + HeadObjectResult result; + result.metadata = &info->metadata; + result.size = &info->size; + result.modtime = &info->modification_time; + result.etag = &info->content_hash; + return HeadObject(bucket_name, object_path, &result); +} + +IOStatus GcsStorageProvider::PutCloudObjectMetadata( + std::string const& bucket_name, std::string const& object_path, + std::unordered_map const& metadata) { + auto normalized_path = normalzie_object_path(object_path); + auto outcome = + gcs_client_->PutCloudObject(bucket_name, normalized_path, metadata); + if (!outcome.ok()) { + auto const& error = outcome.status().message(); + std::string errmsg(error.c_str(), error.size()); + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] Bucket %s error in saving metadata %s", bucket_name.c_str(), + errmsg.c_str()); + return IOStatus::IOError(object_path, errmsg.c_str()); + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::CopyCloudObject( + std::string const& bucket_name_src, std::string const& object_path_src, + std::string const& bucket_name_dest, std::string const& object_path_dest) { + std::string src_url = bucket_name_src + object_path_src; 
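+  // (src_url above is used only in the log messages below; the copy itself
+  // operates on the normalized bucket/path pair computed next.)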
+ auto normalized_src_path = normalzie_object_path(object_path_src); + auto normalized_dest_path = normalzie_object_path(object_path_dest); + auto copy = + gcs_client_->CopyCloudObject(bucket_name_src, normalized_src_path, + bucket_name_dest, normalized_dest_path); + if (!copy.ok()) { + auto const& error = copy.status().message(); + std::string errmsg(error.c_str(), error.size()); + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] GcsWritableFile src path %s error in copying to %s %s", + src_url.c_str(), object_path_dest.c_str(), errmsg.c_str()); + return IOStatus::IOError(object_path_dest.c_str(), errmsg.c_str()); + } + Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(), + "[Gcs] GcsWritableFile src path %s copied to %s OK", src_url.c_str(), + object_path_dest.c_str()); + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::DoNewCloudReadableFile( + std::string const& bucket, std::string const& fname, uint64_t fsize, + std::string const& content_hash, FileOptions const& /*options*/, + std::unique_ptr* result, + IODebugContext* /*dbg*/) { + auto normalized_path = normalzie_object_path(fname); + result->reset(new GcsReadableFile(gcs_client_, cfs_->GetLogger(), bucket, + normalized_path, fsize, content_hash)); + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::NewCloudWritableFile( + std::string const& local_path, std::string const& bucket_name, + std::string const& object_path, FileOptions const& file_opts, + std::unique_ptr* result, + IODebugContext* /*dbg*/) { + auto normalized_path = normalzie_object_path(object_path); + result->reset(new GcsWritableFile(cfs_, local_path, bucket_name, + normalized_path, file_opts)); + return (*result)->status(); +} + +Status GcsStorageProvider::PrepareOptions(ConfigOptions const& options) { + auto cfs = dynamic_cast(options.env->GetFileSystem().get()); + assert(cfs); + auto const& cloud_opts = cfs->GetCloudFileSystemOptions(); + if (std::string(cfs->Name()) != CloudFileSystemImpl::kGcp()) { + return Status::InvalidArgument("gcs Provider requires gcp Environment"); + } + // TODO: support buckets being in different regions + if (!cfs->SrcMatchesDest() && cfs->HasSrcBucket() && cfs->HasDestBucket()) { + if (cloud_opts.src_bucket.GetRegion() != + cloud_opts.dest_bucket.GetRegion()) { + Log(InfoLogLevel::ERROR_LEVEL, cfs->GetLogger(), + "[gcp] NewGcpFileSystem Buckets %s, %s in two different regions %s, " + "%s is not supported", + cloud_opts.src_bucket.GetBucketName().c_str(), + cloud_opts.dest_bucket.GetBucketName().c_str(), + cloud_opts.src_bucket.GetRegion().c_str(), + cloud_opts.dest_bucket.GetRegion().c_str()); + return Status::InvalidArgument("Two different regions not supported"); + } + } + // initialize the Gcs client + gcp::Options gcp_options; + Status status = GcpCloudOptions::GetClientConfiguration( + cfs, cloud_opts.src_bucket.GetRegion(), gcp_options); + if (status.ok()) { + gcs_client_ = std::make_shared(cloud_opts, gcp_options); + return CloudStorageProviderImpl::PrepareOptions(options); + } + return status; +} + +IOStatus GcsStorageProvider::DoGetCloudObject(std::string const& bucket_name, + std::string const& object_path, + std::string const& destination, + uint64_t* remote_size) { + auto normalized_path = normalzie_object_path(object_path); + auto get = gcs_client_->DownloadFile(bucket_name, normalized_path, + destination, remote_size); + if (!get.ok()) { + std::string errmsg; + errmsg = get.message(); + if (IsNotFound(get)) { + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[gcs] GetObject %s/%s error %s.", 
bucket_name.c_str(),
+          object_path.c_str(), errmsg.c_str());
+      return IOStatus::NotFound(std::move(errmsg));
+    } else {
+      Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(),
+          "[gcs] GetObject %s/%s error %s.", bucket_name.c_str(),
+          object_path.c_str(), errmsg.c_str());
+      return IOStatus::IOError(std::move(errmsg));
+    }
+  }
+  return IOStatus::OK();
+}
+
+// Uploads local_file to GCS bucket_name/object_path
+IOStatus GcsStorageProvider::DoPutCloudObject(std::string const& local_file,
+                                              std::string const& bucket_name,
+                                              std::string const& object_path,
+                                              uint64_t file_size) {
+  auto normalized_path = normalzie_object_path(object_path);
+  auto put = gcs_client_->UploadFile(bucket_name, normalized_path, local_file);
+  if (!put.ok()) {
+    auto const& error = put.status().message();
+    std::string errmsg(error.c_str(), error.size());
+    Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(),
+        "[gcs] PutCloudObject %s/%s, size %" PRIu64 ", ERROR %s",
+        bucket_name.c_str(), object_path.c_str(), file_size, errmsg.c_str());
+    return IOStatus::IOError(local_file, errmsg);
+  }
+
+  Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(),
+      "[gcs] PutCloudObject %s/%s, size %" PRIu64 ", OK", bucket_name.c_str(),
+      object_path.c_str(), file_size);
+  return IOStatus::OK();
+}
+
+IOStatus GcsStorageProvider::HeadObject(std::string const& bucket,
+                                        std::string const& path,
+                                        HeadObjectResult* result) {
+  assert(result != nullptr);
+  auto object_path = normalzie_object_path(path);
+  auto head = gcs_client_->HeadObject(bucket, object_path);
+  if (!head.ok()) {
+    auto const& errMessage = head.status().message();
+    Slice object_path_slice(object_path.data(), object_path.size());
+    if (IsNotFound(head.status())) {
+      return IOStatus::NotFound(object_path_slice, errMessage.c_str());
+    } else {
+      return IOStatus::IOError(object_path_slice, errMessage.c_str());
+    }
+  }
+
+  auto const& head_val = head.value();
+  if (result->metadata != nullptr) {
+    // gcs metadata() is a std::map<std::string, std::string>; copy it over
+    for (auto const& m : head_val.metadata()) {
+      (*(result->metadata))[m.first.c_str()] = m.second.c_str();
+    }
+  }
+  if (result->size != nullptr) {
+    *(result->size) = head_val.size();
+  }
+  if ((result->modtime) != nullptr) {
+    int64_t modtime = std::chrono::duration_cast<std::chrono::seconds>(
+                          head_val.updated().time_since_epoch())
+                          .count();
+    *(result->modtime) = modtime;
+  }
+  if ((result->etag) != nullptr) {
+    *(result->etag) =
+        std::string(head_val.etag().data(), head_val.etag().length());
+  }
+  return IOStatus::OK();
+}
+
+#endif  // USE_GCP
+
+Status CloudStorageProviderImpl::CreateGcsProvider(
+    std::unique_ptr<CloudStorageProvider>* provider) {
+#ifndef USE_GCP
+  provider->reset();
+  return Status::NotSupported(
+      "In order to use Google Cloud Storage, make sure you're compiling with "
+      "USE_GCP=1");
+#else
+  provider->reset(new GcsStorageProvider());
+  return Status::OK();
+#endif
+}
+}  // namespace ROCKSDB_NAMESPACE
+#endif  // ROCKSDB_LITE
\ No newline at end of file
diff --git a/cloud/gcp/gcp_db_cloud_test.cc b/cloud/gcp/gcp_db_cloud_test.cc
new file mode 100644
index 00000000000..270ae4b924d
--- /dev/null
+++ b/cloud/gcp/gcp_db_cloud_test.cc
@@ -0,0 +1,3335 @@
+// Copyright (c) 2017 Rockset
+
+#ifndef ROCKSDB_LITE
+#ifdef USE_GCP
+
+#include "cloud/cloud_file_deletion_scheduler.h"
+#include "cloud/cloud_file_system_impl.h"
+#include "cloud/cloud_scheduler.h"
+#include "cloud/cloud_storage_provider_impl.h"
+#include "cloud/db_cloud_impl.h"
+#include "cloud/filename.h"
+#include "cloud/manifest_reader.h"
+#include "db/db_impl/db_impl.h"
+#include "db/db_test_util.h"
+#include "file/filename.h"
+#include "logging/logging.h" +#include "rocksdb/cloud/cloud_file_system.h" +#include "rocksdb/cloud/db_cloud.h" +#include "rocksdb/options.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "test_util/sync_point.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/random.h" +#include "util/string_util.h" +#include +#include +#include +#include +#ifndef OS_WIN +#include +#endif + +namespace ROCKSDB_NAMESPACE { + +namespace { +const FileOptions kFileOptions; +const IOOptions kIOOptions; +IODebugContext* const kDbg = nullptr; +} // namespace + +class CloudTest : public testing::Test { + public: + CloudTest() { + Random64 rng(time(nullptr)); + test_id_ = std::to_string(rng.Next()); + fprintf(stderr, "Test ID: %s\n", test_id_.c_str()); + + base_env_ = Env::Default(); + dbname_ = test::TmpDir() + "/db_cloud-" + test_id_; + clone_dir_ = test::TmpDir() + "/ctest-" + test_id_; + cloud_fs_options_.TEST_Initialize("dbcloudtest.", dbname_); + cloud_fs_options_.resync_manifest_on_open = true; + // To catch any possible file deletion bugs, cloud files are deleted + // right away + cloud_fs_options_.cloud_file_deletion_delay = std::chrono::seconds(0); + + options_.create_if_missing = true; + options_.stats_dump_period_sec = 0; + options_.stats_persist_period_sec = 0; + persistent_cache_path_ = ""; + persistent_cache_size_gb_ = 0; + db_ = nullptr; + + DestroyDir(dbname_); + base_env_->CreateDirIfMissing(dbname_); + base_env_->NewLogger(test::TmpDir(base_env_) + "/rocksdb-cloud.log", + &options_.info_log); + options_.info_log->SetInfoLogLevel(InfoLogLevel::DEBUG_LEVEL); + + Cleanup(); + } + + void Cleanup() { + ASSERT_TRUE(!aenv_); + + // check cloud credentials + ASSERT_TRUE(cloud_fs_options_.credentials.HasValid().ok()); + + CloudFileSystem* gfs; + // create a dummy gfs env + ASSERT_OK(CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(), + cloud_fs_options_, + options_.info_log, &gfs)); + ASSERT_NE(gfs, nullptr); + // delete all pre-existing contents from the bucket + auto st = gfs->GetStorageProvider()->EmptyBucket(gfs->GetSrcBucketName(), + dbname_); + delete gfs; + ASSERT_TRUE(st.ok() || st.IsNotFound()); + + DestroyDir(clone_dir_); + ASSERT_OK(base_env_->CreateDir(clone_dir_)); + } + + std::set GetSSTFiles(std::string name) { + std::vector files; + GetCloudFileSystem()->GetBaseFileSystem()->GetChildren(name, kIOOptions, + &files, kDbg); + std::set sst_files; + for (auto& f : files) { + if (IsSstFile(RemoveEpoch(f))) { + sst_files.insert(f); + } + } + return sst_files; + } + + // Return total size of all sst files available locally + void GetSSTFilesTotalSize(std::string name, uint64_t* total_size) { + std::vector files; + GetCloudFileSystem()->GetBaseFileSystem()->GetChildren(name, kIOOptions, + &files, kDbg); + std::set sst_files; + uint64_t local_size = 0; + for (auto& f : files) { + if (IsSstFile(RemoveEpoch(f))) { + sst_files.insert(f); + std::string lpath = dbname_ + "/" + f; + ASSERT_OK(GetCloudFileSystem()->GetBaseFileSystem()->GetFileSize( + lpath, kIOOptions, &local_size, kDbg)); + (*total_size) += local_size; + } + } + } + + std::set GetSSTFilesClone(std::string name) { + std::string cname = clone_dir_ + "/" + name; + return GetSSTFiles(cname); + } + + void DestroyDir(std::string const& dir) { + std::string cmd = "rm -rf " + dir; + int rc = system(cmd.c_str()); + ASSERT_EQ(rc, 0); + } + + virtual ~CloudTest() { + // Cleanup the cloud bucket + if (!cloud_fs_options_.src_bucket.GetBucketName().empty()) { + CloudFileSystem* gfs; + 
Status st = CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(),
+                                                    cloud_fs_options_,
+                                                    options_.info_log, &gfs);
+      if (st.ok()) {
+        gfs->GetStorageProvider()->EmptyBucket(gfs->GetSrcBucketName(),
+                                               dbname_);
+        delete gfs;
+      }
+    }
+
+    CloseDB();
+  }
+
+  void CreateCloudEnv() {
+    CloudFileSystem* cfs;
+    ASSERT_OK(CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(),
+                                                cloud_fs_options_,
+                                                options_.info_log, &cfs));
+    std::shared_ptr<FileSystem> fs(cfs);
+    aenv_ = CloudFileSystem::NewCompositeEnv(base_env_, std::move(fs));
+  }
+
+  // Open database via the cloud interface
+  void OpenDB() {
+    std::vector<ColumnFamilyHandle*> handles;
+    OpenDB(&handles);
+    // Delete the handle for the default column family because the DBImpl
+    // always holds a reference to it.
+    ASSERT_TRUE(handles.size() > 0);
+    delete handles[0];
+  }
+
+  // Open database via the cloud interface
+  void OpenDB(std::vector<ColumnFamilyHandle*>* handles) {
+    // default column family
+    OpenWithColumnFamilies({kDefaultColumnFamilyName}, handles);
+  }
+
+  void OpenWithColumnFamilies(std::vector<std::string> const& cfs,
+                              std::vector<ColumnFamilyHandle*>* handles) {
+    ASSERT_TRUE(cloud_fs_options_.credentials.HasValid().ok());
+
+    // Create new GCP env
+    CreateCloudEnv();
+    options_.env = aenv_.get();
+    // Sleep for a second because cloud listings may be eventually consistent.
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+
+    ASSERT_TRUE(db_ == nullptr);
+    std::vector<ColumnFamilyDescriptor> column_families;
+    for (size_t i = 0; i < cfs.size(); ++i) {
+      column_families.emplace_back(cfs[i], options_);
+    }
+    ASSERT_OK(DBCloud::Open(options_, dbname_, column_families,
+                            persistent_cache_path_, persistent_cache_size_gb_,
+                            handles, &db_));
+    ASSERT_OK(db_->GetDbIdentity(dbid_));
+  }
+
+  // Try to open and return status
+  Status checkOpen() {
+    // Create new GCP env
+    CreateCloudEnv();
+    options_.env = aenv_.get();
+    // Sleep for a second because cloud listings may be eventually consistent.
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+
+    return DBCloud::Open(options_, dbname_, persistent_cache_path_,
+                         persistent_cache_size_gb_, &db_);
+  }
+
+  void CreateColumnFamilies(std::vector<std::string> const& cfs,
+                            std::vector<ColumnFamilyHandle*>* handles) {
+    ASSERT_NE(db_, nullptr);
+    size_t cfi = handles->size();
+    handles->resize(cfi + cfs.size());
+    for (auto cf : cfs) {
+      ASSERT_OK(db_->CreateColumnFamily(options_, cf, &handles->at(cfi++)));
+    }
+  }
+
+  // Creates and Opens a clone
+  Status CloneDB(std::string const& clone_name,
+                 std::string const& dest_bucket_name,
+                 std::string const& dest_object_path,
+                 std::unique_ptr<DBCloud>* cloud_db, std::unique_ptr<Env>* env,
+                 bool force_keep_local_on_invalid_dest_bucket = true) {
+    // The local directory where the clone resides
+    std::string cname = clone_dir_ + "/" + clone_name;
+
+    CloudFileSystem* cfs;
+    DBCloud* clone_db;
+
+    // If there is no destination bucket, then the clone needs to copy
+    // all sst files from source bucket to local dir
+    auto copt = cloud_fs_options_;
+    if (dest_bucket_name == copt.src_bucket.GetBucketName()) {
+      copt.dest_bucket = copt.src_bucket;
+    } else {
+      copt.dest_bucket.SetBucketName(dest_bucket_name);
+    }
+    copt.dest_bucket.SetObjectPath(dest_object_path);
+    if (!copt.dest_bucket.IsValid() &&
+        force_keep_local_on_invalid_dest_bucket) {
+      copt.keep_local_sst_files = true;
+    }
+    // Create new GCP env
+    Status st = CloudFileSystem::NewGcpFileSystem(
+        base_env_->GetFileSystem(), copt, options_.info_log, &cfs);
+    if (!st.ok()) {
+      return st;
+    }
+
+    // sets the env to be used by the env wrapper, and returns that env
+    env->reset(
+        new CompositeEnvWrapper(base_env_, std::shared_ptr<FileSystem>(cfs)));
+    options_.env = env->get();
+
+    // default column family
+    ColumnFamilyOptions cfopt = options_;
+
+    std::vector<ColumnFamilyDescriptor> column_families;
+    column_families.emplace_back(
+        ColumnFamilyDescriptor(kDefaultColumnFamilyName, cfopt));
+    std::vector<ColumnFamilyHandle*> handles;
+
+    st = DBCloud::Open(options_, cname, column_families, persistent_cache_path_,
+                       persistent_cache_size_gb_, &handles, &clone_db);
+    if (!st.ok()) {
+      return st;
+    }
+
+    cloud_db->reset(clone_db);
+
+    // Delete the handle for the default column family because the DBImpl
+    // always holds a reference to it.
+    
+ assert(handles.size() > 0); + delete handles[0]; + + return st; + } + + void CloseDB(std::vector* handles) { + for (auto h : *handles) { + delete h; + } + handles->clear(); + CloseDB(); + } + + void CloseDB() { + if (db_) { + db_->Flush(FlushOptions()); // convert pending writes to sst files + delete db_; + db_ = nullptr; + } + } + + void SetPersistentCache(std::string const& path, uint64_t size_gb) { + persistent_cache_path_ = path; + persistent_cache_size_gb_ = size_gb; + } + + Status GetCloudLiveFilesSrc(std::set* list) { + auto* cfs = GetCloudFileSystem(); + std::unique_ptr manifest( + new ManifestReader(options_.info_log, cfs, cfs->GetSrcBucketName())); + return manifest->GetLiveFiles(cfs->GetSrcObjectPath(), list); + } + + // Verify that local files are the same as cloud files in src bucket path + void ValidateCloudLiveFilesSrcSize() { + // Loop though all the files in the cloud manifest + std::set cloud_files; + ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files)); + for (uint64_t num : cloud_files) { + std::string pathname = MakeTableFileName(dbname_, num); + Log(options_.info_log, "cloud file list %s\n", pathname.c_str()); + } + + std::set localFiles = GetSSTFiles(dbname_); + uint64_t cloudSize = 0; + uint64_t localSize = 0; + + // loop through all the local files and validate + for (std::string path : localFiles) { + std::string cpath = GetCloudFileSystem()->GetSrcObjectPath() + "/" + path; + ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->GetCloudObjectSize( + GetCloudFileSystem()->GetSrcBucketName(), cpath, &cloudSize)); + + // find the size of the file on local storage + std::string lpath = dbname_ + "/" + path; + ASSERT_OK(GetCloudFileSystem()->GetBaseFileSystem()->GetFileSize( + lpath, kIOOptions, &localSize, kDbg)); + ASSERT_TRUE(localSize == cloudSize); + Log(options_.info_log, "local file %s size %" PRIu64 "\n", lpath.c_str(), + localSize); + Log(options_.info_log, "cloud file %s size %" PRIu64 "\n", cpath.c_str(), + cloudSize); + printf("local file %s size %" PRIu64 "\n", lpath.c_str(), localSize); + printf("cloud file %s size %" PRIu64 "\n", cpath.c_str(), cloudSize); + } + } + + CloudFileSystem* GetCloudFileSystem() const { + EXPECT_TRUE(aenv_); + return static_cast(aenv_->GetFileSystem().get()); + } + CloudFileSystemImpl* GetCloudFileSystemImpl() const { + EXPECT_TRUE(aenv_); + return static_cast(aenv_->GetFileSystem().get()); + } + + DBImpl* GetDBImpl() const { return static_cast(db_->GetBaseDB()); } + + Status SwitchToNewCookie(std::string new_cookie) { + CloudManifestDelta delta{db_->GetNextFileNumber(), new_cookie}; + return ApplyCMDeltaToCloudDB(delta); + } + + Status ApplyCMDeltaToCloudDB(CloudManifestDelta const& delta) { + auto st = GetCloudFileSystem()->RollNewCookie(dbname_, delta.epoch, delta); + if (!st.ok()) { + return st; + } + bool applied = false; + st = GetCloudFileSystem()->ApplyCloudManifestDelta(delta, &applied); + assert(applied); + if (!st.ok()) { + return st; + } + db_->NewManifestOnNextUpdate(); + return st; + } + + protected: + void WaitUntilNoScheduledJobs() { + while (true) { + auto num = GetCloudFileSystemImpl()->TEST_NumScheduledJobs(); + if (num > 0) { + usleep(100); + } else { + return; + } + } + } + + std::vector GetAllLocalFiles() { + std::vector local_files; + assert(base_env_->GetChildrenFileAttributes(dbname_, &local_files).ok()); + return local_files; + } + + // Generate a few obsolete sst files on an empty db + static void GenerateObsoleteFilesOnEmptyDB( + DBImpl* db, CloudFileSystem* cfs, + std::vector* obsolete_files) { + 
+    ASSERT_OK(db->Put({}, "k1", "v1"));
+    ASSERT_OK(db->Flush({}));
+
+    ASSERT_OK(db->Put({}, "k1", "v2"));
+    ASSERT_OK(db->Flush({}));
+
+    std::vector<LiveFileMetaData> sst_files;
+    db->GetLiveFilesMetaData(&sst_files);
+    ASSERT_EQ(sst_files.size(), 2);
+    for (auto& f : sst_files) {
+      obsolete_files->push_back(cfs->RemapFilename(f.relative_filename));
+    }
+
+    // trigger compaction, so the previous 2 sst files will be obsolete
+    ASSERT_OK(db->TEST_CompactRange(0, nullptr, nullptr, nullptr, true));
+    sst_files.clear();
+    db->GetLiveFilesMetaData(&sst_files);
+    ASSERT_EQ(sst_files.size(), 1);
+  }
+
+  // check that fname exists in the src bucket/object path
+  rocksdb::Status ExistsCloudObject(std::string const& filename) const {
+    return GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+        GetCloudFileSystem()->GetSrcBucketName(),
+        GetCloudFileSystem()->GetSrcObjectPath() + pathsep + filename);
+  }
+
+  std::string test_id_;
+  Env* base_env_;
+  Options options_;
+  std::string dbname_;
+  std::string clone_dir_;
+  CloudFileSystemOptions cloud_fs_options_;
+  std::string dbid_;
+  std::string persistent_cache_path_;
+  uint64_t persistent_cache_size_gb_;
+  DBCloud* db_;
+  std::unique_ptr<Env> aenv_;
+};
+
+//
+// Most basic test. Create DB, write one key, close it and then check to see
+// that the key exists.
+//
+TEST_F(CloudTest, BasicTest) {
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  CloseDB();
+  value.clear();
+
+  // Reopen and validate
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+
+  std::set<uint64_t> live_files;
+  ASSERT_OK(GetCloudLiveFilesSrc(&live_files));
+  ASSERT_GT(live_files.size(), 0);
+  CloseDB();
+}
+
+TEST_F(CloudTest, FindAllLiveFilesTest) {
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  // wait until files are persisted into GCS
+  GetDBImpl()->TEST_WaitForBackgroundWork();
+
+  CloseDB();
+
+  std::vector<std::string> tablefiles;
+  std::string manifest;
+  // fetch the latest manifest to local
+  ASSERT_OK(
+      GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest));
+  EXPECT_EQ(tablefiles.size(), 1);
+
+  for (auto name : tablefiles) {
+    EXPECT_EQ(GetFileType(name), RocksDBFileType::kSstFile);
+    // verify that the sst file indeed exists in the cloud
+    EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+        GetCloudFileSystem()->GetSrcBucketName(),
+        GetCloudFileSystem()->GetSrcObjectPath() + pathsep + name));
+  }
+
+  EXPECT_EQ(GetFileType(manifest), RocksDBFileType::kManifestFile);
+  // verify that the manifest file indeed exists in the cloud
+  auto storage_provider = GetCloudFileSystem()->GetStorageProvider();
+  auto bucket_name = GetCloudFileSystem()->GetSrcBucketName();
+  auto object_path =
+      GetCloudFileSystem()->GetSrcObjectPath() + pathsep + manifest;
+  EXPECT_OK(storage_provider->ExistsCloudObject(bucket_name, object_path));
+}
+
+// Files of a dropped CF should not be included in live files
+TEST_F(CloudTest, LiveFilesOfDroppedCFTest) {
+  std::vector<ColumnFamilyHandle*> handles;
+  OpenDB(&handles);
+
+  std::vector<std::string> tablefiles;
+  std::string manifest;
+  ASSERT_OK(
+      GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest));
+
+  EXPECT_TRUE(tablefiles.empty());
+  CreateColumnFamilies({"cf1"}, &handles);
+
+  // write to CF
+  ASSERT_OK(db_->Put(WriteOptions(), handles[1], "hello", "world"));
+  // flush cf1
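+  // (Flushing via the cf1 handle persists only that column family's
+  // memtable; this is what makes its first sst file show up as live.)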
+  ASSERT_OK(db_->Flush({}, handles[1]));
+
+  tablefiles.clear();
+  ASSERT_OK(
+      GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest));
+  EXPECT_TRUE(tablefiles.size() == 1);
+
+  // Drop the CF
+  ASSERT_OK(db_->DropColumnFamily(handles[1]));
+  tablefiles.clear();
+  // make sure that files are not listed as live for the dropped CF
+  ASSERT_OK(
+      GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest));
+  EXPECT_TRUE(tablefiles.empty());
+  CloseDB(&handles);
+}
+
+// Verifies that when we move files across levels, the files are still listed
+// as live files
+TEST_F(CloudTest, LiveFilesAfterChangingLevelTest) {
+  options_.num_levels = 3;
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "a", "1"));
+  ASSERT_OK(db_->Put(WriteOptions(), "b", "2"));
+  ASSERT_OK(db_->Flush({}));
+  auto db_impl = GetDBImpl();
+
+  std::vector<std::string> tablefiles_before_move;
+  std::string manifest;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(
+      dbname_, &tablefiles_before_move, &manifest));
+  EXPECT_EQ(tablefiles_before_move.size(), 1);
+
+  CompactRangeOptions cro;
+  cro.change_level = true;
+  cro.target_level = 2;
+  // Move the sst files to another level by compacting the entire range
+  ASSERT_OK(db_->CompactRange(cro, nullptr /* begin */, nullptr /* end */));
+
+  ASSERT_OK(db_impl->TEST_WaitForBackgroundWork());
+
+  std::vector<std::string> tablefiles_after_move;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(
+      dbname_, &tablefiles_after_move, &manifest));
+  EXPECT_EQ(tablefiles_before_move, tablefiles_after_move);
+}
+
+TEST_F(CloudTest, GetChildrenTest) {
+  // Create some objects in GCS
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  CloseDB();
+  DestroyDir(dbname_);
+  OpenDB();
+
+  std::vector<std::string> children;
+  ASSERT_OK(aenv_->GetFileSystem()->GetChildren(dbname_, kIOOptions, &children,
+                                                kDbg));
+  int sst_files = 0;
+  for (auto c : children) {
+    if (IsSstFile(c)) {
+      sst_files++;
+    }
+  }
+  // This verifies that GetChildren() works on GCS. We deleted the file
+  // locally, so the only way it can show up in GetChildren() is if listing
+  // the GCS bucket works.
+  EXPECT_EQ(sst_files, 1);
+}
+
+//
+// Create and read from a clone.
+//
+TEST_F(CloudTest, Newdb) {
+  std::string master_dbid;
+  std::string newdb1_dbid;
+  std::string newdb2_dbid;
+
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  ASSERT_OK(db_->GetDbIdentity(master_dbid));
+  CloseDB();
+  value.clear();
+
+  {
+    // Create and open a new ephemeral instance
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("newdb1", "", "", &cloud_db, &env);
+
+    // Retrieve the id of the first reopen
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb1_dbid));
+
+    // This is an ephemeral clone, so the master's dbid is a prefix of the
+    // clone's dbid.
+    ASSERT_NE(newdb1_dbid, master_dbid);
+    auto res = std::mismatch(master_dbid.begin(), master_dbid.end(),
+                             newdb1_dbid.begin());
+    ASSERT_TRUE(res.first == master_dbid.end());
+
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+
+    // Open the master and write one more kv to it. This is written to the
+    // src bucket as well.
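+    // Note: the ephemeral clone opened above never resyncs from the cloud,
+    // so this new write will stay invisible to it (asserted below).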
+    OpenDB();
+    ASSERT_OK(db_->Put(WriteOptions(), "Dhruba", "Borthakur"));
+
+    // check that the newly written kv exists
+    value.clear();
+    ASSERT_OK(db_->Get(ReadOptions(), "Dhruba", &value));
+    ASSERT_TRUE(value.compare("Borthakur") == 0);
+
+    // check that the earlier kv exists too
+    value.clear();
+    ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+    CloseDB();
+
+    // Assert that newdb1 cannot see the second kv, because the ephemeral
+    // clone was created before the master wrote it and never resyncs.
+    ASSERT_TRUE(cloud_db->Get(ReadOptions(), "Dhruba", &value).IsNotFound());
+  }
+  {
+    // Create another ephemeral instance using a different local dir but the
+    // same two buckets as newdb1. This should be identical in contents with
+    // newdb1.
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("newdb2", "", "", &cloud_db, &env);
+
+    // Retrieve the id of the second clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb2_dbid));
+
+    // Since we use two different local directories for the two ephemeral
+    // clones, their dbids should be different from one another
+    ASSERT_NE(newdb1_dbid, newdb2_dbid);
+
+    // check that both kvs appear in the new ephemeral clone
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Dhruba", &value));
+    ASSERT_TRUE(value.compare("Borthakur") == 0);
+  }
+
+  CloseDB();
+}
+
+TEST_F(CloudTest, ColumnFamilies) {
+  std::vector<ColumnFamilyHandle*> handles;
+  // Put one key-value
+  OpenDB(&handles);
+
+  CreateColumnFamilies({"cf1", "cf2"}, &handles);
+
+  ASSERT_OK(db_->Put(WriteOptions(), handles[0], "hello", "a"));
+  ASSERT_OK(db_->Put(WriteOptions(), handles[1], "hello", "b"));
+  ASSERT_OK(db_->Put(WriteOptions(), handles[2], "hello", "c"));
+
+  auto validate = [&]() {
+    std::string value;
+    ASSERT_OK(db_->Get(ReadOptions(), handles[0], "hello", &value));
+    ASSERT_EQ(value, "a");
+    ASSERT_OK(db_->Get(ReadOptions(), handles[1], "hello", &value));
+    ASSERT_EQ(value, "b");
+    ASSERT_OK(db_->Get(ReadOptions(), handles[2], "hello", &value));
+    ASSERT_EQ(value, "c");
+  };
+
+  validate();
+
+  CloseDB(&handles);
+  OpenWithColumnFamilies({kDefaultColumnFamilyName, "cf1", "cf2"}, &handles);
+
+  validate();
+  CloseDB(&handles);
+
+  // destroy local state
+  DestroyDir(dbname_);
+
+  // new cloud env
+  CreateCloudEnv();
+  options_.env = aenv_.get();
+
+  std::vector<std::string> families;
+  ASSERT_OK(DBCloud::ListColumnFamilies(options_, dbname_, &families));
+  std::sort(families.begin(), families.end());
+  ASSERT_TRUE(families == std::vector<std::string>(
+                              {"cf1", "cf2", kDefaultColumnFamilyName}));
+
+  OpenWithColumnFamilies({kDefaultColumnFamilyName, "cf1", "cf2"}, &handles);
+  validate();
+  CloseDB(&handles);
+}
+
+//
+// Create and read from a clone.
+//
+TEST_F(CloudTest, DISABLED_TrueClone) {
+  std::string master_dbid;
+  std::string newdb1_dbid;
+  std::string newdb2_dbid;
+  std::string newdb3_dbid;
+
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  ASSERT_OK(db_->GetDbIdentity(master_dbid));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  CloseDB();
+  value.clear();
+  auto clone_path1 = "clone1_path-" + test_id_;
+  {
+    // Create a new instance with different src and destination paths.
+    // This is a true clone and should have all the contents of the master db
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath1", cloud_fs_options_.src_bucket.GetBucketName(),
+            clone_path1, &cloud_db, &env);
+
+    // Retrieve the id of the clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb1_dbid));
+
+    // Since we used different src and destination paths for the master and
+    // clone1, the clone should have its own identity.
+    ASSERT_NE(master_dbid, newdb1_dbid);
+
+    // check that the original kv appears in the clone
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+
+    // write a new value to the clone
+    ASSERT_OK(cloud_db->Put(WriteOptions(), "Hello", "Clone1"));
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("Clone1") == 0);
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+  }
+  {
+    // Reopen clone1 with a different local path
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath2", cloud_fs_options_.src_bucket.GetBucketName(),
+            clone_path1, &cloud_db, &env);
+
+    // Retrieve the id of the clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb2_dbid));
+    ASSERT_EQ(newdb1_dbid, newdb2_dbid);
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("Clone1") == 0);
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+  }
+  {
+    // Reopen clone1 with the same local path as above.
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath2", cloud_fs_options_.src_bucket.GetBucketName(),
+            clone_path1, &cloud_db, &env);
+
+    // Retrieve the id of the clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb2_dbid));
+    ASSERT_EQ(newdb1_dbid, newdb2_dbid);
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("Clone1") == 0);
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+  }
+  auto clone_path2 = "clone2_path-" + test_id_;
+  {
+    // Create clone2
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath3",  // xxx try with localpath2
+            cloud_fs_options_.src_bucket.GetBucketName(), clone_path2,
+            &cloud_db, &env);
+
+    // Retrieve the id of the clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb3_dbid));
+    ASSERT_NE(newdb2_dbid, newdb3_dbid);
+
+    // verify that data is still as it was in the original db.
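+    // (clone2 was cloned from the master's source path, not from clone1's
+    // destination path, so it should see the master's "World" rather than
+    // clone1's "Clone1".)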
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+
+    // Assert that there are no redundant sst files
+    auto* cimpl =
+        static_cast<CloudFileSystemImpl*>(env->GetFileSystem().get());
+    std::vector<std::string> to_be_deleted;
+    ASSERT_OK(
+        cimpl->FindObsoleteFiles(cimpl->GetSrcBucketName(), &to_be_deleted));
+    // TODO(igor): Re-enable once purger code is fixed
+    // ASSERT_EQ(to_be_deleted.size(), 0);
+
+    // Assert that there are no redundant dbids
+    ASSERT_OK(
+        cimpl->FindObsoleteDbid(cimpl->GetSrcBucketName(), &to_be_deleted));
+    // TODO(igor): Re-enable once purger code is fixed
+    // ASSERT_EQ(to_be_deleted.size(), 0);
+  }
+
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      GetCloudFileSystem()->GetSrcBucketName(), clone_path1);
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      GetCloudFileSystem()->GetSrcBucketName(), clone_path2);
+}
+
+//
+// verify that the dbid registry is appropriately handled
+//
+TEST_F(CloudTest, DbidRegistry) {
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+
+  // Assert that there is one db in the registry
+  DbidList dbs;
+  ASSERT_OK(GetCloudFileSystem()->GetDbidList(
+      GetCloudFileSystem()->GetSrcBucketName(), &dbs));
+  ASSERT_GE(dbs.size(), 1);
+
+  CloseDB();
+}
+
+TEST_F(CloudTest, KeepLocalFiles) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  for (int iter = 0; iter < 4; ++iter) {
+    cloud_fs_options_.use_direct_io_for_cloud_download = true;
+    // Create two files
+    OpenDB();
+    std::string value;
+    ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+    ASSERT_OK(db_->Flush(FlushOptions()));
+    ASSERT_OK(db_->Put(WriteOptions(), "Hello2", "World2"));
+    ASSERT_OK(db_->Flush(FlushOptions()));
+
+    CloseDB();
+    DestroyDir(dbname_);
+    OpenDB();
+
+    std::vector<std::string> files;
+    ASSERT_OK(Env::Default()->GetChildren(dbname_, &files));
+    long sst_files =
+        std::count_if(files.begin(), files.end(), [](std::string const& file) {
+          return file.find("sst") != std::string::npos;
+        });
+    ASSERT_EQ(sst_files, 2);
+
+    ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+    ASSERT_EQ(value, "World");
+    ASSERT_OK(db_->Get(ReadOptions(), "Hello2", &value));
+    ASSERT_EQ(value, "World2");
+
+    CloseDB();
+    ValidateCloudLiveFilesSrcSize();
+    GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+        GetCloudFileSystem()->GetSrcBucketName(), dbname_);
+    DestroyDir(dbname_);
+  }
+}
+
+TEST_F(CloudTest, CopyToFromGcs) {
+  std::string fname = dbname_ + "/100000.sst";
+
+  // iter 0 -- not using transfer manager
+  // iter 1 -- using transfer manager
+  for (int iter = 0; iter < 2; ++iter) {
+    // Create GCP env
+    cloud_fs_options_.keep_local_sst_files = true;
+    CreateCloudEnv();
+    auto* cimpl = GetCloudFileSystemImpl();
+    cimpl->TEST_InitEmptyCloudManifest();
+    char buffer[1 * 1024 * 1024];
+
+    // create a 10 MB file and upload it to the cloud
+    {
+      std::unique_ptr<FSWritableFile> writer;
+      ASSERT_OK(aenv_->GetFileSystem()->NewWritableFile(fname, kFileOptions,
+                                                        &writer, kDbg));
+
+      for (int i = 0; i < 10; i++) {
+        ASSERT_OK(
+            writer->Append(Slice(buffer, sizeof(buffer)), kIOOptions, kDbg));
+      }
+      // sync and close file
+    }
+
+    // delete the file manually.
+    ASSERT_OK(base_env_->DeleteFile(fname));
+
+    // reopen the file for reading. It should be refetched from cloud storage.
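+    // (The refetch is implicit: NewRandomAccessFile on the cloud-backed
+    // file system is expected to download the object on a local miss.)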
+    {
+      std::unique_ptr<FSRandomAccessFile> reader;
+      ASSERT_OK(aenv_->GetFileSystem()->NewRandomAccessFile(
+          fname, kFileOptions, &reader, kDbg));
+
+      uint64_t offset = 0;
+      for (int i = 0; i < 10; i++) {
+        Slice result;
+        char* scratch = &buffer[0];
+        ASSERT_OK(reader->Read(offset, sizeof(buffer), kIOOptions, &result,
+                               scratch, kDbg));
+        ASSERT_EQ(result.size(), sizeof(buffer));
+        offset += sizeof(buffer);
+      }
+    }
+  }
+}
+
+TEST_F(CloudTest, DelayFileDeletion) {
+  std::string fname = dbname_ + "/000010.sst";
+
+  // Create GCP env
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.cloud_file_deletion_delay = std::chrono::seconds(2);
+  CreateCloudEnv();
+  auto* cimpl = GetCloudFileSystemImpl();
+  cimpl->TEST_InitEmptyCloudManifest();
+
+  auto createFile = [&]() {
+    std::unique_ptr<FSWritableFile> writer;
+    ASSERT_OK(aenv_->GetFileSystem()->NewWritableFile(fname, kFileOptions,
+                                                      &writer, kDbg));
+
+    for (int i = 0; i < 10; i++) {
+      ASSERT_OK(writer->Append("igor", kIOOptions, kDbg));
+    }
+    // sync and close file
+  };
+
+  for (int iter = 0; iter <= 1; ++iter) {
+    createFile();
+    // delete the file
+    ASSERT_OK(aenv_->GetFileSystem()->DeleteFile(fname, kIOOptions, kDbg));
+    // the file should still be there
+    ASSERT_OK(aenv_->GetFileSystem()->FileExists(fname, kIOOptions, kDbg));
+
+    if (iter == 1) {
+      // should prevent the deletion
+      createFile();
+    }
+
+    std::this_thread::sleep_for(std::chrono::seconds(3));
+    auto st = aenv_->GetFileSystem()->FileExists(fname, kIOOptions, kDbg);
+    if (iter == 0) {
+      // in iter==0 the file should be deleted after 2 seconds
+      ASSERT_TRUE(st.IsNotFound());
+    } else {
+      // in iter==1 the file should not be deleted because we wrote a new file
+      ASSERT_OK(st);
+    }
+  }
+}
+
+// Verify that a savepoint copies all src files to the destination
+TEST_F(CloudTest, Savepoint) {
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  CloseDB();
+  value.clear();
+  std::string dest_path = "/clone2_path-" + test_id_;
+  {
+    // Create a new instance with different src and destination paths.
+    // This is a true clone and should have all the contents of the master db
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath1", cloud_fs_options_.src_bucket.GetBucketName(),
+            dest_path, &cloud_db, &env);
+
+    // check that the original kv appears in the clone
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+
+    // there should be only one sst file
+    std::vector<LiveFileMetaData> flist;
+    cloud_db->GetLiveFilesMetaData(&flist);
+    ASSERT_TRUE(flist.size() == 1);
+
+    auto* cimpl =
+        static_cast<CloudFileSystemImpl*>(env->GetFileSystem().get());
+    auto remapped_fname = cimpl->RemapFilename(flist[0].name);
+    // source path
+    std::string spath = cimpl->GetSrcObjectPath() + "/" + remapped_fname;
+    ASSERT_OK(cimpl->GetStorageProvider()->ExistsCloudObject(
+        cimpl->GetSrcBucketName(), spath));
+
+    // Verify that the destination path does not have any sst files
+    std::string dpath = dest_path + "/" + remapped_fname;
+    ASSERT_TRUE(cimpl->GetStorageProvider()
+                    ->ExistsCloudObject(cimpl->GetSrcBucketName(), dpath)
+                    .IsNotFound());
+
+    // write a new value to the clone
+    ASSERT_OK(cloud_db->Put(WriteOptions(), "Hell", "Done"));
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hell", &value));
+    ASSERT_TRUE(value.compare("Done") == 0);
+
+    // Invoke savepoint to populate the destination path from the source path
+    ASSERT_OK(cloud_db->Savepoint());
+
+    // check that the sst file is copied to the dest path
+    ASSERT_OK(cimpl->GetStorageProvider()->ExistsCloudObject(
+        cimpl->GetSrcBucketName(), dpath));
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+  }
+  {
+    // Reopen the clone
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath2", cloud_fs_options_.src_bucket.GetBucketName(),
+            dest_path, &cloud_db, &env);
+
+    // check that both kvs appear in the clone
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hell", &value));
+    ASSERT_TRUE(value.compare("Done") == 0);
+  }
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      GetCloudFileSystem()->GetSrcBucketName(), dest_path);
+}
+
+// no encryption support yet
+// TEST_F(CloudTest, Encryption) {
+//   // Create GCP env
+//   cloud_fs_options_.server_side_encryption = true;
+//   char* key_id = getenv("GCP_KMS_KEY_ID");
+//   if (key_id != nullptr) {
+//     cloud_fs_options_.encryption_key_id = std::string(key_id);
+//     Log(options_.info_log, "Found encryption key id in env variable %s",
+//         key_id);
+//   }
+
+//   OpenDB();
+
+//   ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+//   // create a file
+//   ASSERT_OK(db_->Flush(FlushOptions()));
+//   CloseDB();
+
+//   OpenDB();
+//   std::string value;
+//   ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+//   ASSERT_EQ(value, "World");
+//   CloseDB();
+// }
+
+TEST_F(CloudTest, DirectReads) {
+  options_.use_direct_reads = true;
+  options_.use_direct_io_for_flush_and_compaction = true;
+  BlockBasedTableOptions bbto;
+  bbto.no_block_cache = true;
+  bbto.block_size = 1024;
+  options_.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  OpenDB();
+
+  for (int i = 0; i < 50; ++i) {
+    ASSERT_OK(db_->Put(WriteOptions(), "Hello" + std::to_string(i), "World"));
+  }
+  // create a file
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  std::string value;
+  for (int i = 0; i < 50; ++i) {
+    ASSERT_OK(db_->Get(ReadOptions(), "Hello" + std::to_string(i), &value));
+    ASSERT_EQ(value, "World");
+  }
+  CloseDB();
+}
+
+#ifdef USE_KAFKA
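+// The Kafka-backed WAL test below assumes a locally reachable broker at
+// localhost:9092 (see metadata.broker.list); it compiles only with USE_KAFKA.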
+TEST_F(CloudTest, KeepLocalLogKafka) {
+  cloud_fs_options_.keep_local_log_files = false;
+  cloud_fs_options_.log_type = LogType::kLogKafka;
+  cloud_fs_options_.kafka_log_options
+      .client_config_params["metadata.broker.list"] = "localhost:9092";
+
+  OpenDB();
+
+  ASSERT_OK(db_->Put(WriteOptions(), "Franz", "Kafka"));
+
+  // Destroy the DB in memory and on the local file system.
+  delete db_;
+  db_ = nullptr;
+  aenv_.reset();
+  DestroyDir(dbname_);
+  DestroyDir("/tmp/ROCKSET");
+
+  // Create a new env.
+  CreateCloudEnv();
+
+  // Give the env enough time to consume WALs
+  std::this_thread::sleep_for(std::chrono::seconds(3));
+
+  // Open the DB.
+  cloud_fs_options_.keep_local_log_files = true;
+  auto* cimpl = GetCloudFileSystemImpl();
+  options_.wal_dir = cimpl->GetWALCacheDir();
+  OpenDB();
+
+  // Test read.
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "Franz", &value));
+  ASSERT_EQ(value, "Kafka");
+
+  CloseDB();
+}
+#endif /* USE_KAFKA */
+
+// TODO(igor): determine why this fails,
+// https://github.com/rockset/rocksdb-cloud/issues/35
+// TEST_F(CloudTest, DISABLED_KeepLocalLogKinesis) {
+//   cloud_fs_options_.keep_local_log_files = false;
+//   cloud_fs_options_.log_type = LogType::kLogKinesis;
+
+//   OpenDB();
+
+//   // Test write.
+//   ASSERT_OK(db_->Put(WriteOptions(), "Tele", "Kinesis"));
+
+//   // Destroy DB in memory and on local file system.
+//   delete db_;
+//   db_ = nullptr;
+//   aenv_.reset();
+//   DestroyDir(dbname_);
+//   DestroyDir("/tmp/ROCKSET");
+
+//   // Create new env.
+//   CreateCloudEnv();
+
+//   // Give env enough time to consume WALs
+//   std::this_thread::sleep_for(std::chrono::seconds(3));
+
+//   // Open DB.
+//   cloud_fs_options_.keep_local_log_files = true;
+//   auto* cimpl = GetCloudFileSystemImpl();
+//   options_.wal_dir = cimpl->GetWALCacheDir();
+//   OpenDB();
+
+//   // Test read.
+//   std::string value;
+//   ASSERT_OK(db_->Get(ReadOptions(), "Tele", &value));
+//   ASSERT_EQ(value, "Kinesis");
+
+//   CloseDB();
+// }
+
+// Test whether we are able to recover nicely from two different writers to
+// the same GCS bucket.
+// (This is the feature that was enabled by CLOUDMANIFEST.)
+TEST_F(CloudTest, TwoDBsOneBucket) {
+  auto firstDB = dbname_;
+  auto secondDB = dbname_ + "-1";
+  cloud_fs_options_.keep_local_sst_files = true;
+  std::string value;
+
+  cloud_fs_options_.resync_on_open = true;
+  OpenDB();
+  auto* cimpl = GetCloudFileSystemImpl();
+  auto firstManifestFile =
+      cimpl->GetDestObjectPath() + "/" + cimpl->RemapFilename("MANIFEST-1");
+  EXPECT_OK(cimpl->GetStorageProvider()->ExistsCloudObject(
+      cimpl->GetDestBucketName(), firstManifestFile));
+  // Create two files
+  ASSERT_OK(db_->Put(WriteOptions(), "First", "File"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Second", "File"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  auto files = GetSSTFiles(dbname_);
+  EXPECT_EQ(files.size(), 2);
+  CloseDB();
+
+  cloud_fs_options_.resync_on_open = false;
+  // Open again, with no destination bucket
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Third", "File"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  auto newFiles = GetSSTFiles(dbname_);
+  EXPECT_EQ(newFiles.size(), 3);
+  // Remember the third file we created
+  std::vector<std::string> diff;
+  std::set_difference(newFiles.begin(), newFiles.end(), files.begin(),
+                      files.end(), std::inserter(diff, diff.begin()));
+  ASSERT_EQ(diff.size(), 1);
+  auto thirdFile = diff[0];
+  CloseDB();
+
+  // Open in a different directory with the destination bucket set
+  dbname_ = secondDB;
+  cloud_fs_options_.dest_bucket = cloud_fs_options_.src_bucket;
+  cloud_fs_options_.resync_on_open = true;
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Third", "DifferentFile"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  CloseDB();
+
+  // Open back in the first directory with no destination
+  dbname_ = firstDB;
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  cloud_fs_options_.resync_on_open = false;
+  OpenDB();
+  // Changes to the cloud database should make no difference for us. This is
+  // an important check because we should not reinitialize from the cloud if
+  // we have a valid local directory!
+  ASSERT_OK(db_->Get(ReadOptions(), "Third", &value));
+  EXPECT_EQ(value, "File");
+  CloseDB();
+
+  // Reopen in the first directory, this time with a destination path
+  dbname_ = firstDB;
+  cloud_fs_options_.dest_bucket = cloud_fs_options_.src_bucket;
+  cloud_fs_options_.resync_on_open = true;
+  OpenDB();
+  // Changes to the cloud database should be pulled down now.
+  ASSERT_OK(db_->Get(ReadOptions(), "Third", &value));
+  EXPECT_EQ(value, "DifferentFile");
+  files = GetSSTFiles(dbname_);
+  // Should no longer be in my directory because it's not part of the new
+  // MANIFEST.
+  EXPECT_TRUE(files.find(thirdFile) == files.end());
+
+  // We need to sleep a bit because file deletion happens in a different
+  // thread, so it might not be immediately deleted.
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  EXPECT_TRUE(GetCloudFileSystem()
+                  ->GetStorageProvider()
+                  ->ExistsCloudObject(GetCloudFileSystem()->GetDestBucketName(),
+                                      firstManifestFile)
+                  .IsNotFound());
+  CloseDB();
+}
+
+// This test is similar to TwoDBsOneBucket, but is much more chaotic and
+// illegal -- it runs two databases on the exact same GCS bucket. The work on
+// CLOUDMANIFEST enables us to run in that configuration for an extended
+// amount of time (1 hour by default) without any issues -- the last
+// CLOUDMANIFEST writer wins.
+// This test only applies when the cookie is empty, so whenever the db is
+// reopened, it always fetches the latest CM/M files from GCS
+TEST_F(CloudTest, TwoConcurrentWritersCookieEmpty) {
+  cloud_fs_options_.resync_on_open = true;
+  auto firstDB = dbname_;
+  auto secondDB = dbname_ + "-1";
+
+  DBCloud *db1, *db2;
+  Env *aenv1, *aenv2;
+
+  auto openDB1 = [&] {
+    dbname_ = firstDB;
+    OpenDB();
+    db1 = db_;
+    db_ = nullptr;
+    aenv1 = aenv_.release();
+  };
+  auto openDB2 = [&] {
+    dbname_ = secondDB;
+    OpenDB();
+    db2 = db_;
+    db_ = nullptr;
+    aenv2 = aenv_.release();
+  };
+  auto closeDB1 = [&] {
+    db_ = db1;
+    aenv_.reset(aenv1);
+    CloseDB();
+  };
+  auto closeDB2 = [&] {
+    db_ = db2;
+    aenv_.reset(aenv2);
+    CloseDB();
+  };
+
+  openDB1();
+  openDB2();
+
+  // Create a bunch of files, reopening the databases along the way
+  for (int i = 0; i < 5; ++i) {
+    closeDB1();
+    if (i == 2) {
+      DestroyDir(firstDB);
+    }
+    // opening the database makes me the master (i.e. CLOUDMANIFEST points to
+    // my manifest), so my writes are applied to the shared space!
+    openDB1();
+    for (int j = 0; j < 5; ++j) {
+      auto key = std::to_string(i) + std::to_string(j) + "1";
+      ASSERT_OK(db1->Put(WriteOptions(), key, "FirstDB"));
+      ASSERT_OK(db1->Flush(FlushOptions()));
+    }
+    closeDB2();
+    if (i == 2) {
+      DestroyDir(secondDB);
+    }
+    // opening the database makes me the master (i.e. CLOUDMANIFEST points to
+    // my manifest), so my writes are applied to the shared space!
+    openDB2();
+    for (int j = 0; j < 5; ++j) {
+      auto key = std::to_string(i) + std::to_string(j) + "2";
+      ASSERT_OK(db2->Put(WriteOptions(), key, "SecondDB"));
+      ASSERT_OK(db2->Flush(FlushOptions()));
+    }
+  }
+
+  dbname_ = firstDB;
+  // This write should not be applied, because DB2 is currently the owner of
+  // the GCS bucket
+  ASSERT_OK(db1->Put(WriteOptions(), "ShouldNotBeApplied", ""));
+  ASSERT_OK(db1->Flush(FlushOptions()));
+  closeDB1();
+  closeDB2();
+
+  openDB1();
+  for (int i = 0; i < 5; ++i) {
+    for (int j = 0; j < 5; ++j) {
+      std::string val;
+      auto key = std::to_string(i) + std::to_string(j);
+      ASSERT_OK(db1->Get(ReadOptions(), key + "1", &val));
+      EXPECT_EQ(val, "FirstDB");
+      ASSERT_OK(db1->Get(ReadOptions(), key + "2", &val));
+      EXPECT_EQ(val, "SecondDB");
+    }
+  }
+
+  std::string v;
+  ASSERT_TRUE(db1->Get(ReadOptions(), "ShouldNotBeApplied", &v).IsNotFound());
+  closeDB1();
+}
+
+// Creates a pure RocksDB database and makes sure we can migrate it to RocksDB
+// Cloud
+TEST_F(CloudTest, MigrateFromPureRocksDB) {
+  {  // Create a local RocksDB
+    Options options;
+    options.create_if_missing = true;
+    DB* dbptr;
+    std::unique_ptr<DB> db;
+    ASSERT_OK(DB::Open(options, dbname_, &dbptr));
+    db.reset(dbptr);
+    // create 5 files
+    for (int i = 0; i < 5; ++i) {
+      auto key = "key" + std::to_string(i);
+      ASSERT_OK(db->Put(WriteOptions(), key, key));
+      ASSERT_OK(db->Flush(FlushOptions()));
+    }
+  }
+
+  CreateCloudEnv();
+  ASSERT_OK(GetCloudFileSystem()->MigrateFromPureRocksDB(dbname_));
+
+  // Now open RocksDB Cloud
+  // TODO(dhruba) Figure out how to make this work without skipping dbid
+  // verification
+  cloud_fs_options_.skip_dbid_verification = true;
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.validate_filesize = false;
+  OpenDB();
+  for (int i = 5; i < 10; ++i) {
+    auto key = "key" + std::to_string(i);
+    ASSERT_OK(db_->Put(WriteOptions(), key, key));
+    ASSERT_OK(db_->Flush(FlushOptions()));
+  }
+
+  for (int i = 0; i < 10; ++i) {
+    auto key = "key" + std::to_string(i);
+    std::string value;
+    ASSERT_OK(db_->Get(ReadOptions(), key, &value));
+    ASSERT_EQ(value, key);
+  }
+  CloseDB();
+}
+
+// Tests that we can open a cloud DB without destination or source bucket set.
+// This is useful for tests.
+TEST_F(CloudTest, NoDestOrSrc) {
+  DestroyDir(dbname_);
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.src_bucket.SetBucketName("");
+  cloud_fs_options_.src_bucket.SetObjectPath("");
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "key", "value"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "key", &value));
+  ASSERT_EQ(value, "value");
+  CloseDB();
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "key", &value));
+  ASSERT_EQ(value, "value");
+  CloseDB();
+}
+
+TEST_F(CloudTest, PreloadCloudManifest) {
+  DestroyDir(dbname_);
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  CloseDB();
+  value.clear();
+
+  // Reopen and validate, preloading the cloud manifest
+  GetCloudFileSystem()->PreloadCloudManifest(dbname_);
+
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+}
+
+//
+// Test Ephemeral mode. In this mode, the database is cloned
+// from a cloud bucket but new writes are not propagated
+// back to any cloud bucket. Once cloned, all updates are local.
+//
+TEST_F(CloudTest, Ephemeral) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  options_.level0_file_num_compaction_trigger = 100;  // never compact
+
+  // Create a primary DB with two files
+  OpenDB();
+  std::string value;
+  std::string newdb1_dbid;
+  std::set<uint64_t> cloud_files;
+  ASSERT_OK(db_->Put(WriteOptions(), "Name", "dhruba"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello2", "borthakur"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  CloseDB();
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+
+  // Reopen the same database in ephemeral mode by cloning the original.
+  // Do not destroy the local dir. Writes to this db do not make it back
+  // to any cloud storage.
+  {
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("db_ephemeral", "", "", &cloud_db, &env);
+
+    // Retrieve the id of the first reopen
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb1_dbid));
+
+    // verify that we still have two sst files
+    ASSERT_EQ(2, GetSSTFilesClone("db_ephemeral").size());
+
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Name", &value));
+    ASSERT_EQ(value, "dhruba");
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello2", &value));
+    ASSERT_EQ(value, "borthakur");
+
+    // Write one more record.
+    // There should be 3 local sst files in the ephemeral db.
+    ASSERT_OK(cloud_db->Put(WriteOptions(), "zip", "94087"));
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+    ASSERT_EQ(3, GetSSTFilesClone("db_ephemeral").size());
+
+    // check that the cloud files did not get updated
+    ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files));
+    ASSERT_EQ(2, cloud_files.size());
+    cloud_files.clear();
+  }
+
+  // reopen the main db and write two more records to it
+  OpenDB();
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+
+  // write two more records to it.
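+  // (These two flushes advance the cloud bucket to 4 live files while the
+  // ephemeral clone still references only its 3 local files.)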
+ ASSERT_OK(db_->Put(WriteOptions(), "Key1", "onlyInMainDB")); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_OK(db_->Put(WriteOptions(), "Key2", "onlyInMainDB")); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_EQ(4, GetSSTFiles(dbname_).size()); + CloseDB(); + ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files)); + ASSERT_EQ(4, cloud_files.size()); + cloud_files.clear(); + + // At this point, the main db has 4 files while the ephemeral + // database has diverged earlier with 3 local files. If we try + // to reopen the ephemeral clone, it should not download new + // files from the cloud + { + std::unique_ptr env; + std::unique_ptr cloud_db; + std::string dbid; + options_.info_log = nullptr; + CreateLoggerFromOptions(clone_dir_ + "/db_ephemeral", options_, + &options_.info_log); + + CloneDB("db_ephemeral", "", "", &cloud_db, &env); + + // Retrieve the id of this clone. It should be same as before + ASSERT_OK(cloud_db->GetDbIdentity(dbid)); + ASSERT_EQ(newdb1_dbid, dbid); + + ASSERT_EQ(3, GetSSTFilesClone("db_ephemeral").size()); + + // verify that a key written to the ephemeral db still exists + ASSERT_OK(cloud_db->Get(ReadOptions(), "zip", &value)); + ASSERT_EQ(value, "94087"); + + // verify that keys written to the main db after the ephemeral + // was clones do not appear in the ephemeral db. + ASSERT_NOK(cloud_db->Get(ReadOptions(), "Key1", &value)); + ASSERT_NOK(cloud_db->Get(ReadOptions(), "Key2", &value)); + } +} + +// This test is performed in a rare race condition where ephemral clone is +// started after durable clone upload its CLOUDMANIFEST but before it uploads +// one of the MANIFEST. In this case, we want to verify that ephemeral clone is +// able to reinitialize instead of crash looping. +TEST_F(CloudTest, EphemeralOnCorruptedDB) { + cloud_fs_options_.keep_local_sst_files = true; + cloud_fs_options_.resync_on_open = true; + options_.level0_file_num_compaction_trigger = 100; // never compact + + OpenDB(); + + std::vector files; + base_env_->GetChildren(dbname_, &files); + + // Get the MANIFEST file + std::string manifest_file_name; + for (auto const& file_name : files) { + if (file_name.rfind("MANIFEST", 0) == 0) { + manifest_file_name = file_name; + break; + } + } + + ASSERT_FALSE(manifest_file_name.empty()); + + // Delete MANIFEST file from S3 bucket. + // This is to simulate the scenario where CLOUDMANIFEST is uploaded, but + // MANIFEST is not yet uploaded from the durable shard. + ASSERT_NE(aenv_.get(), nullptr); + GetCloudFileSystem()->GetStorageProvider()->DeleteCloudObject( + GetCloudFileSystem()->GetSrcBucketName(), + GetCloudFileSystem()->GetSrcObjectPath() + "/" + manifest_file_name); + + // Ephemeral clone should fail. + std::unique_ptr clone_db; + std::unique_ptr env; + Status st = CloneDB("clone1", "", "", &clone_db, &env); + ASSERT_TRUE(st.IsCorruption()); + + // Put the MANIFEST file back + GetCloudFileSystem()->GetStorageProvider()->PutCloudObject( + dbname_ + "/" + manifest_file_name, + GetCloudFileSystem()->GetSrcBucketName(), + GetCloudFileSystem()->GetSrcObjectPath() + "/" + manifest_file_name); + + // Try one more time. This time it should succeed. + clone_db.reset(); + env.reset(); + st = CloneDB("clone1", "", "", &clone_db, &env); + ASSERT_OK(st); + + clone_db->Close(); + CloseDB(); +} + +// +// Test Ephemeral clones with resyncOnOpen mode. +// In this mode, every open of the ephemeral clone db causes its +// data to be resynced with the master db. 
+//
+TEST_F(CloudTest, EphemeralResync) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.resync_on_open = true;
+  options_.level0_file_num_compaction_trigger = 100;  // never compact
+
+  // Create a primary DB with two files
+  OpenDB();
+  std::string value;
+  std::string newdb1_dbid;
+  std::set<uint64_t> cloud_files;
+  ASSERT_OK(db_->Put(WriteOptions(), "Name", "dhruba"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello2", "borthakur"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  CloseDB();
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+
+  // Reopen the same database in ephemeral mode by cloning the original.
+  // Do not destroy the local dir. Writes to this db do not make it back
+  // to any cloud storage.
+  {
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("db_ephemeral", "", "", &cloud_db, &env);
+
+    // Retrieve the id of the first reopen
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb1_dbid));
+
+    // verify that we still have two sst files
+    ASSERT_EQ(2, GetSSTFilesClone("db_ephemeral").size());
+
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Name", &value));
+    ASSERT_EQ(value, "dhruba");
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello2", &value));
+    ASSERT_EQ(value, "borthakur");
+
+    // Write one more record.
+    // There should be 3 local sst files in the ephemeral db.
+    ASSERT_OK(cloud_db->Put(WriteOptions(), "zip", "94087"));
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+    ASSERT_EQ(3, GetSSTFilesClone("db_ephemeral").size());
+
+    // check that the cloud files did not get updated
+    ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files));
+    ASSERT_EQ(2, cloud_files.size());
+    cloud_files.clear();
+  }
+
+  // reopen the main db and write two more records to it
+  OpenDB();
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+
+  // write two more records to it.
+  ASSERT_OK(db_->Put(WriteOptions(), "Key1", "onlyInMainDB"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Key2", "onlyInMainDB"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_EQ(4, GetSSTFiles(dbname_).size());
+  CloseDB();
+  ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files));
+  ASSERT_EQ(4, cloud_files.size());
+  cloud_files.clear();
+
+  // At this point, the main db has 4 files while the ephemeral
+  // database has diverged earlier with 3 local files.
+  // Reopen the ephemeral db with the resync_on_open flag.
+  // This means that earlier updates to the ephemeral db are lost.
+  // It also means that the latest updates in the master db
+  // are reflected in the newly opened ephemeral database.
+  {
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    std::string dbid;
+    options_.info_log = nullptr;
+    CreateLoggerFromOptions(clone_dir_ + "/db_ephemeral", options_,
+                            &options_.info_log);
+
+    CloneDB("db_ephemeral", "", "", &cloud_db, &env);
+
+    // Retrieve the id of this clone. It should be the same as before
+    ASSERT_OK(cloud_db->GetDbIdentity(dbid));
+    ASSERT_EQ(newdb1_dbid, dbid);
+
+    // verify that a key written to the ephemeral db does not exist
+    ASSERT_NOK(cloud_db->Get(ReadOptions(), "zip", &value));
+
+    // verify that keys written to the main db after the ephemeral
+    // was cloned appear in the ephemeral db.
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Key1", &value));
+    ASSERT_EQ(value, "onlyInMainDB");
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Key2", &value));
+    ASSERT_EQ(value, "onlyInMainDB");
+  }
+}
+
+TEST_F(CloudTest, CheckpointToCloud) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  options_.level0_file_num_compaction_trigger = 100;  // never compact
+
+  // Pre-create the bucket.
+  CreateCloudEnv();
+  aenv_.reset();
+
+  // Give the newly created bucket a moment to become visible.
+  std::this_thread::sleep_for(std::chrono::seconds(1));
+
+  auto checkpoint_bucket = cloud_fs_options_.dest_bucket;
+
+  std::string ckpt_from_object_path =
+      cloud_fs_options_.dest_bucket.GetObjectPath();
+  ckpt_from_object_path += "_from";
+  cloud_fs_options_.src_bucket = BucketOptions();
+  cloud_fs_options_.src_bucket.SetObjectPath(ckpt_from_object_path);
+  cloud_fs_options_.dest_bucket = BucketOptions();
+  cloud_fs_options_.dest_bucket.SetObjectPath(ckpt_from_object_path);
+
+  // Create a DB with two files
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "c", "d"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  ASSERT_OK(
+      db_->CheckpointToCloud(checkpoint_bucket, CheckpointToCloudOptions()));
+
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+  CloseDB();
+
+  DestroyDir(dbname_);
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      checkpoint_bucket.GetBucketName(),
+      cloud_fs_options_.dest_bucket.GetObjectPath());
+
+  cloud_fs_options_.src_bucket = checkpoint_bucket;
+  cloud_fs_options_.dest_bucket = checkpoint_bucket;
+
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "a", &value));
+  ASSERT_EQ(value, "b");
+  ASSERT_OK(db_->Get(ReadOptions(), "c", &value));
+  ASSERT_EQ(value, "d");
+  CloseDB();
+
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      checkpoint_bucket.GetBucketName(), checkpoint_bucket.GetObjectPath());
+}
+
+// Basic test to copy an object within GCS.
+TEST_F(CloudTest, CopyObjectTest) {
+  CreateCloudEnv();
+
+  // We need to open an empty DB in order for the epoch to work.
+  OpenDB();
+
+  std::string content = "This is a test file";
+  std::string fname = dbname_ + "/100000.sst";
+  std::string dst_fname = dbname_ + "/200000.sst";
+
+  {
+    std::unique_ptr<FSWritableFile> writableFile;
+    aenv_->GetFileSystem()->NewWritableFile(fname, kFileOptions, &writableFile,
+                                            kDbg);
+    writableFile->Append(content, kIOOptions, kDbg);
+    writableFile->Fsync(kIOOptions, kDbg);
+  }
+
+  auto st = GetCloudFileSystem()->GetStorageProvider()->CopyCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(),
+      GetCloudFileSystem()->RemapFilename(fname),
+      GetCloudFileSystem()->GetSrcBucketName(), dst_fname);
+  ASSERT_OK(st);
+
+  {
+    std::unique_ptr<CloudStorageReadableFile> readableFile;
+    st = GetCloudFileSystem()->GetStorageProvider()->NewCloudReadableFile(
+        GetCloudFileSystem()->GetSrcBucketName(), dst_fname, kFileOptions,
+        &readableFile, kDbg);
+    ASSERT_OK(st);
+
+    char scratch[100];
+    Slice result;
+    std::unique_ptr<FSSequentialFile> sequentialFile(readableFile.release());
+    st = sequentialFile->Read(100, kIOOptions, &result, scratch, kDbg);
+    ASSERT_OK(st);
+    ASSERT_EQ(19, result.size());
+    ASSERT_EQ(result, Slice(content));
+  }
+
+  CloseDB();
+}
+
+//
+// Verify that we can cache data from GCS in a persistent cache.
+//
+TEST_F(CloudTest, PersistentCache) {
+  std::string pcache = test::TmpDir() + "/persistent_cache";
+  SetPersistentCache(pcache, 1);
+
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  CloseDB();
+  value.clear();
+
+  // Reopen and validate
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+  CloseDB();
+}
+
+// This test creates 2 DBs that share a block cache. Ensure that reads from
+// one DB do not get the values from the other DB.
+TEST_F(CloudTest, SharedBlockCache) {
+  cloud_fs_options_.keep_local_sst_files = false;
+
+  // Share the block cache.
+  BlockBasedTableOptions bbto;
+  bbto.block_cache = NewLRUCache(10 * 1024 * 1024);
+  bbto.format_version = 4;
+  options_.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  OpenDB();
+
+  std::unique_ptr<Env> clone_env;
+  std::unique_ptr<DBCloud> clone_db;
+  CloneDB("newdb1", cloud_fs_options_.src_bucket.GetBucketName(),
+          cloud_fs_options_.src_bucket.GetObjectPath() + "-clone", &clone_db,
+          &clone_env, false /* force_keep_local_on_invalid_dest_bucket */);
+
+  // Flush the first DB.
+  db_->Put(WriteOptions(), "db", "original");
+  db_->Flush(FlushOptions());
+
+  // Flush the second DB.
+  clone_db->Put(WriteOptions(), "db", "clone");
+  clone_db->Flush(FlushOptions());
+
+  std::vector<LiveFileMetaData> file_metadatas;
+  db_->GetLiveFilesMetaData(&file_metadatas);
+  ASSERT_EQ(1, file_metadatas.size());
+
+  file_metadatas.clear();
+  clone_db->GetLiveFilesMetaData(&file_metadatas);
+  ASSERT_EQ(1, file_metadatas.size());
+
+  std::string value;
+  clone_db->Get(ReadOptions(), "db", &value);
+  ASSERT_EQ("clone", value);
+
+  db_->Get(ReadOptions(), "db", &value);
+  ASSERT_EQ("original", value);
+
+  // Cleanup
+  clone_db->Close();
+  CloseDB();
+  auto* clone_cloud_fs =
+      dynamic_cast<CloudFileSystem*>(clone_env->GetFileSystem().get());
+  clone_cloud_fs->GetStorageProvider()->EmptyBucket(
+      cloud_fs_options_.src_bucket.GetBucketName(),
+      cloud_fs_options_.src_bucket.GetObjectPath() + "-clone");
+}
+
+// Verify that sst_file_cache and keep_local_sst_files cannot be set together
+TEST_F(CloudTest, KeepLocalFilesAndFileCache) {
+  cloud_fs_options_.sst_file_cache = NewLRUCache(1024);  // 1 KB cache
+  cloud_fs_options_.keep_local_sst_files = true;
+  ASSERT_TRUE(checkOpen().IsInvalidArgument());
+}
+
+// Verify that the sst_file_cache can be disabled
+TEST_F(CloudTest, FileCacheZero) {
+  cloud_fs_options_.sst_file_cache = NewLRUCache(0);  // zero size
+  OpenDB();
+  auto* cimpl = GetCloudFileSystemImpl();
+  ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "c", "d"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  auto local_files = GetSSTFiles(dbname_);
+  EXPECT_EQ(local_files.size(), 0);
+  EXPECT_EQ(cimpl->FileCacheGetCharge(), 0);
+
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "a", &value));
+  ASSERT_TRUE(value.compare("b") == 0);
+  ASSERT_OK(db_->Get(ReadOptions(), "c", &value));
+  ASSERT_TRUE(value.compare("d") == 0);
+  CloseDB();
+}
+
+// Verify that when the sst_file_cache is very small, no files are kept
+// locally.
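+// (With such a tiny cache, every downloaded sst file is presumably evicted,
+// and its local copy removed, almost immediately, so the charge stays at 0.)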
+TEST_F(CloudTest, FileCacheSmall) {
+  cloud_fs_options_.sst_file_cache = NewLRUCache(10);  // practically zero size
+  OpenDB();
+  auto* cimpl = GetCloudFileSystemImpl();
+  ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "c", "d"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  auto local_files = GetSSTFiles(dbname_);
+  EXPECT_EQ(local_files.size(), 0);
+  EXPECT_EQ(cimpl->FileCacheGetCharge(), 0);
+  CloseDB();
+}
+
+// A relatively large sst_file_cache, so all files stay local.
+TEST_F(CloudTest, FileCacheLarge) {
+  size_t capacity = 10240L;
+  std::shared_ptr<Cache> cache = NewLRUCache(capacity);
+  cloud_fs_options_.sst_file_cache = cache;
+
+  // generate two sst files.
+  OpenDB();
+  auto* cimpl = GetCloudFileSystemImpl();
+  ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "c", "d"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  // check that local sst files exist
+  auto local_files = GetSSTFiles(dbname_);
+  EXPECT_EQ(local_files.size(), 2);
+
+  // check that the local sst files have non-zero size
+  uint64_t totalFileSize = 0;
+  GetSSTFilesTotalSize(dbname_, &totalFileSize);
+  EXPECT_GT(totalFileSize, 0);
+  EXPECT_GE(capacity, totalFileSize);
+
+  // check that the cache has two entries
+  EXPECT_EQ(cimpl->FileCacheGetNumItems(), 2);
+
+  // check that the cache charge matches the total local sst file size
+  EXPECT_EQ(cimpl->FileCacheGetCharge(), totalFileSize);
+  CloseDB();
+}
+
+// The cache will only hold a few files.
+TEST_F(CloudTest, FileCacheOnDemand) {
+  size_t capacity = 3000;
+  int num_shard_bits = 0;  // 1 shard
+  bool strict_capacity_limit = false;
+  double high_pri_pool_ratio = 0;
+
+  std::shared_ptr<Cache> cache =
+      NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
+                  high_pri_pool_ratio, nullptr, kDefaultToAdaptiveMutex,
+                  CacheMetadataChargePolicy::kDontChargeCacheMetadata);
+  cloud_fs_options_.sst_file_cache = cache;
+  options_.level0_file_num_compaction_trigger = 100;  // never compact
+
+  OpenDB();
+  auto* cimpl = GetCloudFileSystemImpl();
+
+  // generate four sst files, each of size about 884 bytes
+  ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "c", "d"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "e", "f"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "g", "h"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  // The db should have 4 sst files in the manifest.
+  std::vector<LiveFileMetaData> flist;
+  db_->GetLiveFilesMetaData(&flist);
+  EXPECT_EQ(flist.size(), 4);
+
+  // verify that there are only two entries in the cache
+  EXPECT_EQ(cimpl->FileCacheGetNumItems(), 2);
+  EXPECT_EQ(cimpl->FileCacheGetCharge(), cache->GetUsage());
+
+  // There should be only two local sst files.
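+  // (The 3000-byte cache cannot hold all four ~884-byte files at once, so
+  // the older files and their local copies were evicted on demand.)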
+  auto local_files = GetSSTFiles(dbname_);
+  EXPECT_EQ(local_files.size(), 2);
+
+  CloseDB();
+}
+
+TEST_F(CloudTest, FindLiveFilesFetchManifestTest) {
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "a", "1"));
+  ASSERT_OK(db_->Flush({}));
+  CloseDB();
+
+  DestroyDir(dbname_);
+
+  // recreate the cloud env, which points to the same bucket and object path
+  CreateCloudEnv();
+
+  std::vector<std::string> live_sst_files;
+  std::string manifest_file;
+
+  // fetch and load the CloudManifest
+  ASSERT_OK(GetCloudFileSystem()->PreloadCloudManifest(dbname_));
+
+  // the manifest file will be fetched to the local db
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &live_sst_files,
+                                                   &manifest_file));
+  EXPECT_EQ(live_sst_files.size(), 1);
+}
+
+TEST_F(CloudTest, FileModificationTimeTest) {
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "a", "1"));
+  ASSERT_OK(db_->Flush({}));
+  std::vector<std::string> live_sst_files;
+  std::string manifest_file;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &live_sst_files,
+                                                   &manifest_file));
+  uint64_t modtime1;
+  ASSERT_OK(aenv_->GetFileSystem()->GetFileModificationTime(
+      dbname_ + pathsep + manifest_file, kIOOptions, &modtime1, kDbg));
+  CloseDB();
+  DestroyDir(dbname_);
+  // don't roll the cloud manifest, so that the manifest file epoch is not
+  // updated
+  cloud_fs_options_.roll_cloud_manifest_on_open = false;
+  OpenDB();
+  uint64_t modtime2;
+  ASSERT_OK(aenv_->GetFileSystem()->GetFileModificationTime(
+      dbname_ + pathsep + manifest_file, kIOOptions, &modtime2, kDbg));
+  // we read the local file modification time, so the second time we open the
+  // db, the modification time has changed
+  EXPECT_GT(modtime2, modtime1);
+}
+
+TEST_F(CloudTest, EmptyCookieTest) {
+  // By default the cookie is empty
+  OpenDB();
+  auto* cfs_impl = GetCloudFileSystemImpl();
+  auto cloud_manifest_file = cfs_impl->CloudManifestFile(dbname_);
+  EXPECT_EQ(basename(cloud_manifest_file), "CLOUDMANIFEST");
+  CloseDB();
+}
+
+TEST_F(CloudTest, NonEmptyCookieTest) {
+  cloud_fs_options_.new_cookie_on_open = "000001";
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+
+  auto cloud_manifest_file =
+      MakeCloudManifestFile(dbname_, cloud_fs_options_.new_cookie_on_open);
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(), cloud_manifest_file));
+  EXPECT_EQ(basename(cloud_manifest_file), "CLOUDMANIFEST-000001");
+  CloseDB();
+  DestroyDir(dbname_);
+  cloud_fs_options_.cookie_on_open = "000001";
+  cloud_fs_options_.new_cookie_on_open = "000001";
+  OpenDB();
+
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(), cloud_manifest_file));
+  EXPECT_EQ(basename(cloud_manifest_file), "CLOUDMANIFEST-000001");
+  CloseDB();
+}
+
+// Verify that the live sst files are the same after applying a cloud manifest
+// delta
+TEST_F(CloudTest, LiveFilesConsistentAfterApplyCloudManifestDeltaTest) {
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  OpenDB();
+
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  std::vector<std::string> live_sst_files1;
+  std::string manifest_file1;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &live_sst_files1,
+                                                   &manifest_file1));
+
+  std::string new_cookie = "2";
+  std::string new_epoch = "dca7f3e19212c4b3";
+  auto delta = CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch};
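+  // RollNewCookie writes the MANIFEST/CLOUDMANIFEST pair for the new cookie;
+  // ApplyCloudManifestDelta then switches the in-memory CloudManifest to it.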
+  ASSERT_OK(
+      GetCloudFileSystemImpl()->RollNewCookie(dbname_, new_cookie, delta));
+  bool applied = false;
+  ASSERT_OK(GetCloudFileSystemImpl()->ApplyCloudManifestDelta(delta, &applied));
+  ASSERT_TRUE(applied);
+
+  std::vector<std::string> live_sst_files2;
+  std::string manifest_file2;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &live_sst_files2,
+                                                   &manifest_file2));
+
+  EXPECT_EQ(live_sst_files1, live_sst_files2);
+  EXPECT_NE(manifest_file1, manifest_file2);
+
+  CloseDB();
+}
+
+// After calling `ApplyCloudManifestDelta`, writes should be persisted in
+// sst files that are visible only in the new MANIFEST
+TEST_F(CloudTest, WriteAfterUpdateCloudManifestArePersistedInNewEpoch) {
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello1", "world1"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  std::string new_cookie = "2";
+  std::string new_epoch = "dca7f3e19212c4b3";
+
+  auto delta = CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch};
+  ASSERT_OK(
+      GetCloudFileSystemImpl()->RollNewCookie(dbname_, new_cookie, delta));
+  bool applied = false;
+  ASSERT_OK(GetCloudFileSystemImpl()->ApplyCloudManifestDelta(delta, &applied));
+  ASSERT_TRUE(applied);
+  GetDBImpl()->NewManifestOnNextUpdate();
+
+  // the following writes are not visible to the old cookie
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello2", "world2"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  // reopen with cookie = 1; new updates after rolling are not visible
+  CloseDB();
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello1", &value));
+  EXPECT_EQ(value, "world1");
+  EXPECT_NOK(db_->Get(ReadOptions(), "Hello2", &value));
+  CloseDB();
+
+  // reopen with cookie = 2; the new updates should still be visible
+  CloseDB();
+  cloud_fs_options_.cookie_on_open = "2";
+  cloud_fs_options_.new_cookie_on_open = "2";
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello1", &value));
+  EXPECT_EQ(value, "world1");
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello2", &value));
+  EXPECT_EQ(value, "world2");
+  CloseDB();
+
+  // Make sure that the changes in the cloud are correct
+  DestroyDir(dbname_);
+  cloud_fs_options_.cookie_on_open = "2";
+  cloud_fs_options_.new_cookie_on_open = "2";
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello1", &value));
+  EXPECT_EQ(value, "world1");
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello2", &value));
+  EXPECT_EQ(value, "world2");
+  CloseDB();
+}
+
+// Test various cases of crashing in the middle of a CloudManifest switch
+TEST_F(CloudTest, CMSwitchCrashInMiddleTest) {
+  cloud_fs_options_.roll_cloud_manifest_on_open = false;
+  cloud_fs_options_.cookie_on_open = "1";
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "CloudFileSystemImpl::RollNewCookie:AfterManifestCopy", [](void* arg) {
+        // Simulate a crash in the middle of RollNewCookie
+        *reinterpret_cast<Status*>(arg) = Status::Aborted("Aborted");
+      });
+
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  // case 1: Crash in the middle of updating the local manifest files.
+  // Our guarantee: no CLOUDMANIFEST_new_cookie, locally or remotely
+  OpenDB();
+
+  std::string new_cookie = "2";
+  std::string new_epoch = "dca7f3e19212c4b3";
+
+  ASSERT_NOK(GetCloudFileSystemImpl()->RollNewCookie(
+      dbname_, new_cookie,
+      CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch}));
+ CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch}));
+
+ CloseDB();
+
+ EXPECT_NOK(base_env_->FileExists(MakeCloudManifestFile(dbname_, new_cookie)));
+
+ // case 2: Crash in the middle of uploading local manifest files
+ // our guarantee: no CLOUDMANIFEST_cookie remotely
+ SyncPoint::GetInstance()->DisableProcessing();
+ SyncPoint::GetInstance()->ClearAllCallBacks();
+ SyncPoint::GetInstance()->SetCallBack(
+ "CloudFileSystemImpl::UploadManifest:AfterUploadManifest", [](void* arg) {
+ // Simulate a crash in the middle of
+ // UploadManifest
+ *reinterpret_cast<Status*>(arg) = Status::Aborted("Aborted");
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+ OpenDB();
+
+ auto delta = CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch};
+ ASSERT_NOK(
+ GetCloudFileSystemImpl()->RollNewCookie(dbname_, new_cookie, delta));
+
+ ASSERT_NOK(GetCloudFileSystemImpl()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystemImpl()->GetDestBucketName(),
+ MakeCloudManifestFile(GetCloudFileSystemImpl()->GetDestObjectPath(),
+ new_cookie)));
+
+ CloseDB();
+ SyncPoint::GetInstance()->DisableProcessing();
+ SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+
+TEST_F(CloudTest, RollNewEpochTest) {
+ OpenDB();
+ auto epoch1 = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+ EXPECT_OK(GetCloudFileSystemImpl()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystemImpl()->GetDestBucketName(),
+ ManifestFileWithEpoch(GetCloudFileSystemImpl()->GetDestObjectPath(),
+ epoch1)));
+ CloseDB();
+ OpenDB();
+ auto epoch2 = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+ EXPECT_OK(GetCloudFileSystemImpl()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystemImpl()->GetDestBucketName(),
+ ManifestFileWithEpoch(GetCloudFileSystemImpl()->GetDestObjectPath(),
+ epoch2)));
+ CloseDB();
+ EXPECT_NE(epoch1, epoch2);
+}
+
+// Test that we can roll back to the empty cookie
+TEST_F(CloudTest, CookieBackwardsCompatibilityTest) {
+ cloud_fs_options_.resync_on_open = true;
+ cloud_fs_options_.roll_cloud_manifest_on_open = true;
+
+ cloud_fs_options_.cookie_on_open = "";
+ cloud_fs_options_.new_cookie_on_open = "1";
+ OpenDB();
+ ASSERT_OK(db_->Put({}, "k1", "v1"));
+ ASSERT_OK(db_->Flush({}));
+ CloseDB();
+
+ // switch cookie
+ cloud_fs_options_.cookie_on_open = "1";
+ cloud_fs_options_.new_cookie_on_open = "2";
+ OpenDB();
+ std::string value;
+ ASSERT_OK(db_->Get({}, "k1", &value));
+ EXPECT_EQ(value, "v1");
+
+ ASSERT_OK(db_->Put({}, "k2", "v2"));
+ ASSERT_OK(db_->Flush({}));
+ CloseDB();
+
+ // switch back to empty cookie
+ cloud_fs_options_.cookie_on_open = "2";
+ cloud_fs_options_.new_cookie_on_open = "";
+ OpenDB();
+ ASSERT_OK(db_->Get({}, "k1", &value));
+ EXPECT_EQ(value, "v1");
+
+ ASSERT_OK(db_->Get({}, "k2", &value));
+ EXPECT_EQ(value, "v2");
+ CloseDB();
+
+ // open with both cookies being empty
+ cloud_fs_options_.cookie_on_open = "";
+ cloud_fs_options_.new_cookie_on_open = "";
+ OpenDB();
+ ASSERT_OK(db_->Get({}, "k1", &value));
+ EXPECT_EQ(value, "v1");
+
+ ASSERT_OK(db_->Get({}, "k2", &value));
+ EXPECT_EQ(value, "v2");
+ CloseDB();
+}
+
+// Test that once we switch to a non-empty cookie, we can roll back to the
+// empty cookie immediately and files are not deleted mistakenly
+TEST_F(CloudTest, CookieRollbackTest) {
+ cloud_fs_options_.resync_on_open = true;
+
+ // Create CLOUDMANIFEST with empty cookie
+ cloud_fs_options_.cookie_on_open = "";
+ cloud_fs_options_.new_cookie_on_open = "";
+
+ OpenDB();
+ ASSERT_OK(db_->Put({}, "k1", "v1"));
"k1", "v1")); + ASSERT_OK(db_->Flush({})); + CloseDB(); + + // Switch to cookie 1 + cloud_fs_options_.cookie_on_open = ""; + cloud_fs_options_.new_cookie_on_open = "1"; + OpenDB(); + CloseDB(); + + // rollback to empty cookie + cloud_fs_options_.cookie_on_open = "1"; + cloud_fs_options_.new_cookie_on_open = ""; + + // Setup syncpoint so that file deletion jobs are executed after we open db, + // but before we close db. This is to make sure that file deletion job + // won't delete files that are created when we open db (e.g., CLOUDMANIFEST + // files and MANIFEST files) and we can catch it in test if something is + // messed up + SyncPoint::GetInstance()->LoadDependency({ + {// only trigger file deletion job after db open + "CloudTest::CookieRollbackTest:AfterOpenDB", + "CloudSchedulerImpl::DoWork:BeforeGetJob"}, + }); + SyncPoint::GetInstance()->EnableProcessing(); + OpenDB(); + TEST_SYNC_POINT("CloudTest::CookieRollbackTest:AfterOpenDB"); + // File deletion jobs are only triggered after this. Once it's triggered, + // the job deletion queue is not empty + + std::string v; + ASSERT_OK(db_->Get({}, "k1", &v)); + EXPECT_EQ(v, "v1"); + + // wait until no scheduled jobs for current local cloud env + // After waiting, we know for sure that all the deletion jobs scheduled + // when opening db are executed + WaitUntilNoScheduledJobs(); + CloseDB(); + + SyncPoint::GetInstance()->DisableProcessing(); + + // reopen with empty cookie + cloud_fs_options_.cookie_on_open = ""; + cloud_fs_options_.new_cookie_on_open = ""; + OpenDB(); + ASSERT_OK(db_->Get({}, "k1", &v)); + EXPECT_EQ(v, "v1"); + CloseDB(); +} + +TEST_F(CloudTest, NewCookieOnOpenTest) { + cloud_fs_options_.cookie_on_open = "1"; + + // when opening new db, only new_cookie_on_open is used as CLOUDMANIFEST + // suffix + cloud_fs_options_.new_cookie_on_open = "2"; + OpenDB(); + ASSERT_OK(db_->Put({}, "k1", "v1")); + ASSERT_OK(db_->Flush({})); + + ASSERT_NOK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject( + GetCloudFileSystem()->GetSrcBucketName(), + MakeCloudManifestFile(dbname_, "1"))); + // CLOUDMANIFEST-2 should exist since this is a new db + ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject( + GetCloudFileSystem()->GetSrcBucketName(), + MakeCloudManifestFile(dbname_, "2"))); + CloseDB(); + + // reopen and switch cookie + cloud_fs_options_.cookie_on_open = "2"; + cloud_fs_options_.new_cookie_on_open = "3"; + OpenDB(); + // CLOUDMANIFEST-3 is the new cloud manifest + ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject( + GetCloudFileSystem()->GetSrcBucketName(), + MakeCloudManifestFile(dbname_, "3"))); + + std::string value; + ASSERT_OK(db_->Get({}, "k1", &value)); + EXPECT_EQ(value, "v1"); + + ASSERT_OK(db_->Put({}, "k2", "v2")); + ASSERT_OK(db_->Flush({})); + CloseDB(); + + // reopen DB, but don't switch CLOUDMANIFEST + cloud_fs_options_.cookie_on_open = "3"; + cloud_fs_options_.new_cookie_on_open = "3"; + OpenDB(); + ASSERT_OK(db_->Get({}, "k2", &value)); + EXPECT_EQ(value, "v2"); + CloseDB(); +} + +// Test invisible file deletion when db is opened. 
+TEST_F(CloudTest, InvisibleFileDeletionOnDBOpenTest) {
+ std::string cookie1 = "", cookie2 = "-1-1";
+ cloud_fs_options_.keep_local_sst_files = true;
+
+ // opening with cookie1
+ OpenDB();
+ ASSERT_OK(db_->Put({}, "k1", "v1"));
+ ASSERT_OK(db_->Flush({}));
+ std::vector<std::string> cookie1_sst_files;
+ std::string cookie1_manifest_file;
+ ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &cookie1_sst_files,
+ &cookie1_manifest_file));
+ ASSERT_EQ(cookie1_sst_files.size(), 1);
+ CloseDB();
+
+ // MANIFEST file path of cookie1
+ auto cookie1_manifest_filepath = dbname_ + pathsep + cookie1_manifest_file;
+ // CLOUDMANIFEST file path of cookie1
+ auto cookie1_cm_filepath =
+ MakeCloudManifestFile(dbname_, cloud_fs_options_.cookie_on_open);
+ // sst file path of cookie1
+ auto cookie1_sst_filepath = dbname_ + pathsep + cookie1_sst_files[0];
+
+ // opening with cookie1 and switch to cookie2
+ cloud_fs_options_.cookie_on_open = cookie1;
+ cloud_fs_options_.new_cookie_on_open = cookie2;
+ OpenDB();
+ ASSERT_OK(db_->Put({}, "k2", "v2"));
+ ASSERT_OK(db_->Flush({}));
+ // CM/M/sst files of cookie1 won't be deleted
+ for (auto path :
+ {cookie1_cm_filepath, cookie1_manifest_filepath, cookie1_sst_filepath}) {
+ EXPECT_OK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+ path, kIOOptions, kDbg));
+ EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystem()->GetSrcBucketName(), path));
+ }
+
+ std::vector<std::string> cookie2_sst_files;
+ std::string cookie2_manifest_file;
+ ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &cookie2_sst_files,
+ &cookie2_manifest_file));
+ ASSERT_EQ(cookie2_sst_files.size(), 2);
+ CloseDB();
+
+ // MANIFEST file path of cookie2
+ auto cookie2_manifest_filepath = dbname_ + pathsep + cookie2_manifest_file;
+ // CLOUDMANIFEST file path of cookie2
+ auto cookie2_cm_filepath =
+ MakeCloudManifestFile(dbname_, cloud_fs_options_.new_cookie_on_open);
+ // find sst file path of cookie2
+ auto cookie2_sst_filepath = dbname_ + pathsep + cookie2_sst_files[0];
+ if (cookie2_sst_filepath == cookie1_sst_filepath) {
+ cookie2_sst_filepath = dbname_ + pathsep + cookie2_sst_files[1];
+ }
+
+ // Now we reopen db with cookie1 to force deleting all files generated in
+ // cookie2
+
+ // number of file deletion jobs executed so far
+ std::atomic_int num_job_executed(0);
+
+ // Syncpoint callback so that we can check when the files are actually
+ // deleted (which is async)
+ SyncPoint::GetInstance()->SetCallBack(
+ "LocalCloudScheduler::ScheduleJob:AfterEraseJob", [&](void* /*arg*/) {
+ num_job_executed++;
+ if (num_job_executed == 3) {
+ // CM/M/SST files of cookie2 are deleted in s3
+ for (auto path : {cookie2_manifest_filepath, cookie2_cm_filepath,
+ cookie2_sst_filepath}) {
+ EXPECT_NOK(
+ GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystem()->GetSrcBucketName(), path));
+ }
+ }
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ // reopening db with cookie1 will force all files generated in cookie2 to be
+ // deleted
+ cloud_fs_options_.cookie_on_open = cookie1;
+ cloud_fs_options_.new_cookie_on_open = cookie1;
+ OpenDB();
+ // local obsolete CM/M/SST files will be deleted immediately
+ // files in cloud will be deleted later (checked in the callback)
+ for (auto path :
+ {cookie2_cm_filepath, cookie2_manifest_filepath, cookie2_sst_filepath}) {
+ EXPECT_NOK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+ path, kIOOptions, kDbg))
+ << path;
+ }
+ CloseDB();
+
+ SyncPoint::GetInstance()->DisableProcessing();
+ SyncPoint::GetInstance()->ClearAllCallBacks();
+
+ WaitUntilNoScheduledJobs();
+ // Make sure that these files are indeed deleted
+ EXPECT_EQ(num_job_executed, 3);
+}
+
+// Verify that when opening with `delete_cloud_invisible_files_on_open`
+// disabled, local files will be deleted while cloud files will be kept
+TEST_F(CloudTest, DisableInvisibleFileDeletionOnOpenTest) {
+ std::string cookie1 = "", cookie2 = "1";
+ cloud_fs_options_.keep_local_sst_files = true;
+ cloud_fs_options_.cookie_on_open = cookie1;
+ cloud_fs_options_.new_cookie_on_open = cookie1;
+
+ // opening with cookie1
+ OpenDB();
+ // generate sst file with cookie1
+ ASSERT_OK(db_->Put({}, "k1", "v1"));
+ ASSERT_OK(db_->Flush({}));
+
+ std::vector<std::string> cookie1_sst_files;
+ std::string cookie1_manifest_file;
+ ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &cookie1_sst_files,
+ &cookie1_manifest_file));
+ ASSERT_EQ(cookie1_sst_files.size(), 1);
+
+ auto cookie1_manifest_filepath = dbname_ + pathsep + cookie1_manifest_file;
+ auto cookie1_cm_filepath =
+ MakeCloudManifestFile(dbname_, cloud_fs_options_.cookie_on_open);
+ auto cookie1_sst_filepath = dbname_ + pathsep + cookie1_sst_files[0];
+
+ ASSERT_OK(SwitchToNewCookie(cookie2));
+
+ // generate sst file with cookie2
+ ASSERT_OK(db_->Put({}, "k2", "v2"));
+ ASSERT_OK(db_->Flush({}));
+
+ std::vector<std::string> cookie2_sst_files;
+ std::string cookie2_manifest_file;
+
+ ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &cookie2_sst_files,
+ &cookie2_manifest_file));
+ ASSERT_EQ(cookie2_sst_files.size(), 2);
+
+ // exclude cookie1_sst_files from cookie2_sst_files
+ std::sort(cookie2_sst_files.begin(), cookie2_sst_files.end());
+ std::set_difference(cookie2_sst_files.begin(), cookie2_sst_files.end(),
+ cookie1_sst_files.begin(), cookie1_sst_files.end(),
+ cookie2_sst_files.begin());
+ cookie2_sst_files.resize(1);
+
+ auto cookie2_manifest_filepath = dbname_ + pathsep + cookie2_manifest_file;
+ auto cookie2_cm_filepath = MakeCloudManifestFile(dbname_, cookie2);
+ auto cookie2_sst_filepath = dbname_ + pathsep + cookie2_sst_files[0];
+
+ CloseDB();
+
+ // reopen with cookie1 = ""; cookie2 sst files are not visible
+ cloud_fs_options_.delete_cloud_invisible_files_on_open = false;
+ OpenDB();
+ // files from cookie2 are deleted locally but exist in s3
+ for (auto path :
+ {cookie2_cm_filepath, cookie2_manifest_filepath, cookie2_sst_filepath}) {
+ EXPECT_NOK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+ path, kIOOptions, kDbg));
+ EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystem()->GetSrcBucketName(), path));
+ }
+ std::string value;
+ EXPECT_OK(db_->Get({}, "k1", &value));
+ EXPECT_NOK(db_->Get({}, "k2", &value));
+ CloseDB();
+
+ cloud_fs_options_.cookie_on_open = cookie2;
+ cloud_fs_options_.new_cookie_on_open = cookie2;
+ // reopen with cookie2 also works since it will fetch files from s3 directly
+ OpenDB();
+ EXPECT_OK(db_->Get({}, "k1", &value));
+ EXPECT_OK(db_->Get({}, "k2", &value));
+ CloseDB();
+}
+
+TEST_F(CloudTest, DisableObsoleteFileDeletionOnOpenTest) {
+ // Generate a few obsolete files first
+ options_.num_levels = 3;
+ options_.level0_file_num_compaction_trigger = 3;
+ options_.write_buffer_size = 110 << 10; // 110KB
+ options_.arena_block_size = 4 << 10;
+ options_.keep_log_file_num = 1;
+ options_.use_options_file = false;
+ // put wal files into one directory so that we don't need to count number of
+ // local wal files
+ options_.wal_dir = dbname_ + "/wal";
+ cloud_fs_options_.keep_local_sst_files = true;
+ // disable cm roll so that no new manifest files are generated
+ cloud_fs_options_.roll_cloud_manifest_on_open = false;
+
+ WriteOptions wo;
+ wo.disableWAL = true;
+ OpenDB();
+ ASSERT_OK(SwitchToNewCookie(""));
+ db_->DisableFileDeletions();
+
+ std::vector<LiveFileMetaData> files;
+
+ ASSERT_OK(db_->Put(wo, "k1", "v1"));
+ ASSERT_OK(db_->Flush({}));
+ ASSERT_OK(db_->Put(wo, "k1", "v2"));
+ ASSERT_OK(db_->Flush({}));
+ db_->GetLiveFilesMetaData(&files);
+ ASSERT_EQ(files.size(), 2);
+
+ auto local_files = GetAllLocalFiles();
+ // CM, MANIFEST1, MANIFEST2, CURRENT, IDENTITY, 2 sst files, wal directory
+ EXPECT_EQ(local_files.size(), 8);
+
+ ASSERT_OK(GetDBImpl()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true));
+
+ files.clear();
+ db_->GetLiveFilesMetaData(&files);
+ ASSERT_EQ(files.size(), 1);
+
+ local_files = GetAllLocalFiles();
+ // obsolete files are not deleted, and one extra sst file is generated after
+ // compaction
+ EXPECT_EQ(local_files.size(), 9);
+
+ CloseDB();
+
+ options_.disable_delete_obsolete_files_on_open = true;
+ OpenDB();
+ // obsolete files are not deleted
+ EXPECT_EQ(GetAllLocalFiles().size(), 8);
+ // once file deletions are re-enabled, obsolete files are deleted
+ db_->EnableFileDeletions(false /* force */);
+ EXPECT_EQ(GetAllLocalFiles().size(), 6);
+ CloseDB();
+}
+
+// Verify invisible CLOUDMANIFEST file deletion
+TEST_F(CloudTest, CloudManifestFileDeletionTest) {
+ // create CLOUDMANIFEST file in s3
+ cloud_fs_options_.cookie_on_open = "";
+ cloud_fs_options_.new_cookie_on_open = "";
+ OpenDB();
+ CloseDB();
+
+ // create CLOUDMANIFEST-1 file in s3
+ cloud_fs_options_.cookie_on_open = "";
+ cloud_fs_options_.new_cookie_on_open = "1";
+ OpenDB();
+ CloseDB();
+
+ auto checkCloudManifestFileExistence =
+ [&](std::vector<std::string> cookies) {
+ for (auto cookie : cookies) {
+ EXPECT_OK(
+ GetCloudFileSystemImpl()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystemImpl()->GetDestBucketName(),
+ MakeCloudManifestFile(
+ GetCloudFileSystemImpl()->GetDestObjectPath(), cookie)));
+ }
+ };
+
+ // double check that the CM files are indeed created
+ checkCloudManifestFileExistence({"", "1"});
+
+ // set large file deletion delay so that files are not deleted immediately
+ cloud_fs_options_.cloud_file_deletion_delay = std::chrono::hours(1);
+ EXPECT_EQ(GetCloudFileSystemImpl()->TEST_NumScheduledJobs(), 0);
+
+ // now we reopen the db with empty cookie_on_open and new_cookie_on_open =
+ // "1". Double check that CLOUDMANIFEST-1 is not deleted!
+ OpenDB();
+ checkCloudManifestFileExistence({"", "1"});
+ CloseDB();
+
+ // switch to new cookie
+ cloud_fs_options_.cookie_on_open = "1";
+ cloud_fs_options_.new_cookie_on_open = "2";
+ OpenDB();
+ // double check that CLOUDMANIFEST is never deleted
+ checkCloudManifestFileExistence({"", "1", "2"});
+ CloseDB();
+}
+
+// verify that two writers with different cookies can write concurrently
+TEST_F(CloudTest, TwoConcurrentWritersCookieNotEmpty) {
+ auto firstDB = dbname_;
+ auto secondDB = dbname_ + "-1";
+
+ DBCloud *db1, *db2;
+ Env *aenv1, *aenv2;
+
+ auto openDB1 = [&] {
+ dbname_ = firstDB;
+ cloud_fs_options_.cookie_on_open = "1";
+ cloud_fs_options_.new_cookie_on_open = "2";
+ OpenDB();
+ db1 = db_;
+ db_ = nullptr;
+ aenv1 = aenv_.release();
+ };
+ auto openDB1NoCookieSwitch = [&](std::string const& cookie) {
+ dbname_ = firstDB;
+ // when reopening DB1, we should set cookie_on_open = 2 to make sure
+ // we are opening with the right CM/M files
+ cloud_fs_options_.cookie_on_open = cookie;
+ cloud_fs_options_.new_cookie_on_open = cookie;
+ OpenDB();
+ db1 = db_;
+ db_ = nullptr;
+ aenv1 = aenv_.release();
+ };
+ auto openDB2 = [&] {
+ dbname_ = secondDB;
+ cloud_fs_options_.cookie_on_open = "2";
+ cloud_fs_options_.new_cookie_on_open = "3";
+ OpenDB();
+ db2 = db_;
+ db_ = nullptr;
+ aenv2 = aenv_.release();
+ };
+ auto openDB2NoCookieSwitch = [&](std::string const& cookie) {
+ dbname_ = secondDB;
+ // when reopening DB2, we should set cookie_on_open = 3 to make sure
+ // we are opening with the right CM/M files
+ cloud_fs_options_.cookie_on_open = cookie;
+ cloud_fs_options_.new_cookie_on_open = cookie;
+ OpenDB();
+ db2 = db_;
+ db_ = nullptr;
+ aenv2 = aenv_.release();
+ };
+ auto closeDB1 = [&] {
+ db_ = db1;
+ aenv_.reset(aenv1);
+ CloseDB();
+ };
+ auto closeDB2 = [&] {
+ db_ = db2;
+ aenv_.reset(aenv2);
+ CloseDB();
+ };
+
+ openDB1();
+ db1->Put({}, "k1", "v1");
+ db1->Flush({});
+ closeDB1();
+
+ // cleanup memtable of db1 to make sure k1/v1 indeed exists in sst files
+ DestroyDir(firstDB);
+ openDB1NoCookieSwitch("2" /* cookie */);
+
+ // opening DB2 and running concurrently
+ openDB2();
+
+ db1->Put({}, "k2", "v2");
+ db1->Flush({});
+
+ db2->Put({}, "k3", "v3");
+ db2->Flush({});
+
+ std::string v;
+ ASSERT_OK(db1->Get({}, "k1", &v));
+ EXPECT_EQ(v, "v1");
+ ASSERT_OK(db2->Get({}, "k1", &v));
+ EXPECT_EQ(v, "v1");
+
+ ASSERT_OK(db1->Get({}, "k2", &v));
+ EXPECT_EQ(v, "v2");
+ // k2 is written in db1 after db2 is opened, so it's not visible by db2
+ EXPECT_NOK(db2->Get({}, "k2", &v));
+
+ // k3 is written in db2 after db1 is opened, so it's not visible by db1
+ EXPECT_NOK(db1->Get({}, "k3", &v));
+ ASSERT_OK(db2->Get({}, "k3", &v));
+ EXPECT_EQ(v, "v3");
+
+ closeDB1();
+ closeDB2();
+
+ // cleanup local state to make sure writes indeed exist in sst files
+ DestroyDir(firstDB);
+ DestroyDir(secondDB);
+
+ // We can't reopen db with cookie=2 anymore, since that will remove all the
+ // files for cookie=3. This is guaranteed since whenever we reopen db, we
+ // always get the latest cookie from metadata store.
+ openDB2NoCookieSwitch("3" /* cookie */);
+
+ ASSERT_OK(db2->Get({}, "k1", &v));
+ EXPECT_EQ(v, "v1");
+ EXPECT_NOK(db2->Get({}, "k2", &v));
+ ASSERT_OK(db2->Get({}, "k3", &v));
+ EXPECT_EQ(v, "v3");
+ closeDB2();
+}
+
+// if file deletion fails, db should still be reopened
+TEST_F(CloudTest, FileDeletionFailureIgnoredTest) {
+ std::string manifest_file_path;
+ OpenDB();
+ auto epoch = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+ manifest_file_path = ManifestFileWithEpoch(dbname_, epoch);
+ ASSERT_OK(db_->Put({}, "k1", "v1"));
+ ASSERT_OK(db_->Flush({}));
+ CloseDB();
+
+ // bump the manifest epoch so that on the next open, the old manifest file
+ // will be deleted
+ OpenDB();
+ CloseDB();
+
+ // return error during file deletion
+ SyncPoint::GetInstance()->SetCallBack(
+ "CloudFileSystemImpl::DeleteLocalInvisibleFiles:AfterListLocalFiles",
+ [](void* arg) {
+ auto st = reinterpret_cast<Status*>(arg);
+ *st =
+ Status::Aborted("Manual abort to simulate file listing failure");
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ OpenDB();
+ std::string v;
+ ASSERT_OK(db_->Get({}, "k1", &v));
+ EXPECT_EQ(v, "v1");
+ // Due to the Aborted error we generated, the manifest file which should have
+ // been deleted still exists.
+ EXPECT_OK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+ manifest_file_path, kIOOptions, kDbg));
+ CloseDB();
+ SyncPoint::GetInstance()->DisableProcessing();
+ SyncPoint::GetInstance()->ClearAllCallBacks();
+
+ // reopening the db should delete the obsolete manifest file after we clean
+ // up the syncpoint
+ OpenDB();
+ EXPECT_NOK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+ manifest_file_path, kIOOptions, kDbg));
+ CloseDB();
+}
+
+// verify that once CloudFileSystem is destructed, the file deletion jobs
+// waiting in the queue are canceled
+TEST_F(CloudTest, FileDeletionJobsCanceledWhenCloudEnvDestructed) {
+ std::string manifest_file_path;
+ OpenDB();
+ auto epoch = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+ manifest_file_path = ManifestFileWithEpoch(dbname_, epoch);
+ CloseDB();
+
+ // bump epoch of manifest file so next open will delete previous manifest file
+ OpenDB();
+ CloseDB();
+
+ // Set up a syncpoint dependency to prevent the cloud scheduler from executing
+ // file deletion jobs in the queue until CloudFileSystem is destructed
+ SyncPoint::GetInstance()->LoadDependency(
+ {{"CloudTest::FileDeletionJobsCanceledWhenCloudEnvDestructed:"
+ "AfterCloudEnvDestruction",
+ "CloudSchedulerImpl::DoWork:BeforeGetJob"}});
+ SyncPoint::GetInstance()->EnableProcessing();
+ OpenDB();
+ CloseDB();
+
+ // deleting the CloudFileSystem will cancel all file deletion jobs in the
+ // queue
+ aenv_.reset();
+
+ // jobs won't be executed until after this point. But the file deletion job
+ // in the queue should have already been canceled
+ TEST_SYNC_POINT(
+ "CloudTest::FileDeletionJobsCanceledWhenCloudEnvDestructed:"
+ "AfterCloudEnvDestruction");
+
+ SyncPoint::GetInstance()->DisableProcessing();
+
+ // recreate cloud env to check s3 file existence
+ CreateCloudEnv();
+
+ // wait for a while so that the remaining uncanceled jobs are indeed executed
+ // by the cloud scheduler.
+ std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+ // the old manifest file is still there!
+ EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystem()->GetSrcBucketName(), manifest_file_path));
+
+ // reopen db to delete the old manifest file
+ OpenDB();
+ EXPECT_NOK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+ GetCloudFileSystem()->GetSrcBucketName(), manifest_file_path));
+ CloseDB();
+}
+
+// The failure case of opening a corrupted db which doesn't have a MANIFEST
+// file
+TEST_F(CloudTest, OpenWithManifestMissing) {
+ cloud_fs_options_.resync_on_open = true;
+ cloud_fs_options_.resync_manifest_on_open = true;
+ OpenDB();
+ auto epoch = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+ CloseDB();
+
+ // Remove the MANIFEST file from s3
+ ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->DeleteCloudObject(
+ GetCloudFileSystem()->GetSrcBucketName(),
+ ManifestFileWithEpoch(GetCloudFileSystem()->GetSrcObjectPath(), epoch)));
+ DestroyDir(dbname_);
+
+ EXPECT_TRUE(checkOpen().IsCorruption());
+}
+
+// verify that an ephemeral clone won't reference old sst files if it's
+// reopened after sst file deletion on the durable
+// Ordering of events:
+// - open durable (epoch = 1)
+// - open ephemeral (epoch = 1, new_epoch=?)
+// - durable deletes sst files
+// - reopen ephemeral (epoch = 1)
+TEST_F(CloudTest, ReopenEphemeralAfterFileDeletion) {
+ cloud_fs_options_.resync_on_open = true;
+ cloud_fs_options_.keep_local_sst_files = false;
+
+ auto durableDBName = dbname_;
+
+ DBCloud *durable, *ephemeral;
+ Env *durableEnv, *ephemeralEnv;
+ std::vector<ColumnFamilyHandle*> durableHandles;
+
+ auto openDurable = [&] {
+ dbname_ = durableDBName;
+
+ OpenDB(&durableHandles);
+ durable = db_;
+ db_ = nullptr;
+ durableEnv = aenv_.release();
+ };
+
+ auto openEphemeral = [&] {
+ std::unique_ptr<Env> env;
+ std::unique_ptr<DBCloud> cloud_db;
+ // open ephemeral clone with force_keep_local_on_invalid_dest_bucket=false
+ // so that sst files are not kept locally
+ ASSERT_OK(CloneDB("ephemeral" /* clone_name */, "" /* dest_bucket_name */,
+ "" /* dest_object_path */, &cloud_db, &env,
+ false /* force_keep_local_on_invalid_dest_bucket */));
+ ephemeral = cloud_db.release();
+ ephemeralEnv = env.release();
+ };
+
+ auto closeDurable = [&] {
+ db_ = durable;
+ aenv_.reset(durableEnv);
+ CloseDB(&durableHandles);
+ };
+
+ auto closeEphemeral = [&] {
+ db_ = ephemeral;
+ aenv_.reset(ephemeralEnv);
+ CloseDB();
+ };
+
+ options_.disable_auto_compactions = true;
+ openDurable();
+
+ ASSERT_OK(durable->Put({}, "key1", "val1"));
+ ASSERT_OK(durable->Flush({}));
+
+ ASSERT_OK(durable->Put({}, "key1", "val2"));
+ ASSERT_OK(durable->Flush({}));
+
+ closeDurable();
+
+ openDurable();
+ openEphemeral();
+
+ std::vector<LiveFileMetaData> files;
+ durable->GetLiveFilesMetaData(&files);
+ ASSERT_EQ(files.size(), 2);
+ // trigger compaction on durable with trivial file moves disabled, which will
+ // delete previously generated sst files
+ ASSERT_OK(
+ static_cast<DBImpl*>(durable->GetBaseDB())
+ ->TEST_CompactRange(0, nullptr, nullptr, durableHandles[0], true));
+ files.clear();
+ durable->GetLiveFilesMetaData(&files);
+ ASSERT_EQ(files.size(), 1);
+
+ // reopen ephemeral
+ closeEphemeral();
+ openEphemeral();
+
+ std::string val;
+ ASSERT_OK(ephemeral->Get({}, "key1", &val));
+ EXPECT_EQ(val, "val2");
+ closeEphemeral();
+ closeDurable();
+}
+
+TEST_F(CloudTest, SanitizeDirectoryTest) {
+ cloud_fs_options_.keep_local_sst_files = true;
+ OpenDB();
+ ASSERT_OK(db_->Put({}, "k1", "v1"));
+ ASSERT_OK(db_->Flush({}));
+ CloseDB();
+
+ auto local_files = GetAllLocalFiles();
+ // Files exist locally: cm/m, sst, options-xxx, xxx.log, identity, current
+ EXPECT_EQ(local_files.size(), 7);
+
+ EXPECT_OK(
+ GetCloudFileSystemImpl()->SanitizeDirectory(options_, dbname_, false));
+
+ // cleaning up during sanitization not triggered
+ EXPECT_EQ(local_files.size(), GetAllLocalFiles().size());
+
+ // Delete the local CLOUDMANIFEST file to force cleaning up
+ ASSERT_OK(
+ base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */)));
+
+ EXPECT_OK(
+ GetCloudFileSystemImpl()->SanitizeDirectory(options_, dbname_, false));
+
+ local_files = GetAllLocalFiles();
+ // IDENTITY file is downloaded after cleaning up, which is the only file that
+ // exists locally
+ EXPECT_EQ(GetAllLocalFiles().size(), 1);
+
+ // reinitialize local directory
+ OpenDB();
+ CloseDB();
+ local_files = GetAllLocalFiles();
+ // we have two local MANIFEST files after opening a second time.
+ EXPECT_EQ(local_files.size(), 8);
+
+ // create some random directory, which is expected to be not deleted
+ ASSERT_OK(base_env_->CreateDir(dbname_ + "/tmp_writes"));
+
+ // Delete the local CLOUDMANIFEST file to force cleaning up
+ ASSERT_OK(
+ base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */)));
+
+ ASSERT_OK(
+ GetCloudFileSystemImpl()->SanitizeDirectory(options_, dbname_, false));
+
+ // IDENTITY file + the random directory we created
+ EXPECT_EQ(GetAllLocalFiles().size(), 2);
+
+ // reinitialize local directory
+ OpenDB();
+ CloseDB();
+
+ // inject io errors during cleaning up. The io errors should be ignored
+ SyncPoint::GetInstance()->SetCallBack(
+ "CloudFileSystemImpl::SanitizeDirectory:AfterDeleteFile", [](void* arg) {
+ auto st = reinterpret_cast<Status*>(arg);
+ *st = Status::IOError("Inject io error during cleaning up");
+ });
+
+ SyncPoint::GetInstance()->EnableProcessing();
+ // Delete the local CLOUDMANIFEST file to force cleaning up
+ ASSERT_OK(
+ base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */)));
+
+ ASSERT_OK(
+ GetCloudFileSystemImpl()->SanitizeDirectory(options_, dbname_, false));
+ SyncPoint::GetInstance()->DisableProcessing();
+}
+
+TEST_F(CloudTest, CloudFileDeletionNotTriggeredIfDestBucketNotSet) {
+ std::vector<std::string> files_to_delete;
+
+ // generate an invisible MANIFEST file to delete
+ OpenDB();
+ std::string manifest_file = ManifestFileWithEpoch(
+ dbname_, GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch());
+ files_to_delete.push_back(basename(manifest_file));
+ CloseDB();
+
+ // generate obsolete sst files to delete
+ options_.disable_delete_obsolete_files_on_open = true;
+ cloud_fs_options_.delete_cloud_invisible_files_on_open = false;
+ OpenDB();
+ GenerateObsoleteFilesOnEmptyDB(GetDBImpl(), GetCloudFileSystem(),
+ &files_to_delete);
+ CloseDB();
+
+ options_.disable_delete_obsolete_files_on_open = false;
+ cloud_fs_options_.dest_bucket.SetBucketName("");
+ cloud_fs_options_.dest_bucket.SetObjectPath("");
+ cloud_fs_options_.delete_cloud_invisible_files_on_open = true;
+ OpenDB();
+ WaitUntilNoScheduledJobs();
+ for (auto& fname : files_to_delete) {
+ EXPECT_OK(ExistsCloudObject(fname));
+ }
+ CloseDB();
+
+ cloud_fs_options_.dest_bucket = cloud_fs_options_.src_bucket;
+ OpenDB();
+ WaitUntilNoScheduledJobs();
+ for (auto& fname : files_to_delete) {
+ EXPECT_NOK(ExistsCloudObject(fname));
+ }
+ CloseDB();
+}
+
+TEST_F(CloudTest, ScheduleFileDeletionTest) {
+ auto scheduler = CloudScheduler::Get();
+ auto deletion_scheduler =
+ CloudFileDeletionScheduler::Create(scheduler, std::chrono::seconds(0));
+
+ std::atomic_int counter{0};
+ int num_file_deletions = 10;
+ for (int i = 0; i < num_file_deletions; i++) {
+ ASSERT_OK(deletion_scheduler->ScheduleFileDeletion(
+ std::to_string(i) + ".sst", [&counter]() { counter++; }));
+ }
+
+ // wait until no scheduled jobs
+ while (scheduler->TEST_NumScheduledJobs() > 0) {
+ usleep(100);
+ }
+ EXPECT_EQ(counter, num_file_deletions);
+ EXPECT_EQ(deletion_scheduler->TEST_FilesToDelete().size(), 0);
+}
+
+TEST_F(CloudTest, SameFileDeletedMultipleTimesTest) {
+ auto scheduler = CloudScheduler::Get();
+ auto deletion_scheduler =
+ CloudFileDeletionScheduler::Create(scheduler, std::chrono::hours(1));
+ ASSERT_OK(deletion_scheduler->ScheduleFileDeletion("filename", []() {}));
+ ASSERT_OK(deletion_scheduler->ScheduleFileDeletion("filename", []() {}));
+ EXPECT_EQ(deletion_scheduler->TEST_FilesToDelete().size(), 1);
+}
+
+TEST_F(CloudTest, UnscheduleFileDeletionTest) {
+ auto scheduler = CloudScheduler::Get();
+ auto deletion_scheduler =
+ CloudFileDeletionScheduler::Create(scheduler, std::chrono::hours(1));
+
+ std::atomic_int counter{0};
+ int num_file_deletions = 10;
+ std::vector<std::string> files_to_delete;
+ for (int i = 0; i < num_file_deletions; i++) {
+ std::string filename = std::to_string(i) + ".sst";
+ files_to_delete.push_back(filename);
+ ASSERT_OK(deletion_scheduler->ScheduleFileDeletion(
+ filename, [&counter]() { counter++; }));
+ }
+ auto actual_files_to_delete = deletion_scheduler->TEST_FilesToDelete();
+ std::sort(actual_files_to_delete.begin(), actual_files_to_delete.end());
+ EXPECT_EQ(actual_files_to_delete, files_to_delete);
+
+ int num_scheduled_jobs = num_file_deletions;
+ for (auto& fname : files_to_delete) {
+ deletion_scheduler->UnscheduleFileDeletion(fname);
+ num_scheduled_jobs -= 1;
+ EXPECT_EQ(scheduler->TEST_NumScheduledJobs(), num_scheduled_jobs);
+ }
+}
+
+TEST_F(CloudTest, UnscheduleUnknownFileTest) {
+ auto scheduler = CloudScheduler::Get();
+ auto deletion_scheduler =
+ CloudFileDeletionScheduler::Create(scheduler, std::chrono::hours(1));
+ deletion_scheduler->UnscheduleFileDeletion("unknown file");
+}
+
+// Verifies that once `CloudFileDeletionScheduler` is destructed, a pending
+// file deletion job will no longer actually delete its file
+// This is also a repro of SYS-3456, which is a race between CloudFileSystemImpl
+// destruction and cloud file deletion
+// TODO(SYS-3996) Re-enable
+TEST_F(
+ CloudTest,
+ DISABLED_FileDeletionNotScheduledOnceCloudFileDeletionSchedulerDestructed) {
+ // Generate some invisible files to delete
+ // Disable file deletion to make sure these files are not deleted
+ // automatically
+ options_.disable_delete_obsolete_files_on_open = true;
+ cloud_fs_options_.delete_cloud_invisible_files_on_open = false;
+ OpenDB();
+ std::vector<std::string> obsolete_files;
+ GenerateObsoleteFilesOnEmptyDB(GetDBImpl(), GetCloudFileSystem(),
+ &obsolete_files);
+ CloseDB();
+
+ // Order of execution:
+ // - scheduled file deletion job starts running (but file not deleted yet)
+ // - destruct CloudFileDeletionScheduler
+ // - file deletion job deletes the file
+ SyncPoint::GetInstance()->LoadDependency(
+ {{
+ // `BeforeCancelJobs` happens-after `BeforeFileDeletion`
+ "CloudFileDeletionScheduler::ScheduleFileDeletion:"
+ "BeforeFileDeletion",
+ "CloudFileDeletionScheduler::~CloudFileDeletionScheduler:"
+ "BeforeCancelJobs",
+ },
+ {"CloudFileDeletionScheduler::~CloudFileDeletionScheduler:"
+ "BeforeCancelJobs",
+ "CloudFileDeletionScheduler::ScheduleFileDeletion:AfterFileDeletion"}});
+
+ std::atomic<int> num_jobs_finished{0};
+ SyncPoint::GetInstance()->SetCallBack(
+ "CloudFileDeletionScheduler::ScheduleFileDeletion:AfterFileDeletion",
+ [&](void* arg) {
+ ASSERT_NE(nullptr, arg);
+ auto file_deleted = *reinterpret_cast<bool*>(arg);
+ EXPECT_FALSE(file_deleted);
+ num_jobs_finished++;
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+ // file not deleted immediately but just scheduled
+ ASSERT_OK(
+ aenv_->GetFileSystem()->DeleteFile(obsolete_files[0], kIOOptions, kDbg));
+ EXPECT_EQ(GetCloudFileSystemImpl()->TEST_NumScheduledJobs(), 1);
+ // destruct `CloudFileSystem`, which will cause `CloudFileDeletionScheduler`
+ // to be destructed
+ aenv_.reset();
+ // wait until file deletion job is done
+ while (num_jobs_finished.load() != 1) {
+ usleep(100);
+ }
+ SyncPoint::GetInstance()->ClearAllCallBacks();
+ SyncPoint::GetInstance()->DisableProcessing();
+}
+
+TEST_F(CloudTest, UniqueCurrentEpochAcrossDBRestart) {
+ constexpr int kNumRestarts = 3;
+ std::unordered_set<std::string> epochs;
+ for (int i = 0; i < kNumRestarts; i++) {
+ OpenDB();
+ auto [it, inserted] = epochs.emplace(
+ GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch());
+ EXPECT_TRUE(inserted);
+ CloseDB();
+ }
+}
+
+TEST_F(CloudTest, ReplayCloudManifestDeltaTest) {
+ OpenDB();
+ constexpr int kNumKeys = 3;
+ std::vector<CloudManifestDelta> deltas;
+ for (int i = 0; i < kNumKeys; i++) {
+ ASSERT_OK(db_->Put({}, "k" + std::to_string(i), "v" + std::to_string(i)));
+ ASSERT_OK(db_->Flush({}));
+
+ auto cookie1 = std::to_string(i) + "0";
+ auto filenum1 = db_->GetNextFileNumber();
+ deltas.push_back({filenum1, cookie1});
+ ASSERT_OK(SwitchToNewCookie(cookie1));
+
+ // apply again with same file number but different cookie
+ auto cookie2 = std::to_string(i) + "1";
+ auto filenum2 = db_->GetNextFileNumber();
+ EXPECT_EQ(filenum1, filenum2);
+ deltas.push_back({filenum2, cookie2});
+ ASSERT_OK(SwitchToNewCookie(cookie2));
+ }
+
+ auto currentEpoch =
+ GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+
+ // replay the deltas one more time
+ for (auto const& delta : deltas) {
+ EXPECT_TRUE(GetCloudFileSystem()
+ ->RollNewCookie(dbname_, delta.epoch, delta)
+ .IsInvalidArgument());
+ bool applied = false;
+ ASSERT_OK(GetCloudFileSystem()->ApplyCloudManifestDelta(delta, &applied));
+ EXPECT_FALSE(applied);
+ // current epoch not changed
+ EXPECT_EQ(GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch(),
+ currentEpoch);
+ }
+
+ for (int i = 0; i < kNumKeys; i++) {
+ std::string v;
+ ASSERT_OK(db_->Get({}, "k" + std::to_string(i), &v));
+ EXPECT_EQ(v, "v" + std::to_string(i));
+ }
+ CloseDB();
+}
+
+TEST_F(CloudTest, CreateIfMissing) {
+ options_.create_if_missing = false;
+ ASSERT_TRUE(checkOpen().IsNotFound());
+ options_.create_if_missing = true;
+ OpenDB();
+ CloseDB();
+
+ // delete the local db directory (including the `CURRENT` file)
+ DestroyDir(dbname_);
+ OpenDB();
+ CloseDB();
+
+ // Delete `CLOUDMANIFEST` file in cloud
+ auto cloudManifestFile =
+ MakeCloudManifestFile(dbname_, cloud_fs_options_.new_cookie_on_open);
+ ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->DeleteCloudObject(
+ GetCloudFileSystem()->GetSrcBucketName(), cloudManifestFile));
+
+ options_.create_if_missing = false;
+ ASSERT_TRUE(checkOpen().IsNotFound());
+}
+
+} // namespace ROCKSDB_NAMESPACE
+
+// A black-box test for the cloud wrapper around rocksdb
+int main(int argc, char** argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ // Aws::InitAPI(Aws::SDKOptions());
+ auto r = RUN_ALL_TESTS();
+ // Aws::ShutdownAPI(Aws::SDKOptions());
+ return r;
+}
+
+#else // USE_GCP
+
+#include <stdio.h>
+
+int main(int, char**) {
+ fprintf(stderr,
+ "SKIPPED as DBCloud is supported only when USE_GCP is defined.\n");
+ return 0;
+}
+#endif
+
+#else // ROCKSDB_LITE
+
+#include <stdio.h>
+
+int main(int, char**) {
+ fprintf(stderr, "SKIPPED as DBCloud is not supported in ROCKSDB_LITE\n");
+ return 0;
+}
+
+#endif // !ROCKSDB_LITE
\ No newline at end of file
diff --git a/cloud/gcp/gcp_file_system.cc b/cloud/gcp/gcp_file_system.cc
new file mode 100644
index 00000000000..92b43fb35e4
--- /dev/null
+++ b/cloud/gcp/gcp_file_system.cc
@@ -0,0 +1,111 @@
+#ifndef ROCKSDB_LITE
+
+#include <memory>
+
+#include "rocksdb/convenience.h"
+#include "rocksdb/utilities/object_registry.h"
+
+#include "cloud/gcp/gcp_file_system.h"
+#include "cloud/cloud_storage_provider_impl.h"
+
+#ifdef USE_GCP
+
+namespace ROCKSDB_NAMESPACE {
+GcpFileSystem::GcpFileSystem(std::shared_ptr<FileSystem> const& underlying_fs,
+ CloudFileSystemOptions const& cloud_options,
+ std::shared_ptr<Logger> const& info_log)
+ : CloudFileSystemImpl(cloud_options, underlying_fs, info_log) {}
+
+Status GcpFileSystem::NewGcpFileSystem(
+ std::shared_ptr<FileSystem> const& base_fs,
+ CloudFileSystemOptions const& cloud_options,
+ std::shared_ptr<Logger> const& info_log, CloudFileSystem** cfs) {
+ Status status;
+ *cfs = nullptr;
+ auto fs = base_fs;
+ if (!fs) {
+ fs = FileSystem::Default();
+ }
+ std::unique_ptr<GcpFileSystem> gfs(
+ new GcpFileSystem(fs, cloud_options, info_log));
+ auto env = gfs->NewCompositeEnvFromThis(Env::Default());
+ ConfigOptions config_options;
+ config_options.env = env.get();
+ status = gfs->PrepareOptions(config_options);
+ if (status.ok()) {
+ *cfs = gfs.release();
+ }
+ return status;
+}
+
+Status GcpFileSystem::NewGcpFileSystem(std::shared_ptr<FileSystem> const& fs,
+ std::unique_ptr<CloudFileSystem>* cfs) {
+ cfs->reset(new GcpFileSystem(fs, CloudFileSystemOptions()));
+ return Status::OK();
+}
+
+Status GcpFileSystem::PrepareOptions(ConfigOptions const& options) {
+ if (cloud_fs_options.src_bucket.GetRegion().empty() ||
+ cloud_fs_options.dest_bucket.GetRegion().empty()) {
+ std::string region;
+ if (!CloudFileSystemOptions::GetNameFromEnvironment(
+ "GCP_DEFAULT_REGION", "gcp_default_region", &region)) {
+ region = default_region;
+ }
+ if (cloud_fs_options.src_bucket.GetRegion().empty()) {
+ cloud_fs_options.src_bucket.SetRegion(region);
+ }
+ if (cloud_fs_options.dest_bucket.GetRegion().empty()) {
+ cloud_fs_options.dest_bucket.SetRegion(region);
+ }
+ }
+ if (cloud_fs_options.storage_provider == nullptr) {
+ // If the user has not specified a storage provider, then use the default
+ // provider for this CloudType
+ Status s = CloudStorageProvider::CreateFromString(
+ options, CloudStorageProviderImpl::kGcs(),
+ &cloud_fs_options.storage_provider);
+ if (!s.ok()) {
+ return s;
+ }
+ }
+ return CloudFileSystemImpl::PrepareOptions(options);
+}
+
+int CloudFileSystemImpl::RegisterGcpObjects(ObjectLibrary& library,
+ std::string const& /*arg*/) {
+ int count = 0;
+#ifdef USE_GCP
+ library.AddFactory<FileSystem>(
+ CloudFileSystemImpl::kGcp(),
+ [](std::string const& /*uri*/, std::unique_ptr<FileSystem>* guard,
+ std::string* errmsg) {
+ std::unique_ptr<CloudFileSystem> cguard;
+ Status s =
+ GcpFileSystem::NewGcpFileSystem(FileSystem::Default(), &cguard);
+ if (s.ok()) {
+ guard->reset(cguard.release());
+ return guard->get();
+ } else {
+ *errmsg = s.ToString();
+ return static_cast<FileSystem*>(nullptr);
+ }
+ });
+ count++;
+#endif
+ library.AddFactory<CloudStorageProvider>(
+ CloudStorageProviderImpl::kGcs(),
+ [](std::string const& /*uri*/,
+ std::unique_ptr<CloudStorageProvider>* guard, std::string* errmsg) {
+ Status s = CloudStorageProviderImpl::CreateGcsProvider(guard);
+ if (!s.ok()) {
+ *errmsg = s.ToString();
+ }
+ return guard->get();
+ });
+ count++;
+ return count;
+}
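+
+// For reference, a hedged sketch of how a caller might wire up a GCS-backed
+// cloud file system via the factory above (bucket name, object path and region
+// are placeholders; error handling elided):
+//
+//   CloudFileSystemOptions cloud_options;
+//   cloud_options.src_bucket.SetBucketName("my-bucket");
+//   cloud_options.src_bucket.SetObjectPath("my-dbpath");
+//   cloud_options.src_bucket.SetRegion("asia-northeast1");  // else resolved
+//                                                           // via GCP_DEFAULT_REGION
+//   CloudFileSystem* cfs = nullptr;
+//   Status s = GcpFileSystem::NewGcpFileSystem(
+//       FileSystem::Default(), cloud_options, nullptr /* info_log */, &cfs);
+//
+} // namespace ROCKSDB_NAMESPACE
+#endif // USE_GCP
+#endif // ROCKSDB_LITE
\ No newline at end of file
diff --git a/cloud/gcp/gcp_file_system.h b/cloud/gcp/gcp_file_system.h
new file mode 100644
index 00000000000..1e1df6dcb1f
--- /dev/null
+++ b/cloud/gcp/gcp_file_system.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <memory>
+#include "cloud/cloud_file_system_impl.h"
+
+#ifdef USE_GCP
+
+namespace ROCKSDB_NAMESPACE {
+class GcpFileSystem : public CloudFileSystemImpl {
+ public:
+ static Status NewGcpFileSystem(const std::shared_ptr<FileSystem>& base_fs,
+ const CloudFileSystemOptions& cloud_options,
+ const std::shared_ptr<Logger>& info_log,
+ CloudFileSystem** cfs);
+ static Status NewGcpFileSystem(const std::shared_ptr<FileSystem>& fs,
+ std::unique_ptr<CloudFileSystem>* cfs);
+ virtual ~GcpFileSystem() {}
+
+ static char const* kName() { return kGcp(); }
+ const char* Name() const override { return kGcp(); }
+
+ Status PrepareOptions(const ConfigOptions& options) override;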
+
+ static constexpr char const* default_region = "asia-northeast1";
+
+ private:
+ explicit GcpFileSystem(const std::shared_ptr<FileSystem>& underlying_fs,
+ const CloudFileSystemOptions& cloud_options,
+ const std::shared_ptr<Logger>& info_log = nullptr);
+};
+
+class GcpCloudOptions {
+ public:
+ static Status GetClientConfiguration(
+ CloudFileSystem* fs, std::string const& region,
+ google::cloud::Options& options);
+};
+} // namespace ROCKSDB_NAMESPACE
+#endif
\ No newline at end of file
diff --git a/cloud/gcp/gcp_file_system_test.cc b/cloud/gcp/gcp_file_system_test.cc
new file mode 100644
index 00000000000..a7963bb2c1e
--- /dev/null
+++ b/cloud/gcp/gcp_file_system_test.cc
@@ -0,0 +1,248 @@
+// Copyright (c) 2017 Rockset
+
+#include "rocksdb/cloud/cloud_file_system.h"
+
+#include "cloud/cloud_log_controller_impl.h"
+#include "cloud/cloud_storage_provider_impl.h"
+#include "rocksdb/cloud/cloud_log_controller.h"
+#include "rocksdb/cloud/cloud_storage_provider.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/env.h"
+#include "test_util/testharness.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+TEST(CloudFileSystemTest, TestBucket) {
+ CloudFileSystemOptions copts;
+ copts.src_bucket.SetRegion("North");
+ copts.src_bucket.SetBucketName("Input", "src.");
+ ASSERT_FALSE(copts.src_bucket.IsValid());
+ copts.src_bucket.SetObjectPath("Here");
+ ASSERT_TRUE(copts.src_bucket.IsValid());
+
+ copts.dest_bucket.SetRegion("South");
+ copts.dest_bucket.SetObjectPath("There");
+ ASSERT_FALSE(copts.dest_bucket.IsValid());
+ copts.dest_bucket.SetBucketName("Output", "dest.");
+ ASSERT_TRUE(copts.dest_bucket.IsValid());
+}
+
+TEST(CloudFileSystemTest, ConfigureOptions) {
+ ConfigOptions config_options;
+ CloudFileSystemOptions copts, copy;
+ copts.keep_local_sst_files = false;
+ copts.keep_local_log_files = false;
+ copts.create_bucket_if_missing = false;
+ copts.validate_filesize = false;
+ copts.skip_dbid_verification = false;
+ copts.resync_on_open = false;
+ copts.skip_cloud_files_in_getchildren = false;
+ copts.constant_sst_file_size_in_sst_file_manager = 100;
+ copts.run_purger = false;
+ copts.purger_periodicity_millis = 101;
+
+ std::string str;
+ ASSERT_OK(copts.Serialize(config_options, &str));
+ ASSERT_OK(copy.Configure(config_options, str));
+ ASSERT_FALSE(copy.keep_local_sst_files);
+ ASSERT_FALSE(copy.keep_local_log_files);
+ ASSERT_FALSE(copy.create_bucket_if_missing);
+ ASSERT_FALSE(copy.validate_filesize);
+ ASSERT_FALSE(copy.skip_dbid_verification);
+ ASSERT_FALSE(copy.resync_on_open);
+ ASSERT_FALSE(copy.skip_cloud_files_in_getchildren);
+ ASSERT_FALSE(copy.run_purger);
+ ASSERT_EQ(copy.constant_sst_file_size_in_sst_file_manager, 100);
+ ASSERT_EQ(copy.purger_periodicity_millis, 101);
+
+ // Now try a different value
+ copts.keep_local_sst_files = true;
+ copts.keep_local_log_files = true;
+ copts.create_bucket_if_missing = true;
+ copts.validate_filesize = true;
+ copts.skip_dbid_verification = true;
+ copts.resync_on_open = true;
+ copts.skip_cloud_files_in_getchildren = true;
+ copts.constant_sst_file_size_in_sst_file_manager = 200;
+ copts.run_purger = true;
+ copts.purger_periodicity_millis = 201;
+
+ ASSERT_OK(copts.Serialize(config_options, &str));
+ ASSERT_OK(copy.Configure(config_options, str));
+ ASSERT_TRUE(copy.keep_local_sst_files);
+ ASSERT_TRUE(copy.keep_local_log_files);
+ ASSERT_TRUE(copy.create_bucket_if_missing);
+ ASSERT_TRUE(copy.validate_filesize);
+ ASSERT_TRUE(copy.skip_dbid_verification);
+ ASSERT_TRUE(copy.resync_on_open);
+ ASSERT_TRUE(copy.skip_cloud_files_in_getchildren);
+ ASSERT_TRUE(copy.run_purger);
+ ASSERT_EQ(copy.constant_sst_file_size_in_sst_file_manager, 200);
+ ASSERT_EQ(copy.purger_periodicity_millis, 201);
+}
+
+TEST(CloudFileSystemTest, ConfigureBucketOptions) {
+ ConfigOptions config_options;
+ CloudFileSystemOptions copts, copy;
+ std::string str;
+ copts.src_bucket.SetBucketName("source", "src.");
+ copts.src_bucket.SetObjectPath("foo");
+ copts.src_bucket.SetRegion("north");
+ copts.dest_bucket.SetBucketName("dest");
+ copts.dest_bucket.SetObjectPath("bar");
+ ASSERT_OK(copts.Serialize(config_options, &str));
+
+ ASSERT_OK(copy.Configure(config_options, str));
+ ASSERT_EQ(copts.src_bucket.GetBucketName(), copy.src_bucket.GetBucketName());
+ ASSERT_EQ(copts.src_bucket.GetObjectPath(), copy.src_bucket.GetObjectPath());
+ ASSERT_EQ(copts.src_bucket.GetRegion(), copy.src_bucket.GetRegion());
+
+ ASSERT_EQ(copts.dest_bucket.GetBucketName(),
+ copy.dest_bucket.GetBucketName());
+ ASSERT_EQ(copts.dest_bucket.GetObjectPath(),
+ copy.dest_bucket.GetObjectPath());
+ ASSERT_EQ(copts.dest_bucket.GetRegion(), copy.dest_bucket.GetRegion());
+}
+
+TEST(CloudFileSystemTest, ConfigureEnv) {
+ std::unique_ptr<CloudFileSystem> cfs;
+
+ ConfigOptions config_options;
+ config_options.invoke_prepare_options = false;
+ ASSERT_OK(CloudFileSystem::CreateFromString(
+ config_options, "keep_local_sst_files=true", &cfs));
+ ASSERT_NE(cfs, nullptr);
+ ASSERT_STREQ(cfs->Name(), "cloud");
+ auto copts = cfs->GetOptions<CloudFileSystemOptions>();
+ ASSERT_NE(copts, nullptr);
+ ASSERT_TRUE(copts->keep_local_sst_files);
+}
+
+TEST(CloudFileSystemTest, TestInitialize) {
+ std::unique_ptr<CloudFileSystem> cfs;
+ BucketOptions bucket;
+ ConfigOptions config_options;
+ config_options.invoke_prepare_options = false;
+ ASSERT_OK(CloudFileSystem::CreateFromString(
+ config_options, "id=cloud; TEST=cloudenvtest:/test/path", &cfs));
+ ASSERT_NE(cfs, nullptr);
+ ASSERT_STREQ(cfs->Name(), "cloud");
+
+ ASSERT_TRUE(StartsWith(cfs->GetSrcBucketName(),
+ bucket.GetBucketPrefix() + "cloudenvtest."));
+ ASSERT_EQ(cfs->GetSrcObjectPath(), "/test/path");
+ ASSERT_TRUE(cfs->SrcMatchesDest());
+
+ ASSERT_OK(CloudFileSystem::CreateFromString(
+ config_options, "id=cloud; TEST=cloudenvtest2:/test/path2?here", &cfs));
+ ASSERT_NE(cfs, nullptr);
+ ASSERT_STREQ(cfs->Name(), "cloud");
+ ASSERT_TRUE(StartsWith(cfs->GetSrcBucketName(),
+ bucket.GetBucketPrefix() + "cloudenvtest2."));
+ ASSERT_EQ(cfs->GetSrcObjectPath(), "/test/path2");
+ ASSERT_EQ(cfs->GetCloudFileSystemOptions().src_bucket.GetRegion(), "here");
+ ASSERT_TRUE(cfs->SrcMatchesDest());
+
+ ASSERT_OK(CloudFileSystem::CreateFromString(
+ config_options,
+ "id=cloud; TEST=cloudenvtest3:/test/path3; "
+ "src.bucket=my_bucket; dest.object=/my_path",
+ &cfs));
+ ASSERT_NE(cfs, nullptr);
+ ASSERT_STREQ(cfs->Name(), "cloud");
+ ASSERT_EQ(cfs->GetSrcBucketName(), bucket.GetBucketPrefix() + "my_bucket");
+ ASSERT_EQ(cfs->GetSrcObjectPath(), "/test/path3");
+ ASSERT_TRUE(StartsWith(cfs->GetDestBucketName(),
+ bucket.GetBucketPrefix() + "cloudenvtest3."));
+ ASSERT_EQ(cfs->GetDestObjectPath(), "/my_path");
+}
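+
+// For reference: the configuration strings above follow the pattern
+// "id=<file system>; <option>=<value>; ...", with bucket options addressed via
+// the "src." and "dest." prefixes (a reading of these tests, not an exhaustive
+// grammar). A GCP-flavored example, assuming USE_GCP is defined:
+//
+//   std::unique_ptr<CloudFileSystem> cfs;
+//   ASSERT_OK(CloudFileSystem::CreateFromString(
+//       config_options, "id=gcp; src.bucket=my_bucket; dest.object=/my_path",
+//       &cfs));
+//
+TEST(CloudFileSystemTest, ConfigureGcpEnv) {
+ std::unique_ptr<CloudFileSystem> cfs;
+
+ ConfigOptions config_options;
+ Status s = CloudFileSystem::CreateFromString(
+ config_options, "id=gcp; keep_local_sst_files=true", &cfs);
+#ifdef USE_GCP
+ ASSERT_OK(s);
+ ASSERT_NE(cfs, nullptr);
+ ASSERT_STREQ(cfs->Name(), "gcp");
+ auto copts = cfs->GetOptions<CloudFileSystemOptions>();
+ ASSERT_NE(copts, nullptr);
+ ASSERT_TRUE(copts->keep_local_sst_files);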
+ ASSERT_NE(cfs->GetStorageProvider(), nullptr);
+ ASSERT_STREQ(cfs->GetStorageProvider()->Name(),
+ CloudStorageProviderImpl::kGcs());
+#else
+ ASSERT_NOK(s);
+ ASSERT_EQ(cfs, nullptr);
+#endif
+}
+
+TEST(CloudFileSystemTest, ConfigureGcsProvider) {
+ std::unique_ptr<CloudFileSystem> cfs;
+
+ ConfigOptions config_options;
+ Status s =
+ CloudFileSystem::CreateFromString(config_options, "provider=gcs", &cfs);
+ ASSERT_NOK(s);
+ ASSERT_EQ(cfs, nullptr);
+
+#ifdef USE_GCP
+ ASSERT_OK(CloudFileSystem::CreateFromString(config_options,
+ "id=gcp; provider=gcs", &cfs));
+ ASSERT_STREQ(cfs->Name(), "gcp");
+ ASSERT_NE(cfs->GetStorageProvider(), nullptr);
+ ASSERT_STREQ(cfs->GetStorageProvider()->Name(),
+ CloudStorageProviderImpl::kGcs());
+#endif
+}
+
+/* kinesis
+// Test is disabled until we have a mock provider and authentication issues are
+// resolved
+TEST(CloudFileSystemTest, DISABLED_ConfigureKinesisController) {
+ std::unique_ptr<CloudFileSystem> cfs;
+
+ ConfigOptions config_options;
+ Status s = CloudFileSystem::CreateFromString(
+ config_options, "provider=mock; controller=kinesis", &cfs);
+ ASSERT_NOK(s);
+ ASSERT_EQ(cfs, nullptr);
+
+#ifdef USE_AWS
+ ASSERT_OK(CloudFileSystem::CreateFromString(
+ config_options, "id=aws; controller=kinesis; TEST=dbcloud:/test", &cfs));
+ ASSERT_STREQ(cfs->Name(), "aws");
+ ASSERT_NE(cfs->GetLogController(), nullptr);
+ ASSERT_STREQ(cfs->GetLogController()->Name(),
+ CloudLogControllerImpl::kKinesis());
+#endif
+}
+
+TEST(CloudFileSystemTest, ConfigureKafkaController) {
+ std::unique_ptr<CloudFileSystem> cfs;
+
+ ConfigOptions config_options;
+ Status s = CloudFileSystem::CreateFromString(
+ config_options, "provider=mock; controller=kafka", &cfs);
+#ifdef USE_KAFKA
+ ASSERT_OK(s);
+ ASSERT_NE(cfs, nullptr);
+ ASSERT_NE(cfs->GetLogController(), nullptr);
+ ASSERT_STREQ(cfs->GetLogController()->Name(),
+ CloudLogControllerImpl::kKafka());
+#else
+ ASSERT_NOK(s);
+ ASSERT_EQ(cfs, nullptr);
+#endif
+}
+*/
+} // namespace ROCKSDB_NAMESPACE
+
+
+int main(int argc, char** argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/cloud/gcp/gcp_retry.cc b/cloud/gcp/gcp_retry.cc
new file mode 100644
index 00000000000..53e0355dd42
--- /dev/null
+++ b/cloud/gcp/gcp_retry.cc
@@ -0,0 +1,123 @@
+#include "cloud/gcp/gcp_file_system.h"
+#include "rocksdb/cloud/cloud_file_system.h"
+#include <chrono>
+
+#ifdef USE_GCP
+#include <google/cloud/options.h>
+#include <google/cloud/status.h>
+#include <google/cloud/storage/client.h>
+#include <google/cloud/storage/idempotency_policy.h>
+#include <google/cloud/storage/retry_policy.h>
+#include <google/cloud/version.h>
+#endif // USE_GCP
+
+namespace ROCKSDB_NAMESPACE {
+#ifdef USE_GCP
+namespace gcp = ::google::cloud;
+namespace gcs = ::google::cloud::storage;
+
+// A retry policy that limits both the total time spent and the retry count.
+class GcpRetryPolicy : public gcs::RetryPolicy {
+ public:
+ template <typename Rep, typename Period>
+ explicit GcpRetryPolicy(
+ CloudFileSystem* fs,
+ std::chrono::duration<Rep, Period> maximum_duration)
+ : cfs_(fs),
+ deadline_(std::chrono::system_clock::now() + maximum_duration),
+ time_based_policy_(maximum_duration) {}
+
+ std::chrono::milliseconds maximum_duration() const {
+ return time_based_policy_.maximum_duration();
+ }
+
+ bool OnFailure(gcp::Status const& s) override {
+ bool is_retryable = time_based_policy_.OnFailure(s);
+ ++failure_count_;
+ if (is_retryable) {
+ // transient failure and retry budget still available
+ if (failure_count_ <= maximum_failures_) {
+ Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(),
+ "[gcs] Encountered failure: %s, "
+ "retried %d / %d times. Retrying...",
Retrying...", + s.message().c_str(), failure_count_, maximum_failures_); + return true; + } else { + Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(), + "[gcs] Encountered failure: %s" + "retry attempt %d exceeds max retries %d. Aborting...", + s.message().c_str(), failure_count_, maximum_failures_); + // retry count exceed maxnum, but is not nonretryable + return false; + } + } else { + // non-transient failure or resource exhausted + Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(), + "[gcs] Encountered permanent failure: %s" + "retry attempt %d / %d. Aborting...", + s.message().c_str(), failure_count_, maximum_failures_); + return false; + } + } + + bool IsExhausted() const override { + return (time_based_policy_.IsExhausted() || + failure_count_ > maximum_failures_); + } + bool IsPermanentFailure(gcp::Status const& s) const override { + return gcs::internal::StatusTraits::IsPermanentFailure(s); + } + + std::unique_ptr clone() const override { + return std::make_unique( + cfs_, time_based_policy_.maximum_duration()); + } + + private: + // rocksdb retries, etc + int failure_count_ = 0; + int maximum_failures_ = 10; + CloudFileSystem* cfs_; + std::chrono::system_clock::time_point deadline_; + // non-permermanent status in gcs::internal::StatusTraits + gcp::internal::LimitedTimeRetryPolicy + time_based_policy_; +}; + +#endif /* USE_GCP */ + +#ifdef USE_GCP +Status GcpCloudOptions::GetClientConfiguration(CloudFileSystem* fs, + std::string const& /*region*/, + gcp::Options& options) { + // Default gcs operation timeout is 10 minutes after all retrys. + uint64_t timeout_ms = 600000; + // All storage operations are idempotent, so we can use always retry. + options.set( + gcs::AlwaysRetryIdempotencyPolicy().clone()); + + // Use exponential backoff with a 1ms initial delay, 1 minute maximum delay, + options.set( + gcs::ExponentialBackoffPolicy(std::chrono::milliseconds(1), + std::chrono::minutes(1), 2.0) + .clone()); + + // Use request_timeout_ms from CloudFileSystemOptions if set. + auto const& cloud_fs_options = fs->GetCloudFileSystemOptions(); + if (cloud_fs_options.request_timeout_ms != 0) { + timeout_ms = cloud_fs_options.request_timeout_ms; + } + // Use timed and max retry count based retry policy. + options.set( + GcpRetryPolicy(fs, std::chrono::milliseconds(timeout_ms)).clone()); + return Status::OK(); +} +#else +Status GcpCloudOptions::GetClientConfiguration(CloudFileSystem*, + std::string const&, + gcp::Options&) { + return Status::NotSupported("Not configured for GCP support"); +} +#endif /* USE_GCP */ + +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/include/rocksdb/cloud/cloud_file_system.h b/include/rocksdb/cloud/cloud_file_system.h index 31f8dc84804..56aba4f2634 100644 --- a/include/rocksdb/cloud/cloud_file_system.h +++ b/include/rocksdb/cloud/cloud_file_system.h @@ -1,17 +1,17 @@ // Copyright (c) 2016-present, Rockset, Inc. All rights reserved. 
//
#pragma once
-#include 
-#include 
-#include 
-#include 
-#include 
-
#include "rocksdb/cache.h"
#include "rocksdb/configurable.h"
#include "rocksdb/file_system.h"
#include "rocksdb/io_status.h"
#include "rocksdb/status.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 

namespace Aws {
namespace Auth {
@@ -25,6 +25,28 @@ class S3Client;
}
} // namespace Aws

+#ifdef USE_GCP
+#include <google/cloud/version.h>
+
+namespace google {
+namespace cloud {
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
+class Options;
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
+} // namespace cloud
+} // namespace google
+
+namespace google {
+namespace cloud {
+namespace storage {
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
+class Client;
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
+} // namespace storage
+} // namespace cloud
+} // namespace google
+#endif
+
namespace ROCKSDB_NAMESPACE {

class CloudFileSystem;
@@ -68,10 +90,10 @@ class AwsCloudAccessCredentials {
// functions to support AWS credentials
//
// Initialize AWS credentials using access_key_id and secret_key
- void InitializeSimple(const std::string& aws_access_key_id,
- const std::string& aws_secret_key);
+ void InitializeSimple(std::string const& aws_access_key_id,
+ std::string const& aws_secret_key);
// Initialize AWS credentials using a config file
- void InitializeConfig(const std::string& aws_config_file);
+ void InitializeConfig(std::string const& aws_config_file);

// test if valid AWS credentials are present
Status HasValid() const;
@@ -82,7 +104,7 @@ class AwsCloudAccessCredentials {

private:
AwsAccessType GetAccessType() const;
- Status CheckCredentials(const AwsAccessType& aws_type) const;
+ Status CheckCredentials(AwsAccessType const& aws_type) const;

public:
std::string access_key_id;
@@ -95,8 +117,14 @@ class AwsCloudAccessCredentials {
};

using S3ClientFactory = std::function<std::shared_ptr<Aws::S3::S3Client>(
- const std::shared_ptr<Aws::Auth::AWSCredentialsProvider>&,
- const Aws::Client::ClientConfiguration&)>;
+ std::shared_ptr<Aws::Auth::AWSCredentialsProvider> const&,
+ Aws::Client::ClientConfiguration const&)>;
+
+#ifdef USE_GCP
+using GCSClientFactory =
+ std::function<std::shared_ptr<google::cloud::storage::Client>(
+ google::cloud::Options const&)>;
+#endif

// Defines parameters required to connect to Kafka
class KafkaLogOptions {
@@ -141,21 +169,26 @@ class BucketOptions {
- void SetBucketPrefix(std::string prefix);
- const std::string& GetBucketPrefix() const { return prefix_; }
- const std::string& GetBucketName(bool full = true) const {
+ void SetBucketName(std::string const& bucket, std::string const& prefix = "");
+ std::string const& GetBucketPrefix() const { return prefix_; }
+ std::string const& GetBucketName(bool full = true) const {
if (full) {
return name_;
} else {
return bucket_;
}
}
- const std::string& GetObjectPath() const { return object_; }
- void SetObjectPath(const std::string& object) { object_ = object; }
- const std::string& GetRegion() const { return region_; }
- void SetRegion(const std::string& region) { region_ = region; }
+ std::string const& GetObjectPath() const { return object_; }
+ void SetObjectPath(std::string const& object) { object_ = object; }
+ std::string const& GetRegion() const { return region_; }
+ void SetRegion(std::string const& region) { region_ = region; }

// Initializes the bucket properties for test purposes
- void TEST_Initialize(const std::string& name_prefix,
- const std::string& object_path,
- const std::string& region = "");
+ void TEST_Initialize(std::string const& name_prefix,
+ std::string const& object_path,
+ std::string const& region = "");

bool IsValid() const {
if (object_.empty() || name_.empty()) {
 name_.empty()) {
       return false;
@@ -165,7 +198,7 @@ class BucketOptions {
   }
 };
 
-inline bool operator==(const BucketOptions& lhs, const BucketOptions& rhs) {
+inline bool operator==(BucketOptions const& lhs, BucketOptions const& rhs) {
   if (lhs.IsValid() && rhs.IsValid()) {
     return ((lhs.GetBucketName() == rhs.GetBucketName()) &&
             (lhs.GetObjectPath() == rhs.GetObjectPath()) &&
@@ -174,14 +207,14 @@ inline bool operator==(const BucketOptions& lhs, const BucketOptions& rhs) {
     return false;
   }
 }
-inline bool operator!=(const BucketOptions& lhs, const BucketOptions& rhs) {
+inline bool operator!=(BucketOptions const& lhs, BucketOptions const& rhs) {
   return !(lhs == rhs);
 }
 
 class AwsCloudOptions {
  public:
   static Status GetClientConfiguration(
-      CloudFileSystem* fs, const std::string& region,
+      CloudFileSystem* fs, std::string const& region,
       Aws::Client::ClientConfiguration* config);
 };
 
@@ -192,7 +225,7 @@ class AwsCloudOptions {
 class CloudFileSystemOptions {
  private:
  public:
-  static const char* kName() { return "CloudFileSystemOptions"; }
+  static char const* kName() { return "CloudFileSystemOptions"; }
   BucketOptions src_bucket;
   BucketOptions dest_bucket;
   // Specify the type of cloud-service to use. Deprecated.
@@ -212,9 +245,20 @@ class CloudFileSystemOptions {
   // Access credentials
   AwsCloudAccessCredentials credentials;
 
+  // Access credentials for GCP
+  // These are ADC (Application Default Credentials) based, so they are not
+  // manageable from user code.
+  // Reserved for future use
+  // GcpCloudAccessCredentials gcp_credentials;
+
   // If present, s3_client_factory will be used to create S3Client instances
   S3ClientFactory s3_client_factory;
 
+  // If present, gcs_client_factory will be used to create
+  // GCSClient instances
+#ifdef USE_GCP
+  GCSClientFactory gcs_client_factory;
+#endif
+
   // Only used if keep_local_log_files is true and log_type is kKafka.
   KafkaLogOptions kafka_log_options;
 
@@ -445,11 +489,11 @@ class CloudFileSystemOptions {
   // Sets result based on the value of name or alt in the environment
   // Returns true if the name/alt exists in the environment, false otherwise
-  static bool GetNameFromEnvironment(const char* name, const char* alt,
+  static bool GetNameFromEnvironment(char const* name, char const* alt,
                                      std::string* result);
-  void TEST_Initialize(const std::string& name_prefix,
-                       const std::string& object_path,
-                       const std::string& region = "");
+  void TEST_Initialize(std::string const& name_prefix,
+                       std::string const& object_path,
+                       std::string const& region = "");
 
   Status Configure(const ConfigOptions& config_options,
                    const std::string& opts_str);
@@ -480,6 +524,7 @@ class CloudFileSystem : public FileSystem {
  public:
   static const char* kCloud() { return "cloud"; }
   static const char* kAws() { return "aws"; }
+  static char const* kGcp() { return "gcp"; }
 
   // Returns the underlying file system
   virtual const std::shared_ptr<FileSystem>& GetBaseFileSystem() const = 0;
 
@@ -487,31 +532,31 @@ class CloudFileSystem : public FileSystem {
   virtual IOStatus PreloadCloudManifest(const std::string& local_dbname) = 0;
 
   // This method will migrate the database that is using pure RocksDB into
   // RocksDB-Cloud. Call this before opening the database with RocksDB-Cloud.
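A minimal calling sketch for the migration path just described, against the signature below; the CloudFileSystem pointer is assumed to come from one of the factory functions later in this header, and the local path is a made-up placeholder:

    #include <cassert>
    #include "rocksdb/cloud/cloud_file_system.h"

    // Sketch only: promote an existing pure-RocksDB directory so it can be
    // opened through RocksDB-Cloud afterwards. "/data/legacy-db" is a
    // hypothetical path.
    void MigrateLegacyDb(ROCKSDB_NAMESPACE::CloudFileSystem* cfs) {
      ROCKSDB_NAMESPACE::IOStatus ios =
          cfs->MigrateFromPureRocksDB("/data/legacy-db");
      assert(ios.ok());
    }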
- virtual IOStatus MigrateFromPureRocksDB(const std::string& local_dbname) = 0; + virtual IOStatus MigrateFromPureRocksDB(std::string const& local_dbname) = 0; // Reads a file from the cloud virtual IOStatus NewSequentialFileCloud( - const std::string& bucket_prefix, const std::string& fname, - const FileOptions& file_opts, std::unique_ptr* result, + std::string const& bucket_prefix, std::string const& fname, + FileOptions const& file_opts, std::unique_ptr* result, IODebugContext* dbg) = 0; // Saves and retrieves the dbid->dirname mapping in cloud storage - virtual IOStatus SaveDbid(const std::string& bucket_name, - const std::string& dbid, - const std::string& dirname) = 0; - virtual IOStatus GetPathForDbid(const std::string& bucket_prefix, - const std::string& dbid, + virtual IOStatus SaveDbid(std::string const& bucket_name, + std::string const& dbid, + std::string const& dirname) = 0; + virtual IOStatus GetPathForDbid(std::string const& bucket_prefix, + std::string const& dbid, std::string* dirname) = 0; - virtual IOStatus GetDbidList(const std::string& bucket_prefix, + virtual IOStatus GetDbidList(std::string const& bucket_prefix, DbidList* dblist) = 0; - virtual IOStatus DeleteDbid(const std::string& bucket_prefix, - const std::string& dbid) = 0; + virtual IOStatus DeleteDbid(std::string const& bucket_prefix, + std::string const& dbid) = 0; // Deletes file from a destination bucket. - virtual IOStatus DeleteCloudFileFromDest(const std::string& fname) = 0; + virtual IOStatus DeleteCloudFileFromDest(std::string const& fname) = 0; // Copies a local file to a destination bucket. - virtual IOStatus CopyLocalFileToDest(const std::string& local_name, - const std::string& cloud_name) = 0; + virtual IOStatus CopyLocalFileToDest(std::string const& local_name, + std::string const& cloud_name) = 0; // Returns CloudManifest file name for a given db. virtual std::string CloudManifestFile(const std::string& dbname) = 0; @@ -539,14 +584,14 @@ class CloudFileSystem : public FileSystem { // For example, it will map 00010.sst to 00010.sst-[epoch] where [epoch] is // an epoch during which that file was created. // Files both in S3 and in the local directory have this [epoch] suffix. 
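To make the epoch-suffix mapping concrete, a small sketch of the remapping described above; the suffix shown in the comment is illustrative, not a real epoch value:

    #include <iostream>
    #include "rocksdb/cloud/cloud_file_system.h"

    // Sketch only: the logical SST name stays stable across epochs, while
    // the physical name carries the creation epoch as a suffix.
    void ShowRemappedName(ROCKSDB_NAMESPACE::CloudFileSystem* cfs) {
      std::string physical = cfs->RemapFilename("00010.sst");
      std::cout << physical << "\n";  // e.g. "00010.sst-<epoch>" (illustrative)
    }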
- virtual std::string RemapFilename(const std::string& logical_name) const = 0; + virtual std::string RemapFilename(std::string const& logical_name) const = 0; // Find the list of live files based on CloudManifest and Manifest in local db // // For the returned filepath in `live_sst_files` and `manifest_file`, we only // include the basename of the filepath but not the directory prefix to the // file - virtual IOStatus FindAllLiveFiles(const std::string& local_dbname, + virtual IOStatus FindAllLiveFiles(std::string const& local_dbname, std::vector* live_sst_files, std::string* manifest_file) = 0; @@ -565,7 +610,7 @@ class CloudFileSystem : public FileSystem { // // If delta has already been applied in cloud manifest, delta_applied would be // `false` - virtual IOStatus ApplyCloudManifestDelta(const CloudManifestDelta& delta, + virtual IOStatus ApplyCloudManifestDelta(CloudManifestDelta const& delta, bool* delta_applied) = 0; // This function does several things: @@ -577,12 +622,12 @@ class CloudFileSystem : public FileSystem { // // Return InvalidArgument status if the delta has been applied in current // CloudManifest - virtual IOStatus RollNewCookie(const std::string& local_dbname, - const std::string& cookie, - const CloudManifestDelta& delta) const = 0; + virtual IOStatus RollNewCookie(std::string const& local_dbname, + std::string const& cookie, + CloudManifestDelta const& delta) const = 0; virtual IOStatus GetMaxFileNumberFromCurrentManifest( - const std::string& local_dbname, uint64_t* max_file_number) = 0; + std::string const& local_dbname, uint64_t* max_file_number) = 0; // Delete both local and cloud invisble files virtual IOStatus DeleteCloudInvisibleFiles( @@ -657,19 +702,34 @@ class CloudFileSystemEnv { // data from cloud storage. // If dest_bucket_name is empty, then the associated db does not write any // data to cloud storage. 
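A construction sketch for the GCS variant declared below, mirroring the shape of the AWS factory; every bucket name, object prefix, and region here is a placeholder, and error handling is reduced to a null return:

    #include <memory>
    #include "rocksdb/cloud/cloud_file_system.h"

    // Sketch only: build a CloudFileSystem backed by Google Cloud Storage.
    // All bucket/path/region values are hypothetical.
    ROCKSDB_NAMESPACE::CloudFileSystem* MakeGcsFileSystem() {
      ROCKSDB_NAMESPACE::CloudFileSystem* cfs = nullptr;
      auto st = ROCKSDB_NAMESPACE::CloudFileSystemEnv::NewGcpFileSystem(
          ROCKSDB_NAMESPACE::FileSystem::Default(),
          "example-bucket", "db/src", "us-east1",  // source bucket spec
          "example-bucket", "db/dst", "us-east1",  // destination bucket spec
          ROCKSDB_NAMESPACE::CloudFileSystemOptions(),
          nullptr /* info_log */, &cfs);
      return st.ok() ? cfs : nullptr;
    }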
- static Status NewAwsFileSystem(const std::shared_ptr& base_fs, - const std::string& src_bucket_name, - const std::string& src_object_prefix, - const std::string& src_bucket_region, - const std::string& dest_bucket_name, - const std::string& dest_object_prefix, - const std::string& dest_bucket_region, - const CloudFileSystemOptions& fs_options, - const std::shared_ptr& logger, + static Status NewAwsFileSystem(std::shared_ptr const& base_fs, + std::string const& src_bucket_name, + std::string const& src_object_prefix, + std::string const& src_bucket_region, + std::string const& dest_bucket_name, + std::string const& dest_object_prefix, + std::string const& dest_bucket_region, + CloudFileSystemOptions const& fs_options, + std::shared_ptr const& logger, CloudFileSystem** cfs); - static Status NewAwsFileSystem(const std::shared_ptr& base_fs, - const CloudFileSystemOptions& fs_options, - const std::shared_ptr& logger, + static Status NewAwsFileSystem(std::shared_ptr const& base_fs, + CloudFileSystemOptions const& fs_options, + std::shared_ptr const& logger, + CloudFileSystem** cfs); + + static Status NewGcpFileSystem(std::shared_ptr const& base_fs, + std::string const& src_bucket_name, + std::string const& src_object_prefix, + std::string const& src_buck_region, + std::string const& dest_bucket_name, + std::string const& dest_bucket_prefix, + std::string const& dest_bucket_region, + CloudFileSystemOptions const& fs_options, + std::shared_ptr const& logger, + CloudFileSystem** cfs); + static Status NewGcpFileSystem(std::shared_ptr const& base_fs, + CloudFileSystemOptions const& fs_options, + std::shared_ptr const& logger, CloudFileSystem** cfs); // Creates a new Env that delegates all thread/time related @@ -683,5 +743,4 @@ class CloudFileSystemEnv { const std::string& cookie, std::unique_ptr* cloud_manifest); }; - } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/cloud/cloud_file_system_impl.h b/include/rocksdb/cloud/cloud_file_system_impl.h index cb672b10e8a..6d7193501e7 100644 --- a/include/rocksdb/cloud/cloud_file_system_impl.h +++ b/include/rocksdb/cloud/cloud_file_system_impl.h @@ -29,6 +29,8 @@ class CloudFileSystemImpl : public CloudFileSystem { mutable std::shared_ptr info_log_; // informational messages static int RegisterAwsObjects(ObjectLibrary& library, const std::string& arg); + static int RegisterGcpObjects(ObjectLibrary& library, const std::string& arg); + // Constructor CloudFileSystemImpl(const CloudFileSystemOptions& options, const std::shared_ptr& base_fs, @@ -454,4 +456,4 @@ class CloudFileSystemImpl : public CloudFileSystem { std::shared_ptr cloud_file_deletion_scheduler_; }; -} // namespace ROCKSDB_NAMESPACE +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/include/rocksdb/cloud/cloud_storage_provider_impl.h b/include/rocksdb/cloud/cloud_storage_provider_impl.h index 22d8aa47dc9..7c0c0a40007 100644 --- a/include/rocksdb/cloud/cloud_storage_provider_impl.h +++ b/include/rocksdb/cloud/cloud_storage_provider_impl.h @@ -8,14 +8,14 @@ namespace ROCKSDB_NAMESPACE { class CloudStorageReadableFileImpl : public CloudStorageReadableFile { public: - CloudStorageReadableFileImpl(Logger* info_log, const std::string& bucket, - const std::string& fname, uint64_t size); + CloudStorageReadableFileImpl(Logger* info_log, std::string const& bucket, + std::string const& fname, uint64_t size); // sequential access, read data at current offset in file - IOStatus Read(size_t n, const IOOptions& options, Slice* result, + IOStatus Read(size_t n, IOOptions 
const& options, Slice* result, char* scratch, IODebugContext* dbg) override; // random access, read data from specified offset in file - IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, + IOStatus Read(uint64_t offset, size_t n, IOOptions const& options, Slice* result, char* scratch, IODebugContext* dbg) const override; @@ -23,7 +23,7 @@ class CloudStorageReadableFileImpl : public CloudStorageReadableFile { protected: virtual IOStatus DoCloudRead(uint64_t offset, size_t n, - const IOOptions& options, char* scratch, + IOOptions const& options, char* scratch, uint64_t* bytes_read, IODebugContext* dbg) const = 0; @@ -38,7 +38,7 @@ class CloudStorageReadableFileImpl : public CloudStorageReadableFile { class CloudStorageWritableFileImpl : public CloudStorageWritableFile { protected: CloudFileSystem* cfs_; - const char* class_; + char const* class_; std::string fname_; std::string tmp_file_; IOStatus status_; @@ -49,14 +49,14 @@ class CloudStorageWritableFileImpl : public CloudStorageWritableFile { public: CloudStorageWritableFileImpl(CloudFileSystem* fs, - const std::string& local_fname, - const std::string& bucket, - const std::string& cloud_fname, - const FileOptions& file_opts); + std::string const& local_fname, + std::string const& bucket, + std::string const& cloud_fname, + FileOptions const& file_opts); virtual ~CloudStorageWritableFileImpl(); using CloudStorageWritableFile::Append; - IOStatus Append(const Slice& data, const IOOptions& opts, + IOStatus Append(Slice const& data, IOOptions const& opts, IODebugContext* dbg) override { assert(status_.ok()); // write to temporary file @@ -64,16 +64,16 @@ class CloudStorageWritableFileImpl : public CloudStorageWritableFile { } using CloudStorageWritableFile::PositionedAppend; - IOStatus PositionedAppend(const Slice& data, uint64_t offset, - const IOOptions& opts, + IOStatus PositionedAppend(Slice const& data, uint64_t offset, + IOOptions const& opts, IODebugContext* dbg) override { return local_file_->PositionedAppend(data, offset, opts, dbg); } - IOStatus Truncate(uint64_t size, const IOOptions& opts, + IOStatus Truncate(uint64_t size, IOOptions const& opts, IODebugContext* dbg) override { return local_file_->Truncate(size, opts, dbg); } - IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override { + IOStatus Fsync(IOOptions const& opts, IODebugContext* dbg) override { return local_file_->Fsync(opts, dbg); } bool IsSyncThreadSafe() const override { @@ -83,7 +83,7 @@ class CloudStorageWritableFileImpl : public CloudStorageWritableFile { size_t GetRequiredBufferAlignment() const override { return local_file_->GetRequiredBufferAlignment(); } - uint64_t GetFileSize(const IOOptions& opts, IODebugContext* dbg) override { + uint64_t GetFileSize(IOOptions const& opts, IODebugContext* dbg) override { return local_file_->GetFileSize(opts, dbg); } size_t GetUniqueId(char* id, size_t max_size) const override { @@ -92,22 +92,22 @@ class CloudStorageWritableFileImpl : public CloudStorageWritableFile { IOStatus InvalidateCache(size_t offset, size_t length) override { return local_file_->InvalidateCache(offset, length); } - IOStatus RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& opts, + IOStatus RangeSync(uint64_t offset, uint64_t nbytes, IOOptions const& opts, IODebugContext* dbg) override { return local_file_->RangeSync(offset, nbytes, opts, dbg); } - IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& opts, + IOStatus Allocate(uint64_t offset, uint64_t len, IOOptions const& opts, IODebugContext* dbg) 
override { return local_file_->Allocate(offset, len, opts, dbg); } - IOStatus Flush(const IOOptions& opts, IODebugContext* dbg) override { + IOStatus Flush(IOOptions const& opts, IODebugContext* dbg) override { assert(status_.ok()); return local_file_->Flush(opts, dbg); } IOStatus status() override { return status_; } - IOStatus Sync(const IOOptions& opts, IODebugContext* dbg) override; - IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; + IOStatus Sync(IOOptions const& opts, IODebugContext* dbg) override; + IOStatus Close(IOOptions const& opts, IODebugContext* dbg) override; }; // All writes to this DB can be configured to be persisted @@ -117,39 +117,42 @@ class Random64; class CloudStorageProviderImpl : public CloudStorageProvider { public: static Status CreateS3Provider(std::unique_ptr* result); - static const char* kS3() { return "s3"; } + static Status CreateGcsProvider( + std::unique_ptr* result); + static char const* kS3() { return "s3"; } + static char const* kGcs() { return "gcs"; } CloudStorageProviderImpl(); virtual ~CloudStorageProviderImpl(); - IOStatus GetCloudObject(const std::string& bucket_name, - const std::string& object_path, - const std::string& local_destination) override; - IOStatus PutCloudObject(const std::string& local_file, - const std::string& bucket_name, - const std::string& object_path) override; + IOStatus GetCloudObject(std::string const& bucket_name, + std::string const& object_path, + std::string const& local_destination) override; + IOStatus PutCloudObject(std::string const& local_file, + std::string const& bucket_name, + std::string const& object_path) override; IOStatus NewCloudReadableFile( - const std::string& bucket, const std::string& fname, - const FileOptions& options, + std::string const& bucket, std::string const& fname, + FileOptions const& options, std::unique_ptr* result, IODebugContext* dbg) override; - Status PrepareOptions(const ConfigOptions& options) override; + Status PrepareOptions(ConfigOptions const& options) override; protected: std::unique_ptr rng_; virtual IOStatus DoNewCloudReadableFile( - const std::string& bucket, const std::string& fname, uint64_t fsize, - const std::string& content_hash, const FileOptions& options, + std::string const& bucket, std::string const& fname, uint64_t fsize, + std::string const& content_hash, FileOptions const& options, std::unique_ptr* result, IODebugContext* dbg) = 0; // Downloads object from the cloud into a local directory - virtual IOStatus DoGetCloudObject(const std::string& bucket_name, - const std::string& object_path, - const std::string& local_path, + virtual IOStatus DoGetCloudObject(std::string const& bucket_name, + std::string const& object_path, + std::string const& local_path, uint64_t* remote_size) = 0; - virtual IOStatus DoPutCloudObject(const std::string& local_file, - const std::string& object_path, - const std::string& bucket_name, + virtual IOStatus DoPutCloudObject(std::string const& local_file, + std::string const& object_path, + std::string const& bucket_name, uint64_t file_size) = 0; CloudFileSystem* cfs_; diff --git a/src.mk b/src.mk index 238d57bf2d0..0fd8d0260d0 100644 --- a/src.mk +++ b/src.mk @@ -18,6 +18,9 @@ LIB_SOURCES = \ cloud/aws/aws_kinesis.cc \ cloud/aws/aws_retry.cc \ cloud/aws/aws_s3.cc \ + cloud/gcp/gcp_cs.cc \ + cloud/gcp/gcp_file_system.cc \ + cloud/gcp/gcp_retry.cc \ cloud/db_cloud_impl.cc \ cloud/cloud_file_system.cc \ cloud/cloud_file_system_impl.cc \ @@ -455,6 +458,8 @@ TEST_MAIN_SOURCES = \ cache/cache_reservation_manager_test.cc \ 
cloud/db_cloud_test.cc \ cloud/cloud_file_system_test.cc \ + cloud/gcp/gcp_file_system_test.cc \ + cloud/gcp/gcp_db_cloud_test.cc \ cloud/cloud_manifest_test.cc \ cloud/cloud_scheduler_test.cc \ cloud/replication_test.cc \ @@ -750,4 +755,4 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/writebatchhandlerjnicallback.cc \ java/rocksjni/write_batch_test.cc \ java/rocksjni/write_batch_with_index.cc \ - java/rocksjni/write_buffer_manager.cc + java/rocksjni/write_buffer_manager.cc \ No newline at end of file From 4ccc3169ff92df6e620099389383213b7475dfba Mon Sep 17 00:00:00 2001 From: githubzilla Date: Fri, 20 Oct 2023 10:38:10 +0000 Subject: [PATCH 02/16] Fix version conflicts caused when rebase --- cloud/gcp/gcp_db_cloud_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/cloud/gcp/gcp_db_cloud_test.cc b/cloud/gcp/gcp_db_cloud_test.cc index 270ae4b924d..cfc5af52901 100644 --- a/cloud/gcp/gcp_db_cloud_test.cc +++ b/cloud/gcp/gcp_db_cloud_test.cc @@ -51,7 +51,6 @@ class CloudTest : public testing::Test { dbname_ = test::TmpDir() + "/db_cloud-" + test_id_; clone_dir_ = test::TmpDir() + "/ctest-" + test_id_; cloud_fs_options_.TEST_Initialize("dbcloudtest.", dbname_); - cloud_fs_options_.resync_manifest_on_open = true; // To catch any possible file deletion bugs, cloud files are deleted // right away cloud_fs_options_.cloud_file_deletion_delay = std::chrono::seconds(0); From 0f9ab617cfc37d1c8ee89065342dfaae72d1cc6e Mon Sep 17 00:00:00 2001 From: githubzilla Date: Mon, 23 Oct 2023 06:35:10 +0000 Subject: [PATCH 03/16] Refine gcp_db_cloud_test for master --- cloud/gcp/gcp_db_cloud_test.cc | 435 ++++++++++----------------------- 1 file changed, 127 insertions(+), 308 deletions(-) diff --git a/cloud/gcp/gcp_db_cloud_test.cc b/cloud/gcp/gcp_db_cloud_test.cc index cfc5af52901..260b6df201c 100644 --- a/cloud/gcp/gcp_db_cloud_test.cc +++ b/cloud/gcp/gcp_db_cloud_test.cc @@ -1,8 +1,17 @@ // Copyright (c) 2017 Rockset #ifndef ROCKSDB_LITE + #ifdef USE_GCP +#include "rocksdb/cloud/db_cloud.h" + +#include +#include +#include +#include +#include + #include "cloud/cloud_file_deletion_scheduler.h" #include "cloud/cloud_file_system_impl.h" #include "cloud/cloud_scheduler.h" @@ -15,7 +24,6 @@ #include "file/filename.h" #include "logging/logging.h" #include "rocksdb/cloud/cloud_file_system.h" -#include "rocksdb/cloud/db_cloud.h" #include "rocksdb/options.h" #include "rocksdb/status.h" #include "rocksdb/table.h" @@ -24,10 +32,6 @@ #include "test_util/testutil.h" #include "util/random.h" #include "util/string_util.h" -#include -#include -#include -#include #ifndef OS_WIN #include #endif @@ -77,16 +81,16 @@ class CloudTest : public testing::Test { // check cloud credentials ASSERT_TRUE(cloud_fs_options_.credentials.HasValid().ok()); - CloudFileSystem* gfs; - // create a dummy gfs env + CloudFileSystem* afs; + // create a dummy Gcp env ASSERT_OK(CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(), cloud_fs_options_, - options_.info_log, &gfs)); - ASSERT_NE(gfs, nullptr); + options_.info_log, &afs)); + ASSERT_NE(afs, nullptr); // delete all pre-existing contents from the bucket - auto st = gfs->GetStorageProvider()->EmptyBucket(gfs->GetSrcBucketName(), + auto st = afs->GetStorageProvider()->EmptyBucket(afs->GetSrcBucketName(), dbname_); - delete gfs; + delete afs; ASSERT_TRUE(st.ok() || st.IsNotFound()); DestroyDir(clone_dir_); @@ -129,7 +133,7 @@ class CloudTest : public testing::Test { return GetSSTFiles(cname); } - void DestroyDir(std::string const& dir) { + void DestroyDir(const std::string& dir) { std::string 
cmd = "rm -rf " + dir; int rc = system(cmd.c_str()); ASSERT_EQ(rc, 0); @@ -138,14 +142,14 @@ class CloudTest : public testing::Test { virtual ~CloudTest() { // Cleanup the cloud bucket if (!cloud_fs_options_.src_bucket.GetBucketName().empty()) { - CloudFileSystem* gfs; + CloudFileSystem* afs; Status st = CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(), cloud_fs_options_, - options_.info_log, &gfs); + options_.info_log, &afs); if (st.ok()) { - gfs->GetStorageProvider()->EmptyBucket(gfs->GetSrcBucketName(), + afs->GetStorageProvider()->EmptyBucket(afs->GetSrcBucketName(), dbname_); - delete gfs; + delete afs; } } @@ -177,11 +181,11 @@ class CloudTest : public testing::Test { OpenWithColumnFamilies({kDefaultColumnFamilyName}, handles); } - void OpenWithColumnFamilies(std::vector const& cfs, + void OpenWithColumnFamilies(const std::vector& cfs, std::vector* handles) { ASSERT_TRUE(cloud_fs_options_.credentials.HasValid().ok()); - // Create new AWS env + // Create new Gcp env CreateCloudEnv(); options_.env = aenv_.get(); // Sleep for a second because S3 is eventual consistency. @@ -200,7 +204,7 @@ class CloudTest : public testing::Test { // Try to open and return status Status checkOpen() { - // Create new AWS env + // Create new Gcp env CreateCloudEnv(); options_.env = aenv_.get(); // Sleep for a second because S3 is eventual consistency. @@ -210,7 +214,7 @@ class CloudTest : public testing::Test { persistent_cache_size_gb_, &db_); } - void CreateColumnFamilies(std::vector const& cfs, + void CreateColumnFamilies(const std::vector& cfs, std::vector* handles) { ASSERT_NE(db_, nullptr); size_t cfi = handles->size(); @@ -221,9 +225,9 @@ class CloudTest : public testing::Test { } // Creates and Opens a clone - Status CloneDB(std::string const& clone_name, - std::string const& dest_bucket_name, - std::string const& dest_object_path, + Status CloneDB(const std::string& clone_name, + const std::string& dest_bucket_name, + const std::string& dest_object_path, std::unique_ptr* cloud_db, std::unique_ptr* env, bool force_keep_local_on_invalid_dest_bucket = true) { // The local directory where the clone resides @@ -245,7 +249,7 @@ class CloudTest : public testing::Test { force_keep_local_on_invalid_dest_bucket) { copt.keep_local_sst_files = true; } - // Create new AWS env + // Create new Gcp env Status st = CloudFileSystem::NewGcpFileSystem( base_env_->GetFileSystem(), copt, options_.info_log, &cfs); if (!st.ok()) { @@ -297,7 +301,7 @@ class CloudTest : public testing::Test { } } - void SetPersistentCache(std::string const& path, uint64_t size_gb) { + void SetPersistentCache(const std::string& path, uint64_t size_gb) { persistent_cache_path_ = path; persistent_cache_size_gb_ = size_gb; } @@ -352,14 +356,19 @@ class CloudTest : public testing::Test { return static_cast(aenv_->GetFileSystem().get()); } - DBImpl* GetDBImpl() const { return static_cast(db_->GetBaseDB()); } + DBImpl* GetDBImpl() const { + return static_cast(db_->GetBaseDB()); + } Status SwitchToNewCookie(std::string new_cookie) { - CloudManifestDelta delta{db_->GetNextFileNumber(), new_cookie}; + CloudManifestDelta delta{ + db_->GetNextFileNumber(), + new_cookie + }; return ApplyCMDeltaToCloudDB(delta); } - Status ApplyCMDeltaToCloudDB(CloudManifestDelta const& delta) { + Status ApplyCMDeltaToCloudDB(const CloudManifestDelta& delta) { auto st = GetCloudFileSystem()->RollNewCookie(dbname_, delta.epoch, delta); if (!st.ok()) { return st; @@ -405,19 +414,20 @@ class CloudTest : public testing::Test { std::vector sst_files; 
db->GetLiveFilesMetaData(&sst_files); ASSERT_EQ(sst_files.size(), 2); - for (auto& f : sst_files) { + for (auto& f: sst_files) { obsolete_files->push_back(cfs->RemapFilename(f.relative_filename)); } // trigger compaction, so previous 2 sst files will be obsolete - ASSERT_OK(db->TEST_CompactRange(0, nullptr, nullptr, nullptr, true)); + ASSERT_OK( + db->TEST_CompactRange(0, nullptr, nullptr, nullptr, true)); sst_files.clear(); db->GetLiveFilesMetaData(&sst_files); ASSERT_EQ(sst_files.size(), 1); } - // check that fname existsin in src bucket/object path - rocksdb::Status ExistsCloudObject(std::string const& filename) const { + // check that fname exists in in src bucket/object path + rocksdb::Status ExistsCloudObject(const std::string& filename) const { return GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject( GetCloudFileSystem()->GetSrcBucketName(), GetCloudFileSystem()->GetSrcObjectPath() + pathsep + filename); @@ -474,11 +484,10 @@ TEST_F(CloudTest, FindAllLiveFilesTest) { std::vector tablefiles; std::string manifest; // fetch latest manifest to local - ASSERT_OK( - GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest)); + ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest)); EXPECT_EQ(tablefiles.size(), 1); - for (auto name : tablefiles) { + for (auto name: tablefiles) { EXPECT_EQ(GetFileType(name), RocksDBFileType::kSstFile); // verify that the sst file indeed exists in cloud EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject( @@ -490,8 +499,7 @@ TEST_F(CloudTest, FindAllLiveFilesTest) { // verify that manifest file indeed exists in cloud auto storage_provider = GetCloudFileSystem()->GetStorageProvider(); auto bucket_name = GetCloudFileSystem()->GetSrcBucketName(); - auto object_path = - GetCloudFileSystem()->GetSrcObjectPath() + pathsep + manifest; + auto object_path = GetCloudFileSystem()->GetSrcObjectPath() + pathsep + manifest; EXPECT_OK(storage_provider->ExistsCloudObject(bucket_name, object_path)); } @@ -569,8 +577,7 @@ TEST_F(CloudTest, GetChildrenTest) { OpenDB(); std::vector children; - ASSERT_OK(aenv_->GetFileSystem()->GetChildren(dbname_, kIOOptions, &children, - kDbg)); + ASSERT_OK(aenv_->GetFileSystem()->GetChildren(dbname_, kIOOptions, &children, kDbg)); int sst_files = 0; for (auto c : children) { if (IsSstFile(c)) { @@ -583,6 +590,47 @@ TEST_F(CloudTest, GetChildrenTest) { EXPECT_EQ(sst_files, 1); } +TEST_F(CloudTest, FindLiveFilesFromLocalManifestTest) { + OpenDB(); + ASSERT_OK(db_->Put(WriteOptions(), "Hello", "Universe")); + ASSERT_OK(db_->Flush(FlushOptions())); + + // wait until files are persisted into s3 + GetDBImpl()->TEST_WaitForBackgroundWork(); + + CloseDB(); + + // determine the manifest name and store a copy in a different location + auto cfs = GetCloudFileSystem(); + auto manifest_file = cfs->RemapFilename("MANIFEST"); + auto manifest_path = std::filesystem::path(dbname_) / manifest_file; + + auto alt_manifest_path = + std::filesystem::temp_directory_path() / ("ALT-" + manifest_file); + std::filesystem::copy_file(manifest_path, alt_manifest_path); + + DestroyDir(dbname_); + + std::vector tablefiles; + // verify the copied manifest can be processed correctly + ASSERT_OK(GetCloudFileSystem()->FindLiveFilesFromLocalManifest( + alt_manifest_path, &tablefiles)); + + // verify the result + EXPECT_EQ(tablefiles.size(), 1); + + for (auto name : tablefiles) { + EXPECT_EQ(GetFileType(name), RocksDBFileType::kSstFile); + // verify that the sst file indeed exists in cloud + 
EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject( + GetCloudFileSystem()->GetSrcBucketName(), + GetCloudFileSystem()->GetSrcObjectPath() + pathsep + name)); + } + + // clean up + std::filesystem::remove(alt_manifest_path); +} + // // Create and read from a clone. // @@ -852,7 +900,8 @@ TEST_F(CloudTest, DbidRegistry) { TEST_F(CloudTest, KeepLocalFiles) { cloud_fs_options_.keep_local_sst_files = true; for (int iter = 0; iter < 4; ++iter) { - cloud_fs_options_.use_direct_io_for_cloud_download = true; + cloud_fs_options_.use_direct_io_for_cloud_download = + iter == 0 || iter == 1; // Create two files OpenDB(); std::string value; @@ -868,7 +917,7 @@ TEST_F(CloudTest, KeepLocalFiles) { std::vector files; ASSERT_OK(Env::Default()->GetChildren(dbname_, &files)); long sst_files = - std::count_if(files.begin(), files.end(), [](std::string const& file) { + std::count_if(files.begin(), files.end(), [](const std::string& file) { return file.find("sst") != std::string::npos; }); ASSERT_EQ(sst_files, 2); @@ -892,7 +941,7 @@ TEST_F(CloudTest, CopyToFromGcs) { // iter 0 -- not using transfer manager // iter 1 -- using transfer manager for (int iter = 0; iter < 2; ++iter) { - // Create aws env + // Create Gcp env cloud_fs_options_.keep_local_sst_files = true; CreateCloudEnv(); auto* cimpl = GetCloudFileSystemImpl(); @@ -937,7 +986,7 @@ TEST_F(CloudTest, CopyToFromGcs) { TEST_F(CloudTest, DelayFileDeletion) { std::string fname = dbname_ + "/000010.sst"; - // Create aws env + // Create Gcp env cloud_fs_options_.keep_local_sst_files = true; cloud_fs_options_.cloud_file_deletion_delay = std::chrono::seconds(2); CreateCloudEnv(); @@ -1054,31 +1103,6 @@ TEST_F(CloudTest, Savepoint) { GetCloudFileSystem()->GetSrcBucketName(), dest_path); } -// no encryption now -// TEST_F(CloudTest, Encryption) { -// // Create aws env -// cloud_fs_options_.server_side_encryption = true; -// char* key_id = getenv("GCP_KMS_KEY_ID"); -// if (key_id != nullptr) { -// cloud_fs_options_.encryption_key_id = std::string(key_id); -// Log(options_.info_log, "Found encryption key id in env variable %s", -// key_id); -// } - -// OpenDB(); - -// ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World")); -// // create a file -// ASSERT_OK(db_->Flush(FlushOptions())); -// CloseDB(); - -// OpenDB(); -// std::string value; -// ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value)); -// ASSERT_EQ(value, "World"); -// CloseDB(); -// } - TEST_F(CloudTest, DirectReads) { options_.use_direct_reads = true; options_.use_direct_io_for_flush_and_compaction = true; @@ -1103,83 +1127,6 @@ TEST_F(CloudTest, DirectReads) { CloseDB(); } -#ifdef USE_KAFKA -TEST_F(CloudTest, KeepLocalLogKafka) { - cloud_fs_options_.keep_local_log_files = false; - cloud_fs_options_.log_type = LogType::kLogKafka; - cloud_fs_options_.kafka_log_options - .client_config_params["metadata.broker.list"] = "localhost:9092"; - - OpenDB(); - - ASSERT_OK(db_->Put(WriteOptions(), "Franz", "Kafka")); - - // Destroy DB in memory and on local file system. - delete db_; - db_ = nullptr; - aenv_.reset(); - DestroyDir(dbname_); - DestroyDir("/tmp/ROCKSET"); - - // Create new env. - CreateCloudEnv(); - - // Give env enough time to consume WALs - std::this_thread::sleep_for(std::chrono::seconds(3)); - - // Open DB. - cloud_fs_options_.keep_local_log_files = true; - auto* cimpl = GetCloudFileSystemImpl(); - options_.wal_dir = cimpl->GetWALCacheDir(); - OpenDB(); - - // Test read. 
- std::string value; - ASSERT_OK(db_->Get(ReadOptions(), "Franz", &value)); - ASSERT_EQ(value, "Kafka"); - - CloseDB(); -} -#endif /* USE_KAFKA */ - -// TODO(igor): determine why this fails, -// https://github.com/rockset/rocksdb-cloud/issues/35 -// TEST_F(CloudTest, DISABLED_KeepLocalLogKinesis) { -// cloud_fs_options_.keep_local_log_files = false; -// cloud_fs_options_.log_type = LogType::kLogKinesis; - -// OpenDB(); - -// // Test write. -// ASSERT_OK(db_->Put(WriteOptions(), "Tele", "Kinesis")); - -// // Destroy DB in memory and on local file system. -// delete db_; -// db_ = nullptr; -// aenv_.reset(); -// DestroyDir(dbname_); -// DestroyDir("/tmp/ROCKSET"); - -// // Create new env. -// CreateCloudEnv(); - -// // Give env enough time to consume WALs -// std::this_thread::sleep_for(std::chrono::seconds(3)); - -// // Open DB. -// cloud_fs_options_.keep_local_log_files = true; -// auto* cimpl = GetCloudFileSystemImpl(); -// options_.wal_dir = cimpl->GetWALCacheDir(); -// OpenDB(); - -// // Test read. -// std::string value; -// ASSERT_OK(db_->Get(ReadOptions(), "Tele", &value)); -// ASSERT_EQ(value, "Kinesis"); - -// CloseDB(); -// } - // Test whether we are able to recover nicely from two different writers to the // same S3 bucket. (The feature that was enabled by CLOUDMANIFEST) TEST_F(CloudTest, TwoDBsOneBucket) { @@ -1560,7 +1507,7 @@ TEST_F(CloudTest, EphemeralOnCorruptedDB) { // Get the MANIFEST file std::string manifest_file_name; - for (auto const& file_name : files) { + for (const auto& file_name : files) { if (file_name.rfind("MANIFEST", 0) == 0) { manifest_file_name = file_name; break; @@ -1739,7 +1686,6 @@ TEST_F(CloudTest, CheckpointToCloud) { cloud_fs_options_.dest_bucket.GetObjectPath()); cloud_fs_options_.src_bucket = checkpoint_bucket; - cloud_fs_options_.dest_bucket = checkpoint_bucket; OpenDB(); std::string value; @@ -1872,125 +1818,6 @@ TEST_F(CloudTest, SharedBlockCache) { cloud_fs_options_.src_bucket.GetObjectPath() + "-clone"); } -// Verify that sst_file_cache and file_cache cannot be set together -TEST_F(CloudTest, KeepLocalFilesAndFileCache) { - cloud_fs_options_.sst_file_cache = NewLRUCache(1024); // 1 KB cache - cloud_fs_options_.keep_local_sst_files = true; - ASSERT_TRUE(checkOpen().IsInvalidArgument()); -} - -// Verify that sst_file_cache can be disabled -TEST_F(CloudTest, FileCacheZero) { - cloud_fs_options_.sst_file_cache = NewLRUCache(0); // zero size - OpenDB(); - auto* cimpl = GetCloudFileSystemImpl(); - ASSERT_OK(db_->Put(WriteOptions(), "a", "b")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->Put(WriteOptions(), "c", "d")); - ASSERT_OK(db_->Flush(FlushOptions())); - auto local_files = GetSSTFiles(dbname_); - EXPECT_EQ(local_files.size(), 0); - EXPECT_EQ(cimpl->FileCacheGetCharge(), 0); - - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), "a", &value)); - ASSERT_TRUE(value.compare("b") == 0); - ASSERT_OK(db_->Get(ReadOptions(), "c", &value)); - ASSERT_TRUE(value.compare("d") == 0); - CloseDB(); -} - -// Verify that sst_file_cache is very small, so no files are local. 
-TEST_F(CloudTest, FileCacheSmall) { - cloud_fs_options_.sst_file_cache = NewLRUCache(10); // Practically zero size - OpenDB(); - auto* cimpl = GetCloudFileSystemImpl(); - ASSERT_OK(db_->Put(WriteOptions(), "a", "b")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->Put(WriteOptions(), "c", "d")); - ASSERT_OK(db_->Flush(FlushOptions())); - auto local_files = GetSSTFiles(dbname_); - EXPECT_EQ(local_files.size(), 0); - EXPECT_EQ(cimpl->FileCacheGetCharge(), 0); - CloseDB(); -} - -// Relatively large sst_file cache, so all files are local. -TEST_F(CloudTest, FileCacheLarge) { - size_t capacity = 10240L; - std::shared_ptr cache = NewLRUCache(capacity); - cloud_fs_options_.sst_file_cache = cache; - - // generate two sst files. - OpenDB(); - auto* cimpl = GetCloudFileSystemImpl(); - ASSERT_OK(db_->Put(WriteOptions(), "a", "b")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->Put(WriteOptions(), "c", "d")); - ASSERT_OK(db_->Flush(FlushOptions())); - - // check that local sst files exist - auto local_files = GetSSTFiles(dbname_); - EXPECT_EQ(local_files.size(), 2); - - // check that local sst files have non zero size - uint64_t totalFileSize = 0; - GetSSTFilesTotalSize(dbname_, &totalFileSize); - EXPECT_GT(totalFileSize, 0); - EXPECT_GE(capacity, totalFileSize); - - // check that cache has two entries - EXPECT_EQ(cimpl->FileCacheGetNumItems(), 2); - - // check that cache charge matches total local sst file size - EXPECT_EQ(cimpl->FileCacheGetNumItems(), 2); - EXPECT_EQ(cimpl->FileCacheGetCharge(), totalFileSize); - CloseDB(); -} - -// Cache will have a few files only. -TEST_F(CloudTest, FileCacheOnDemand) { - size_t capacity = 3000; - int num_shard_bits = 0; // 1 shard - bool strict_capacity_limit = false; - double high_pri_pool_ratio = 0; - - std::shared_ptr cache = - NewLRUCache(capacity, num_shard_bits, strict_capacity_limit, - high_pri_pool_ratio, nullptr, kDefaultToAdaptiveMutex, - CacheMetadataChargePolicy::kDontChargeCacheMetadata); - cloud_fs_options_.sst_file_cache = cache; - options_.level0_file_num_compaction_trigger = 100; // never compact - - OpenDB(); - auto* cimpl = GetCloudFileSystemImpl(); - - // generate four sst files, each of size about 884 bytes - ASSERT_OK(db_->Put(WriteOptions(), "a", "b")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->Put(WriteOptions(), "c", "d")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->Put(WriteOptions(), "e", "f")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->Put(WriteOptions(), "g", "h")); - ASSERT_OK(db_->Flush(FlushOptions())); - - // The db should have 4 sst files in the manifest. - std::vector flist; - db_->GetLiveFilesMetaData(&flist); - EXPECT_EQ(flist.size(), 4); - - // verify that there are only two entries in the cache - EXPECT_EQ(cimpl->FileCacheGetNumItems(), 2); - EXPECT_EQ(cimpl->FileCacheGetCharge(), cache->GetUsage()); - - // There should be only two local sst files. 
- auto local_files = GetSSTFiles(dbname_); - EXPECT_EQ(local_files.size(), 2); - - CloseDB(); -} - TEST_F(CloudTest, FindLiveFilesFetchManifestTest) { OpenDB(); ASSERT_OK(db_->Put({}, "a", "1")); @@ -2108,6 +1935,7 @@ TEST_F(CloudTest, LiveFilesConsistentAfterApplyCloudManifestDeltaTest) { CloseDB(); } + // After calling `ApplyCloudManifestDelta`, writes should be persisted in // sst files only visible in new Manifest TEST_F(CloudTest, WriteAfterUpdateCloudManifestArePersistedInNewEpoch) { @@ -2353,8 +2181,7 @@ TEST_F(CloudTest, CookieRollbackTest) { TEST_F(CloudTest, NewCookieOnOpenTest) { cloud_fs_options_.cookie_on_open = "1"; - // when opening new db, only new_cookie_on_open is used as CLOUDMANIFEST - // suffix + // when opening new db, only new_cookie_on_open is used as CLOUDMANIFEST suffix cloud_fs_options_.new_cookie_on_open = "2"; OpenDB(); ASSERT_OK(db_->Put({}, "k1", "v1")); @@ -2544,7 +2371,8 @@ TEST_F(CloudTest, DisableInvisibleFileDeletionOnOpenTest) { cookie2_sst_files.resize(1); auto cookie2_manifest_filepath = dbname_ + pathsep + cookie2_manifest_file; - auto cookie2_cm_filepath = MakeCloudManifestFile(dbname_, cookie2); + auto cookie2_cm_filepath = + MakeCloudManifestFile(dbname_, cookie2); auto cookie2_sst_filepath = dbname_ + pathsep + cookie2_sst_files[0]; CloseDB(); @@ -2553,8 +2381,7 @@ TEST_F(CloudTest, DisableInvisibleFileDeletionOnOpenTest) { cloud_fs_options_.delete_cloud_invisible_files_on_open = false; OpenDB(); // files from cookie2 are deleted locally but exists in s3 - for (auto path : - {cookie2_cm_filepath, cookie2_manifest_filepath, cookie2_sst_filepath}) { + for (auto path: {cookie2_cm_filepath, cookie2_manifest_filepath, cookie2_sst_filepath}) { EXPECT_NOK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists( path, kIOOptions, kDbg)); EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject( @@ -2582,8 +2409,8 @@ TEST_F(CloudTest, DisableObsoleteFileDeletionOnOpenTest) { options_.arena_block_size = 4 << 10; options_.keep_log_file_num = 1; options_.use_options_file = false; - // put wal files into one directory so that we don't need to count number of - // local wal files + // put wal files into one directory so that we don't need to count number of local + // wal files options_.wal_dir = dbname_ + "/wal"; cloud_fs_options_.keep_local_sst_files = true; // disable cm roll so that no new manifest files generated @@ -2615,8 +2442,7 @@ TEST_F(CloudTest, DisableObsoleteFileDeletionOnOpenTest) { ASSERT_EQ(files.size(), 1); local_files = GetAllLocalFiles(); - // obsolete files are not deleted, also one extra sst files generated after - // compaction + // obsolete files are not deleted, also one extra sst files generated after compaction EXPECT_EQ(local_files.size(), 9); CloseDB(); @@ -2694,7 +2520,7 @@ TEST_F(CloudTest, TwoConcurrentWritersCookieNotEmpty) { db_ = nullptr; aenv1 = aenv_.release(); }; - auto openDB1NoCookieSwitch = [&](std::string const& cookie) { + auto openDB1NoCookieSwitch = [&](const std::string& cookie) { dbname_ = firstDB; // when reopening DB1, we should set cookie_on_open = 2 to make sure // we are opening with the right CM/M files @@ -2714,7 +2540,7 @@ TEST_F(CloudTest, TwoConcurrentWritersCookieNotEmpty) { db_ = nullptr; aenv2 = aenv_.release(); }; - auto openDB2NoCookieSwitch = [&](std::string const& cookie) { + auto openDB2NoCookieSwitch = [&](const std::string& cookie) { dbname_ = secondDB; // when reopening DB1, we should set cookie_on_open = 3 to make sure // we are opening with the right CM/M files @@ -2800,8 
+2626,7 @@ TEST_F(CloudTest, FileDeletionFailureIgnoredTest) { ASSERT_OK(db_->Flush({})); CloseDB(); - // bump the manifest epoch so that next time opening it, manifest file will be - // deleted + // bump the manifest epoch so that next time opening it, manifest file will be deleted OpenDB(); CloseDB(); @@ -2827,8 +2652,7 @@ TEST_F(CloudTest, FileDeletionFailureIgnoredTest) { SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); - // reopen the db should delete the obsolete manifest file after we cleanup - // syncpoint + // reopen the db should delete the obsolete manifest file after we cleanup syncpoint OpenDB(); EXPECT_NOK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists( manifest_file_path, kIOOptions, kDbg)); @@ -2890,7 +2714,6 @@ TEST_F(CloudTest, FileDeletionJobsCanceledWhenCloudEnvDestructed) { // The failure case of opening a corrupted db which doesn't have MANIFEST file TEST_F(CloudTest, OpenWithManifestMissing) { cloud_fs_options_.resync_on_open = true; - cloud_fs_options_.resync_manifest_on_open = true; OpenDB(); auto epoch = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch(); CloseDB(); @@ -2971,8 +2794,7 @@ TEST_F(CloudTest, ReopenEphemeralAfterFileDeletion) { std::vector files; durable->GetLiveFilesMetaData(&files); ASSERT_EQ(files.size(), 2); - // trigger compaction on durable with trivial file moves disabled, which will - // delete previously generated sst files + // trigger compaction on durable with trivial file moves disabled, which will delete previously generated sst files ASSERT_OK( static_cast(durable->GetBaseDB()) ->TEST_CompactRange(0, nullptr, nullptr, durableHandles[0], true)); @@ -3085,7 +2907,7 @@ TEST_F(CloudTest, CloudFileDeletionNotTriggeredIfDestBucketNotSet) { cloud_fs_options_.delete_cloud_invisible_files_on_open = true; OpenDB(); WaitUntilNoScheduledJobs(); - for (auto& fname : files_to_delete) { + for (auto& fname: files_to_delete) { EXPECT_OK(ExistsCloudObject(fname)); } CloseDB(); @@ -3093,7 +2915,7 @@ TEST_F(CloudTest, CloudFileDeletionNotTriggeredIfDestBucketNotSet) { cloud_fs_options_.dest_bucket = cloud_fs_options_.src_bucket; OpenDB(); WaitUntilNoScheduledJobs(); - for (auto& fname : files_to_delete) { + for (auto& fname: files_to_delete) { EXPECT_NOK(ExistsCloudObject(fname)); } CloseDB(); @@ -3139,15 +2961,15 @@ TEST_F(CloudTest, UnscheduleFileDeletionTest) { for (int i = 0; i < num_file_deletions; i++) { std::string filename = std::to_string(i) + ".sst"; files_to_delete.push_back(filename); - ASSERT_OK(deletion_scheduler->ScheduleFileDeletion( - filename, [&counter]() { counter++; })); + ASSERT_OK( + deletion_scheduler->ScheduleFileDeletion(filename, [&counter]() { counter++; })); } auto actual_files_to_delete = deletion_scheduler->TEST_FilesToDelete(); std::sort(actual_files_to_delete.begin(), actual_files_to_delete.end()); EXPECT_EQ(actual_files_to_delete, files_to_delete); int num_scheduled_jobs = num_file_deletions; - for (auto& fname : files_to_delete) { + for (auto& fname: files_to_delete) { deletion_scheduler->UnscheduleFileDeletion(fname); num_scheduled_jobs -= 1; EXPECT_EQ(scheduler->TEST_NumScheduledJobs(), num_scheduled_jobs); @@ -3184,31 +3006,30 @@ TEST_F( // - scheduled file deletion job starts running (but file not deleted yet) // - destruct CloudFileDeletionScheduler // - file deletion job deletes the file - SyncPoint::GetInstance()->LoadDependency( - {{ - // `BeforeCancelJobs` happens-after `BeforeFileDeletion` - "CloudFileDeletionScheduler::ScheduleFileDeletion:" - 
"BeforeFileDeletion", - "CloudFileDeletionScheduler::~CloudFileDeletionScheduler:" - "BeforeCancelJobs", - }, - {"CloudFileDeletionScheduler::~CloudFileDeletionScheduler:" - "BeforeCancelJobs", - "CloudFileDeletionScheduler::ScheduleFileDeletion:AfterFileDeletion"}}); + SyncPoint::GetInstance()->LoadDependency({ + { + // `BeforeCancelJobs` happens-after `BeforeFileDeletion` + "CloudFileDeletionScheduler::ScheduleFileDeletion:BeforeFileDeletion", + "CloudFileDeletionScheduler::~CloudFileDeletionScheduler:BeforeCancelJobs", + }, + { + "CloudFileDeletionScheduler::~CloudFileDeletionScheduler:BeforeCancelJobs", + "CloudFileDeletionScheduler::ScheduleFileDeletion:AfterFileDeletion" + } + }); std::atomic num_jobs_finished{0}; SyncPoint::GetInstance()->SetCallBack( "CloudFileDeletionScheduler::ScheduleFileDeletion:AfterFileDeletion", [&](void* arg) { ASSERT_NE(nullptr, arg); - auto file_deleted = *reinterpret_cast(arg); + auto file_deleted = *reinterpret_cast(arg); EXPECT_FALSE(file_deleted); num_jobs_finished++; }); SyncPoint::GetInstance()->EnableProcessing(); // file not deleted immediately but just scheduled - ASSERT_OK( - aenv_->GetFileSystem()->DeleteFile(obsolete_files[0], kIOOptions, kDbg)); + ASSERT_OK(aenv_->GetFileSystem()->DeleteFile(obsolete_files[0], kIOOptions, kDbg)); EXPECT_EQ(GetCloudFileSystemImpl()->TEST_NumScheduledJobs(), 1); // destruct `CloudFileSystem`, which will cause `CloudFileDeletionScheduler` // to be destructed @@ -3241,7 +3062,7 @@ TEST_F(CloudTest, ReplayCloudManifestDeltaTest) { ASSERT_OK(db_->Put({}, "k" + std::to_string(i), "v" + std::to_string(i))); ASSERT_OK(db_->Flush({})); - auto cookie1 = std::to_string(i) + "0"; + auto cookie1 = std::to_string(i) + "0"; auto filenum1 = db_->GetNextFileNumber(); deltas.push_back({filenum1, cookie1}); ASSERT_OK(SwitchToNewCookie(cookie1)); @@ -3258,7 +3079,7 @@ TEST_F(CloudTest, ReplayCloudManifestDeltaTest) { GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch(); // replay the deltas one more time - for (auto const& delta : deltas) { + for (const auto& delta : deltas) { EXPECT_TRUE(GetCloudFileSystem() ->RollNewCookie(dbname_, delta.epoch, delta) .IsInvalidArgument()); @@ -3305,9 +3126,7 @@ TEST_F(CloudTest, CreateIfMissing) { // A black-box test for the cloud wrapper around rocksdb int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - // Aws::InitAPI(Aws::SDKOptions()); auto r = RUN_ALL_TESTS(); - // Aws::ShutdownAPI(Aws::SDKOptions()); return r; } @@ -3317,7 +3136,7 @@ int main(int argc, char** argv) { int main(int, char**) { fprintf(stderr, - "SKIPPED as DBCloud is supported only when USE_GCP is defined.\n"); + "SKIPPED as DBCloud is supported only when USE_Gcp is defined.\n"); return 0; } #endif From 80187bf9524b2454e16bc155783895d142a09355 Mon Sep 17 00:00:00 2001 From: githubzilla Date: Mon, 23 Oct 2023 07:59:15 +0000 Subject: [PATCH 04/16] Remove log related test case from gcp file system test --- cloud/gcp/gcp_file_system_test.cc | 41 ------------------------------- 1 file changed, 41 deletions(-) diff --git a/cloud/gcp/gcp_file_system_test.cc b/cloud/gcp/gcp_file_system_test.cc index a7963bb2c1e..8dfbe5b6d37 100644 --- a/cloud/gcp/gcp_file_system_test.cc +++ b/cloud/gcp/gcp_file_system_test.cc @@ -198,47 +198,6 @@ TEST(CloudFileSystemTest, ConfigureGcsProvider) { CloudStorageProviderImpl::kGcs()); #endif } - -/* kinesis -// Test is disabled until we have a mock provider and authentication issues are -// resolved -TEST(CloudFileSystemTest, DISABLED_ConfigureKinesisController) { - 
std::unique_ptr cfs; - - ConfigOptions config_options; - Status s = CloudFileSystem::CreateFromString( - config_options, "provider=mock; controller=kinesis", &cfs); - ASSERT_NOK(s); - ASSERT_EQ(cfs, nullptr); - -#ifdef USE_AWS - ASSERT_OK(CloudFileSystem::CreateFromString( - config_options, "id=aws; controller=kinesis; TEST=dbcloud:/test", &cfs)); - ASSERT_STREQ(cfs->Name(), "aws"); - ASSERT_NE(cfs->GetLogController(), nullptr); - ASSERT_STREQ(cfs->GetLogController()->Name(), - CloudLogControllerImpl::kKinesis()); -#endif -} - -TEST(CloudFileSystemTest, ConfigureKafkaController) { - std::unique_ptr cfs; - - ConfigOptions config_options; - Status s = CloudFileSystem::CreateFromString( - config_options, "provider=mock; controller=kafka", &cfs); -#ifdef USE_KAFKA - ASSERT_OK(s); - ASSERT_NE(cfs, nullptr); - ASSERT_NE(cfs->GetLogController(), nullptr); - ASSERT_STREQ(cfs->GetLogController()->Name(), - CloudLogControllerImpl::kKafka()); -#else - ASSERT_NOK(s); - ASSERT_EQ(cfs, nullptr); -#endif -} -*/ } // namespace ROCKSDB_NAMESPACE From 87252da836300d96d0c027cccafda1f51c4a4cbc Mon Sep 17 00:00:00 2001 From: githubzilla Date: Mon, 23 Oct 2023 11:16:05 +0000 Subject: [PATCH 05/16] make with USE_AWS=1 and without USE_GCP=1 work properly --- cloud/cloud_file_system.cc | 2 ++ cloud/gcp/gcp_file_system.cc | 7 +++---- cloud/gcp/gcp_file_system_test.cc | 25 ++++++++++++++++++++++++- cloud/gcp/gcp_retry.cc | 20 +++++++++++--------- src.mk | 4 ++-- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/cloud/cloud_file_system.cc b/cloud/cloud_file_system.cc index 584481a333f..8bee6e83903 100644 --- a/cloud/cloud_file_system.cc +++ b/cloud/cloud_file_system.cc @@ -486,7 +486,9 @@ int DoRegisterCloudObjects(ObjectLibrary& library, std::string const& arg) { count++; count += CloudFileSystemImpl::RegisterAwsObjects(library, arg); +#ifdef USE_GCP // Only register GCP if we have it count += CloudFileSystemImpl::RegisterGcpObjects(library, arg); +#endif // Register the Cloud Log Controllers diff --git a/cloud/gcp/gcp_file_system.cc b/cloud/gcp/gcp_file_system.cc index 92b43fb35e4..973a30b4ec2 100644 --- a/cloud/gcp/gcp_file_system.cc +++ b/cloud/gcp/gcp_file_system.cc @@ -1,5 +1,7 @@ #ifndef ROCKSDB_LITE +#ifdef USE_GCP + #include #include "rocksdb/convenience.h" @@ -8,8 +10,6 @@ #include "cloud/gcp/gcp_file_system.h" #include "cloud/cloud_storage_provider_impl.h" -#ifdef USE_GCP - namespace ROCKSDB_NAMESPACE { GcpFileSystem::GcpFileSystem(std::shared_ptr const& underlying_fs, CloudFileSystemOptions const& cloud_options, @@ -75,7 +75,6 @@ Status GcpFileSystem::PrepareOptions(ConfigOptions const& options) { int CloudFileSystemImpl::RegisterGcpObjects(ObjectLibrary& library, std::string const& /*arg*/) { int count = 0; -#ifdef USE_GCP library.AddFactory( CloudFileSystemImpl::kGcp(), [](std::string const& /*uri*/, std::unique_ptr* guard, @@ -92,7 +91,7 @@ int CloudFileSystemImpl::RegisterGcpObjects(ObjectLibrary& library, } }); count++; -#endif + library.AddFactory( CloudStorageProviderImpl::kGcs(), [](std::string const& /*uri*/, diff --git a/cloud/gcp/gcp_file_system_test.cc b/cloud/gcp/gcp_file_system_test.cc index 8dfbe5b6d37..9a5b0311c0e 100644 --- a/cloud/gcp/gcp_file_system_test.cc +++ b/cloud/gcp/gcp_file_system_test.cc @@ -1,5 +1,9 @@ // Copyright (c) 2017 Rockset +#ifndef ROCKSDB_LITE + +#ifdef USE_GCP + #include "rocksdb/cloud/cloud_file_system.h" #include "cloud/cloud_log_controller_impl.h" @@ -204,4 +208,23 @@ TEST(CloudFileSystemTest, ConfigureGcsProvider) { int main(int argc, char** argv) 
{
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
-}
\ No newline at end of file
+}
+
+#else  // USE_GCP
+#include
+
+int main(int, char**) {
+  fprintf(stderr,
+          "SKIPPED as DBCloud is supported only when USE_GCP is defined.\n");
+  return 0;
+}
+#endif  // USE_GCP
+
+#else  // ROCKSDB_LITE
+#include
+
+int main(int, char**) {
+  fprintf(stderr, "SKIPPED as DBCloud is not supported in ROCKSDB_LITE.\n");
+  return 0;
+}
+#endif  // ROCKSDB_LITE
\ No newline at end of file
diff --git a/cloud/gcp/gcp_retry.cc b/cloud/gcp/gcp_retry.cc
index 53e0355dd42..2c15f862594 100644
--- a/cloud/gcp/gcp_retry.cc
+++ b/cloud/gcp/gcp_retry.cc
@@ -1,8 +1,10 @@
-#include "cloud/gcp/gcp_file_system.h"
-#include "rocksdb/cloud/cloud_file_system.h"
 #include
 
+#include "rocksdb/cloud/cloud_file_system.h"
+
 #ifdef USE_GCP
+#include "cloud/gcp/gcp_file_system.h"
+
 #include
 #include
 #include
@@ -47,7 +49,7 @@ class GcpRetryPolicy : public gcs::RetryPolicy {
           "[gcs] Encountered failure: %s, "
           "retry attempt %d exceeds max retries %d. Aborting...",
           s.message().c_str(), failure_count_, maximum_failures_);
-      // retry count exceed maxnum, but is not nonretryable
+      // retry count exceeded the maximum, even though the failure is retryable
       return false;
     }
   } else {
@@ -112,12 +114,12 @@ Status GcpCloudOptions::GetClientConfiguration(CloudFileSystem* fs,
       GcpRetryPolicy(fs, std::chrono::milliseconds(timeout_ms)).clone());
   return Status::OK();
 }
-#else
-Status GcpCloudOptions::GetClientConfiguration(CloudFileSystem*,
-                                               std::string const&,
-                                               gcp::Options&) {
-  return Status::NotSupported("Not configured for GCP support");
-}
+//#else
+//Status GcpCloudOptions::GetClientConfiguration(CloudFileSystem*,
+//                                               std::string const&,
+//                                               gcp::Options&) {
+//  return Status::NotSupported("Not configured for GCP support");
+//}
 #endif /* USE_GCP */
 
 }  // namespace ROCKSDB_NAMESPACE
\ No newline at end of file
diff --git a/src.mk b/src.mk
index 0fd8d0260d0..cb9767970b3 100644
--- a/src.mk
+++ b/src.mk
@@ -458,8 +458,8 @@ TEST_MAIN_SOURCES = \
   cache/cache_reservation_manager_test.cc \
   cloud/db_cloud_test.cc \
   cloud/cloud_file_system_test.cc \
-  cloud/gcp/gcp_file_system_test.cc \
-  cloud/gcp/gcp_db_cloud_test.cc \
+  cloud/gcp/gcp_file_system_test.cc \
+  cloud/gcp/gcp_db_cloud_test.cc \
   cloud/cloud_manifest_test.cc \
   cloud/cloud_scheduler_test.cc \
   cloud/replication_test.cc \

From d45a696550b853b4b1e63da0ef053e1cb3cdd65c Mon Sep 17 00:00:00 2001
From: githubzilla
Date: Tue, 24 Oct 2023 04:32:14 +0000
Subject: [PATCH 06/16] Make the build with USE_GCP=1 and without USE_AWS=1
 work properly

---
 cloud/cloud_file_system.cc      |  7 +++++--
 cloud/cloud_file_system_test.cc | 25 +++++++++++++++++++++++++
 cloud/gcp/gcp_db_cloud_test.cc  |  5 -----
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/cloud/cloud_file_system.cc b/cloud/cloud_file_system.cc
index 8bee6e83903..6b488261d30 100644
--- a/cloud/cloud_file_system.cc
+++ b/cloud/cloud_file_system.cc
@@ -485,11 +485,13 @@ int DoRegisterCloudObjects(ObjectLibrary& library, std::string const& arg) {
     });
   count++;
 
-  count += CloudFileSystemImpl::RegisterAwsObjects(library, arg);
-#ifdef USE_GCP  // Only register GCP if we have it
+#ifdef USE_GCP
   count += CloudFileSystemImpl::RegisterGcpObjects(library, arg);
 #endif
+#ifdef USE_AWS
+  count += CloudFileSystemImpl::RegisterAwsObjects(library, arg);
+
   // Register the Cloud Log Controllers
   library.AddFactory<CloudLogController>(
@@ -503,6 +505,7 @@ int DoRegisterCloudObjects(ObjectLibrary& library, std::string const& arg) {
       return guard->get();
     });
   count++;
+#endif
 
   return count;
 }
diff --git
a/cloud/cloud_file_system_test.cc b/cloud/cloud_file_system_test.cc index 89d106908d3..cc902049bf5 100644 --- a/cloud/cloud_file_system_test.cc +++ b/cloud/cloud_file_system_test.cc @@ -1,4 +1,7 @@ // Copyright (c) 2017 Rockset +#ifndef ROCKSDB_LITE + +#ifdef USE_AWS #include "rocksdb/cloud/cloud_file_system.h" @@ -247,3 +250,25 @@ int main(int argc, char** argv) { Aws::InitAPI(Aws::SDKOptions()); return RUN_ALL_TESTS(); } + +#else // USE_AWS + +#include + +int main(int, char**) { + fprintf(stderr, + "SKIPPED as DBCloud is supported only when USE_AWS is defined.\n"); + return 0; +} +#endif // USE_AWS + +#else // ROCKSDB_LITE + +#include + +int main(int, char**) { + fprintf(stderr, "SKIPPED as DBCloud is not supported in ROCKSDB_LITE\n"); + return 0; +} + +#endif // ROCKSDB_LITE \ No newline at end of file diff --git a/cloud/gcp/gcp_db_cloud_test.cc b/cloud/gcp/gcp_db_cloud_test.cc index 260b6df201c..8b2fa5b4d43 100644 --- a/cloud/gcp/gcp_db_cloud_test.cc +++ b/cloud/gcp/gcp_db_cloud_test.cc @@ -78,9 +78,6 @@ class CloudTest : public testing::Test { void Cleanup() { ASSERT_TRUE(!aenv_); - // check cloud credentials - ASSERT_TRUE(cloud_fs_options_.credentials.HasValid().ok()); - CloudFileSystem* afs; // create a dummy Gcp env ASSERT_OK(CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(), @@ -183,8 +180,6 @@ class CloudTest : public testing::Test { void OpenWithColumnFamilies(const std::vector& cfs, std::vector* handles) { - ASSERT_TRUE(cloud_fs_options_.credentials.HasValid().ok()); - // Create new Gcp env CreateCloudEnv(); options_.env = aenv_.get(); From ea7fc15a7e37e6b64f15b60d27358e3daa2f4ded Mon Sep 17 00:00:00 2001 From: githubzilla Date: Wed, 25 Oct 2023 10:01:28 +0000 Subject: [PATCH 07/16] revert .clang-format changes --- .clang-format | 53 ++++----------------------------------------------- 1 file changed, 4 insertions(+), 49 deletions(-) diff --git a/.clang-format b/.clang-format index 7d9b39f7fe0..7c279811ac1 100644 --- a/.clang-format +++ b/.clang-format @@ -1,50 +1,5 @@ -# Use the Google style in this project. +# Complete list of style options can be found at: +# http://clang.llvm.org/docs/ClangFormatStyleOptions.html +--- BasedOnStyle: Google - -# Some folks prefer to write "int& foo" while others prefer "int &foo". The -# Google Style Guide only asks for consistency within a project, we chose -# "int& foo" for this project: -DerivePointerAlignment: false -PointerAlignment: Left - -# The Google Style Guide only asks for consistency w.r.t. "east const" vs. -# "const west" alignment of cv-qualifiers. In this project we use "east const". -QualifierAlignment: Right - -IncludeBlocks: Merge -IncludeCategories: -# Matches common headers first, but sorts them after project includes -- Regex: '^\"google/cloud/internal/disable_deprecation_warnings.inc\"$' - Priority: -1 -- Regex: '^\"google/cloud/(internal/|grpc_utils/|testing_util/|[^/]+\.h)' - Priority: 1000 -- Regex: '^\"google/cloud/' # project includes should sort first - Priority: 500 -- Regex: '^\"generator/' # project includes should sort first - Priority: 500 -- Regex: '^\"generator/internal/' # project internals second - Priority: 1000 -- Regex: '^\"generator/testing/' # testing helpers third - Priority: 1100 -- Regex: '^\"' # And then includes from other projects or the system - Priority: 1500 -- Regex: '^' - Priority: 4000 -- Regex: '^<[^/]*>' - Priority: 5000 - -# Format raw string literals with a `pb` or `proto` tag as proto. 
-RawStringFormats: -- Language: TextProto - Delimiters: - - 'pb' - - 'proto' - BasedOnStyle: Google - -CommentPragmas: '(@copydoc|@copybrief|@see|@overload|@snippet)' \ No newline at end of file +... From 6dac23baa8377e9b6cc40aee2ba5d7fdc3f76d33 Mon Sep 17 00:00:00 2001 From: githubzilla Date: Wed, 25 Oct 2023 10:51:27 +0000 Subject: [PATCH 08/16] revert .clang-format changes --- cloud/cloud_file_system.cc | 133 +++++++++--------- .../cloud/cloud_storage_provider_impl.h | 80 +++++------ 2 files changed, 107 insertions(+), 106 deletions(-) diff --git a/cloud/cloud_file_system.cc b/cloud/cloud_file_system.cc index 6b488261d30..9386ce93978 100644 --- a/cloud/cloud_file_system.cc +++ b/cloud/cloud_file_system.cc @@ -7,13 +7,15 @@ #else #include #endif +#include + #include "cloud/aws/aws_file_system.h" -#include "cloud/gcp/gcp_file_system.h" #include "cloud/cloud_file_system_impl.h" #include "cloud/cloud_log_controller_impl.h" #include "cloud/cloud_manifest.h" #include "cloud/db_cloud_impl.h" #include "cloud/filename.h" +#include "cloud/gcp/gcp_file_system.h" #include "env/composite_env_wrapper.h" #include "options/configurable_helper.h" #include "options/options_helper.h" @@ -28,7 +30,6 @@ #include "rocksdb/utilities/object_registry.h" #include "rocksdb/utilities/options_type.h" #include "util/string_util.h" -#include namespace ROCKSDB_NAMESPACE { @@ -87,8 +88,8 @@ void CloudFileSystemOptions::Dump(Logger* log) const { } } -bool CloudFileSystemOptions::GetNameFromEnvironment(char const* name, - char const* alt, +bool CloudFileSystemOptions::GetNameFromEnvironment(const char* name, + const char* alt, std::string* result) { char* value = getenv(name); // See if name is set in the environment if (value == nullptr && @@ -102,9 +103,9 @@ bool CloudFileSystemOptions::GetNameFromEnvironment(char const* name, return false; // No, return not found } } -void CloudFileSystemOptions::TEST_Initialize(std::string const& bucket, - std::string const& object, - std::string const& region) { +void CloudFileSystemOptions::TEST_Initialize(const std::string& bucket, + const std::string& object, + const std::string& region) { src_bucket.TEST_Initialize(bucket, object, region); dest_bucket = src_bucket; } @@ -151,9 +152,9 @@ void BucketOptions::SetBucketPrefix(std::string prefix) { // Initializes the bucket properties -void BucketOptions::TEST_Initialize(std::string const& bucket, - std::string const& object, - std::string const& region) { +void BucketOptions::TEST_Initialize(const std::string& bucket, + const std::string& object, + const std::string& region) { std::string prefix; // If the bucket name is not set, then the bucket name is not set, // Set it to either the value of the environment variable or geteuid @@ -189,20 +190,20 @@ static std::unordered_map {"object", {0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - std::string const& value, void* addr) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { auto bucket = static_cast(addr); bucket->SetObjectPath(value); return Status::OK(); }, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - void const* addr, std::string* value) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr, std::string* value) { auto bucket = static_cast(addr); *value = bucket->GetObjectPath(); return Status::OK(); }, - [](ConfigOptions const& /*opts*/, std::string const& 
/*name*/, - void const* addr1, void const* addr2, std::string* /*mismatch*/) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr1, const void* addr2, std::string* /*mismatch*/) { auto bucket1 = static_cast(addr1); auto bucket2 = static_cast(addr2); return bucket1->GetObjectPath() == bucket2->GetObjectPath(); @@ -210,20 +211,20 @@ static std::unordered_map {"region", {0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - std::string const& value, void* addr) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { auto bucket = static_cast(addr); bucket->SetRegion(value); return Status::OK(); }, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - void const* addr, std::string* value) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr, std::string* value) { auto bucket = static_cast(addr); *value = bucket->GetRegion(); return Status::OK(); }, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - void const* addr1, void const* addr2, std::string* /*mismatch*/) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr1, const void* addr2, std::string* /*mismatch*/) { auto bucket1 = static_cast(addr1); auto bucket2 = static_cast(addr2); return bucket1->GetRegion() == bucket2->GetRegion(); @@ -231,20 +232,20 @@ static std::unordered_map {"prefix", {0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kNone, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - std::string const& value, void* addr) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { auto bucket = static_cast(addr); bucket->SetBucketName(bucket->GetBucketName(false), value); return Status::OK(); }, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - void const* addr, std::string* value) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr, std::string* value) { auto bucket = static_cast(addr); *value = bucket->GetBucketPrefix(); return Status::OK(); }, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - void const* addr1, void const* addr2, std::string* /*mismatch*/) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr1, const void* addr2, std::string* /*mismatch*/) { auto bucket1 = static_cast(addr1); auto bucket2 = static_cast(addr2); return bucket1->GetBucketPrefix() == bucket2->GetBucketPrefix(); @@ -252,20 +253,20 @@ static std::unordered_map {"bucket", {0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kNone, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - std::string const& value, void* addr) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { auto bucket = static_cast(addr); bucket->SetBucketName(value); return Status::OK(); }, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - void const* addr, std::string* value) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr, std::string* value) { auto bucket = static_cast(addr); *value = bucket->GetBucketName(false); return Status::OK(); }, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - void const* addr1, void const* addr2, std::string* /*mismatch*/) { + 
[](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const void* addr1, const void* addr2, std::string* /*mismatch*/) { auto bucket1 = static_cast(addr1); auto bucket2 = static_cast(addr2); return bucket1->GetBucketName(false) == @@ -274,8 +275,8 @@ static std::unordered_map {"TEST", {0, OptionType::kUnknown, OptionVerificationType::kAlias, OptionTypeFlags::kNone, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - std::string const& value, void* addr) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { auto bucket = static_cast(addr); std::string name = value; std::string path; @@ -340,8 +341,8 @@ const std::unordered_map OptionType::kConfigurable, OptionVerificationType::kByNameAllowNull, (OptionTypeFlags::kShared | OptionTypeFlags::kCompareLoose | OptionTypeFlags::kCompareNever | OptionTypeFlags::kAllowNull), - [](ConfigOptions const& opts, std::string const& /*name*/, - std::string const& value, void* addr) { + [](const ConfigOptions& opts, const std::string& /*name*/, + const std::string& value, void* addr) { auto provider = static_cast*>(addr); return CloudStorageProvider::CreateFromString(opts, value, @@ -353,8 +354,8 @@ const std::unordered_map (OptionTypeFlags::kShared | OptionTypeFlags::kCompareLoose | OptionTypeFlags::kCompareNever | OptionTypeFlags::kAllowNull), // Creates a new TableFactory based on value - [](ConfigOptions const& opts, std::string const& /*name*/, - std::string const& value, void* addr) { + [](const ConfigOptions& opts, const std::string& /*name*/, + const std::string& value, void* addr) { auto controller = static_cast*>(addr); Status s = @@ -372,8 +373,8 @@ const std::unordered_map {"TEST", {0, OptionType::kUnknown, OptionVerificationType::kAlias, OptionTypeFlags::kNone, - [](ConfigOptions const& /*opts*/, std::string const& /*name*/, - std::string const& value, void* addr) { + [](const ConfigOptions& /*opts*/, const std::string& /*name*/, + const std::string& value, void* addr) { auto copts = static_cast(addr); std::string name; std::string path; @@ -394,8 +395,8 @@ const std::unordered_map }}}, }; -Status CloudFileSystemOptions::Configure(ConfigOptions const& config_options, - std::string const& opts_str) { +Status CloudFileSystemOptions::Configure(const ConfigOptions& config_options, + const std::string& opts_str) { std::string current; Status s; if (!config_options.ignore_unknown_options) { @@ -419,12 +420,12 @@ Status CloudFileSystemOptions::Configure(ConfigOptions const& config_options, return s; } -Status CloudFileSystemOptions::Serialize(ConfigOptions const& config_options, +Status CloudFileSystemOptions::Serialize(const ConfigOptions& config_options, std::string* value) const { return OptionTypeInfo::SerializeStruct( config_options, CloudFileSystemOptions::kName(), &cloud_fs_option_type_info, CloudFileSystemOptions::kName(), - reinterpret_cast(this), value); + reinterpret_cast(this), value); } Status CloudFileSystemEnv::NewAwsFileSystem( @@ -450,12 +451,12 @@ Status CloudFileSystemEnv::NewAwsFileSystem( } Status CloudFileSystem::NewGcpFileSystem( - std::shared_ptr const& base_fs, - std::string const& src_cloud_bucket, std::string const& src_cloud_object, - std::string const& src_cloud_region, std::string const& dest_cloud_bucket, - std::string const& dest_cloud_object, std::string const& dest_cloud_region, - CloudFileSystemOptions const& cloud_options, - std::shared_ptr const& logger, CloudFileSystem** cfs) { + const std::shared_ptr& base_fs, + const std::string& 
src_cloud_bucket, const std::string& src_cloud_object, + const std::string& src_cloud_region, const std::string& dest_cloud_bucket, + const std::string& dest_cloud_object, const std::string& dest_cloud_region, + const CloudFileSystemOptions& cloud_options, + const std::shared_ptr& logger, CloudFileSystem** cfs) { CloudFileSystemOptions options = cloud_options; if (!src_cloud_bucket.empty()) options.src_bucket.SetBucketName(src_cloud_bucket); @@ -471,12 +472,12 @@ Status CloudFileSystem::NewGcpFileSystem( return NewGcpFileSystem(base_fs, options, logger, cfs); } -int DoRegisterCloudObjects(ObjectLibrary& library, std::string const& arg) { +int DoRegisterCloudObjects(ObjectLibrary& library, const std::string& arg) { int count = 0; // Register the FileSystem types library.AddFactory( CloudFileSystemImpl::kClassName(), - [](std::string const& /*uri*/, std::unique_ptr* guard, + [](const std::string& /*uri*/, std::unique_ptr* guard, std::string* /*errmsg*/) { guard->reset(new CloudFileSystemImpl(CloudFileSystemOptions(), FileSystem::Default(), @@ -496,7 +497,7 @@ int DoRegisterCloudObjects(ObjectLibrary& library, std::string const& arg) { library.AddFactory( CloudLogControllerImpl::kKafka(), - [](std::string const& /*uri*/, std::unique_ptr* guard, + [](const std::string& /*uri*/, std::unique_ptr* guard, std::string* errmsg) { Status s = CloudLogControllerImpl::CreateKafkaController(guard); if (!s.ok()) { @@ -669,28 +670,28 @@ Status CloudFileSystemEnv::NewAwsFileSystem( #ifndef USE_GCP Status CloudFileSystem::NewGcpFileSystem( - std::shared_ptr const& /*base_fs*/, - CloudFileSystemOptions const& /*options*/, - std::shared_ptr const& /*logger*/, CloudFileSystem** /*cfs*/) { + const std::shared_ptr& /*base_fs*/, + const CloudFileSystemOptions& /*options*/, + const std::shared_ptr& /*logger*/, CloudFileSystem** /*cfs*/) { return Status::NotSupported("RocksDB Cloud not compiled with GCP support"); } #else Status CloudFileSystem::NewGcpFileSystem( - std::shared_ptr const& base_fs, - CloudFileSystemOptions const& options, - std::shared_ptr const& logger, CloudFileSystem** cfs) { + const std::shared_ptr& base_fs, + const CloudFileSystemOptions& options, + const std::shared_ptr& logger, CloudFileSystem** cfs) { CloudFileSystem::RegisterCloudObjects(); - //Dump out cloud fs options + // Dump out cloud fs options options.Dump(logger.get()); Status st = GcpFileSystem::NewGcpFileSystem(base_fs, options, logger, cfs); - if(st.ok()) { - //store a copy to the logger + if (st.ok()) { + // store a copy to the logger auto* cloud = static_cast(*cfs); cloud->info_log_ = logger; - //start the purge thread only if there is a destination bucket - if(options.dest_bucket.IsValid() && options.run_purger) { + // start the purge thread only if there is a destination bucket + if (options.dest_bucket.IsValid() && options.run_purger) { cloud->purge_thread_ = std::thread([cloud] { cloud->Purger(); }); } } diff --git a/include/rocksdb/cloud/cloud_storage_provider_impl.h b/include/rocksdb/cloud/cloud_storage_provider_impl.h index 7c0c0a40007..cc4cb2f16fa 100644 --- a/include/rocksdb/cloud/cloud_storage_provider_impl.h +++ b/include/rocksdb/cloud/cloud_storage_provider_impl.h @@ -8,14 +8,14 @@ namespace ROCKSDB_NAMESPACE { class CloudStorageReadableFileImpl : public CloudStorageReadableFile { public: - CloudStorageReadableFileImpl(Logger* info_log, std::string const& bucket, - std::string const& fname, uint64_t size); + CloudStorageReadableFileImpl(Logger* info_log, const std::string& bucket, + const std::string& fname, uint64_t 
size); // sequential access, read data at current offset in file - IOStatus Read(size_t n, IOOptions const& options, Slice* result, + IOStatus Read(size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) override; // random access, read data from specified offset in file - IOStatus Read(uint64_t offset, size_t n, IOOptions const& options, + IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) const override; @@ -23,7 +23,7 @@ class CloudStorageReadableFileImpl : public CloudStorageReadableFile { protected: virtual IOStatus DoCloudRead(uint64_t offset, size_t n, - IOOptions const& options, char* scratch, + const IOOptions& options, char* scratch, uint64_t* bytes_read, IODebugContext* dbg) const = 0; @@ -38,7 +38,7 @@ class CloudStorageReadableFileImpl : public CloudStorageReadableFile { class CloudStorageWritableFileImpl : public CloudStorageWritableFile { protected: CloudFileSystem* cfs_; - char const* class_; + const char* class_; std::string fname_; std::string tmp_file_; IOStatus status_; @@ -49,14 +49,14 @@ class CloudStorageWritableFileImpl : public CloudStorageWritableFile { public: CloudStorageWritableFileImpl(CloudFileSystem* fs, - std::string const& local_fname, - std::string const& bucket, - std::string const& cloud_fname, - FileOptions const& file_opts); + const std::string& local_fname, + const std::string& bucket, + const std::string& cloud_fname, + const FileOptions& file_opts); virtual ~CloudStorageWritableFileImpl(); using CloudStorageWritableFile::Append; - IOStatus Append(Slice const& data, IOOptions const& opts, + IOStatus Append(const Slice& data, const IOOptions& opts, IODebugContext* dbg) override { assert(status_.ok()); // write to temporary file @@ -64,16 +64,16 @@ class CloudStorageWritableFileImpl : public CloudStorageWritableFile { } using CloudStorageWritableFile::PositionedAppend; - IOStatus PositionedAppend(Slice const& data, uint64_t offset, - IOOptions const& opts, + IOStatus PositionedAppend(const Slice& data, uint64_t offset, + const IOOptions& opts, IODebugContext* dbg) override { return local_file_->PositionedAppend(data, offset, opts, dbg); } - IOStatus Truncate(uint64_t size, IOOptions const& opts, + IOStatus Truncate(uint64_t size, const IOOptions& opts, IODebugContext* dbg) override { return local_file_->Truncate(size, opts, dbg); } - IOStatus Fsync(IOOptions const& opts, IODebugContext* dbg) override { + IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override { return local_file_->Fsync(opts, dbg); } bool IsSyncThreadSafe() const override { @@ -83,7 +83,7 @@ class CloudStorageWritableFileImpl : public CloudStorageWritableFile { size_t GetRequiredBufferAlignment() const override { return local_file_->GetRequiredBufferAlignment(); } - uint64_t GetFileSize(IOOptions const& opts, IODebugContext* dbg) override { + uint64_t GetFileSize(const IOOptions& opts, IODebugContext* dbg) override { return local_file_->GetFileSize(opts, dbg); } size_t GetUniqueId(char* id, size_t max_size) const override { @@ -92,22 +92,22 @@ class CloudStorageWritableFileImpl : public CloudStorageWritableFile { IOStatus InvalidateCache(size_t offset, size_t length) override { return local_file_->InvalidateCache(offset, length); } - IOStatus RangeSync(uint64_t offset, uint64_t nbytes, IOOptions const& opts, + IOStatus RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& opts, IODebugContext* dbg) override { return local_file_->RangeSync(offset, nbytes, opts, dbg); } - 
IOStatus Allocate(uint64_t offset, uint64_t len, IOOptions const& opts, + IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& opts, IODebugContext* dbg) override { return local_file_->Allocate(offset, len, opts, dbg); } - IOStatus Flush(IOOptions const& opts, IODebugContext* dbg) override { + IOStatus Flush(const IOOptions& opts, IODebugContext* dbg) override { assert(status_.ok()); return local_file_->Flush(opts, dbg); } IOStatus status() override { return status_; } - IOStatus Sync(IOOptions const& opts, IODebugContext* dbg) override; - IOStatus Close(IOOptions const& opts, IODebugContext* dbg) override; + IOStatus Sync(const IOOptions& opts, IODebugContext* dbg) override; + IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; }; // All writes to this DB can be configured to be persisted @@ -119,40 +119,40 @@ class CloudStorageProviderImpl : public CloudStorageProvider { static Status CreateS3Provider(std::unique_ptr* result); static Status CreateGcsProvider( std::unique_ptr* result); - static char const* kS3() { return "s3"; } - static char const* kGcs() { return "gcs"; } + static const char* kS3() { return "s3"; } + static const char* kGcs() { return "gcs"; } CloudStorageProviderImpl(); virtual ~CloudStorageProviderImpl(); - IOStatus GetCloudObject(std::string const& bucket_name, - std::string const& object_path, - std::string const& local_destination) override; - IOStatus PutCloudObject(std::string const& local_file, - std::string const& bucket_name, - std::string const& object_path) override; + IOStatus GetCloudObject(const std::string& bucket_name, + const std::string& object_path, + const std::string& local_destination) override; + IOStatus PutCloudObject(const std::string& local_file, + const std::string& bucket_name, + const std::string& object_path) override; IOStatus NewCloudReadableFile( - std::string const& bucket, std::string const& fname, - FileOptions const& options, + const std::string& bucket, const std::string& fname, + const FileOptions& options, std::unique_ptr* result, IODebugContext* dbg) override; - Status PrepareOptions(ConfigOptions const& options) override; + Status PrepareOptions(const ConfigOptions& options) override; protected: std::unique_ptr rng_; virtual IOStatus DoNewCloudReadableFile( - std::string const& bucket, std::string const& fname, uint64_t fsize, - std::string const& content_hash, FileOptions const& options, + const std::string& bucket, const std::string& fname, uint64_t fsize, + const std::string& content_hash, const FileOptions& options, std::unique_ptr* result, IODebugContext* dbg) = 0; // Downloads object from the cloud into a local directory - virtual IOStatus DoGetCloudObject(std::string const& bucket_name, - std::string const& object_path, - std::string const& local_path, + virtual IOStatus DoGetCloudObject(const std::string& bucket_name, + const std::string& object_path, + const std::string& local_path, uint64_t* remote_size) = 0; - virtual IOStatus DoPutCloudObject(std::string const& local_file, - std::string const& object_path, - std::string const& bucket_name, + virtual IOStatus DoPutCloudObject(const std::string& local_file, + const std::string& object_path, + const std::string& bucket_name, uint64_t file_size) = 0; CloudFileSystem* cfs_; From d47c5149ba8398dbf078faf55888c1de01ca5965 Mon Sep 17 00:00:00 2001 From: githubzilla Date: Thu, 26 Oct 2023 06:50:50 +0000 Subject: [PATCH 09/16] revert unnecessary file changes --- CMakeLists.txt | 2 +- cloud/aws/aws_s3.cc | 4 ++-- 2 files changed, 3 insertions(+), 
3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb447b32f06..e088e94fdc8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1651,4 +1651,4 @@ endif() option(WITH_BENCHMARK "build benchmark tests" OFF) if(WITH_BENCHMARK) add_subdirectory(${PROJECT_SOURCE_DIR}/microbench/) -endif() \ No newline at end of file +endif() diff --git a/cloud/aws/aws_s3.cc b/cloud/aws/aws_s3.cc index 300eda53c36..ce9cda635cd 100644 --- a/cloud/aws/aws_s3.cc +++ b/cloud/aws/aws_s3.cc @@ -688,7 +688,7 @@ IOStatus S3StorageProvider::ExistsCloudObject(const std::string& bucket_name, IOStatus S3StorageProvider::GetCloudObjectSize(const std::string& bucket_name, const std::string& object_path, uint64_t* filesize) { - HeadObjectResult result; + HeadObjectResult result; result.size = filesize; return HeadObject(bucket_name, object_path, &result); } @@ -1064,4 +1064,4 @@ Status CloudStorageProviderImpl::CreateS3Provider( #endif /* USE_AWS */ } } // namespace ROCKSDB_NAMESPACE -#endif // ROCKSDB_LITE \ No newline at end of file +#endif // ROCKSDB_LITE From a0b7172eb5480e4646c00a636605c1e8ebbfba30 Mon Sep 17 00:00:00 2001 From: githubzilla Date: Thu, 26 Oct 2023 06:51:13 +0000 Subject: [PATCH 10/16] revert .clang-format changes --- include/rocksdb/cloud/cloud_file_system.h | 151 +++++++++++----------- 1 file changed, 74 insertions(+), 77 deletions(-) diff --git a/include/rocksdb/cloud/cloud_file_system.h b/include/rocksdb/cloud/cloud_file_system.h index 56aba4f2634..f408335a528 100644 --- a/include/rocksdb/cloud/cloud_file_system.h +++ b/include/rocksdb/cloud/cloud_file_system.h @@ -1,11 +1,6 @@ // Copyright (c) 2016-present, Rockset, Inc. All rights reserved. // #pragma once -#include "rocksdb/cache.h" -#include "rocksdb/configurable.h" -#include "rocksdb/file_system.h" -#include "rocksdb/io_status.h" -#include "rocksdb/status.h" #include #include #include @@ -13,6 +8,12 @@ #include #include +#include "rocksdb/cache.h" +#include "rocksdb/configurable.h" +#include "rocksdb/file_system.h" +#include "rocksdb/io_status.h" +#include "rocksdb/status.h" + namespace Aws { namespace Auth { class AWSCredentialsProvider; @@ -90,10 +91,10 @@ class AwsCloudAccessCredentials { // functions to support AWS credentials // // Initialize AWS credentials using access_key_id and secret_key - void InitializeSimple(std::string const& aws_access_key_id, - std::string const& aws_secret_key); + void InitializeSimple(const std::string& aws_access_key_id, + const std::string& aws_secret_key); // Initialize AWS credentials using a config file - void InitializeConfig(std::string const& aws_config_file); + void InitializeConfig(const std::string& aws_config_file); // test if valid AWS credentials are present Status HasValid() const; @@ -104,7 +105,7 @@ class AwsCloudAccessCredentials { private: AwsAccessType GetAccessType() const; - Status CheckCredentials(AwsAccessType const& aws_type) const; + Status CheckCredentials(const AwsAccessType& aws_type) const; public: std::string access_key_id; @@ -117,13 +118,13 @@ class AwsCloudAccessCredentials { }; using S3ClientFactory = std::function( - std::shared_ptr const&, - Aws::Client::ClientConfiguration const&)>; + const std::shared_ptr&, + const Aws::Client::ClientConfiguration&)>; #ifdef USE_GCP using GCSClientFactory = std::function( - google::cloud::Options const&)>; + const google::cloud::Options&)>; #endif // Defines parameters required to connect to Kafka @@ -169,26 +170,21 @@ class BucketOptions { void SetBucketPrefix(std::string prefix); const std::string& GetBucketPrefix() const { 
return prefix_; } const std::string& GetBucketName(bool full = true) const { -======= - void SetBucketName(std::string const& bucket, std::string const& prefix = ""); - std::string const& GetBucketPrefix() const { return prefix_; } - std::string const& GetBucketName(bool full = true) const { ->>>>>>> c266119fc (Google cloud storage support) if (full) { return name_; } else { return bucket_; } } - std::string const& GetObjectPath() const { return object_; } - void SetObjectPath(std::string const& object) { object_ = object; } - std::string const& GetRegion() const { return region_; } - void SetRegion(std::string const& region) { region_ = region; } + const std::string& GetObjectPath() const { return object_; } + void SetObjectPath(const std::string& object) { object_ = object; } + const std::string& GetRegion() const { return region_; } + void SetRegion(const std::string& region) { region_ = region; } // Initializes the bucket properties for test purposes - void TEST_Initialize(std::string const& name_prefix, - std::string const& object_path, - std::string const& region = ""); + void TEST_Initialize(const std::string& name_prefix, + const std::string& object_path, + const std::string& region = ""); bool IsValid() const { if (object_.empty() || name_.empty()) { return false; @@ -198,7 +194,7 @@ class BucketOptions { } }; -inline bool operator==(BucketOptions const& lhs, BucketOptions const& rhs) { +inline bool operator==(const BucketOptions& lhs, const BucketOptions& rhs) { if (lhs.IsValid() && rhs.IsValid()) { return ((lhs.GetBucketName() == rhs.GetBucketName()) && (lhs.GetObjectPath() == rhs.GetObjectPath()) && @@ -207,14 +203,14 @@ inline bool operator==(BucketOptions const& lhs, BucketOptions const& rhs) { return false; } } -inline bool operator!=(BucketOptions const& lhs, BucketOptions const& rhs) { +inline bool operator!=(const BucketOptions& lhs, const BucketOptions& rhs) { return !(lhs == rhs); } class AwsCloudOptions { public: static Status GetClientConfiguration( - CloudFileSystem* fs, std::string const& region, + CloudFileSystem* fs, const std::string& region, Aws::Client::ClientConfiguration* config); }; @@ -225,7 +221,7 @@ class AwsCloudOptions { class CloudFileSystemOptions { private: public: - static char const* kName() { return "CloudFileSystemOptions"; } + static const char* kName() { return "CloudFileSystemOptions"; } BucketOptions src_bucket; BucketOptions dest_bucket; // Specify the type of cloud-service to use. Deprecated. @@ -489,11 +485,11 @@ class CloudFileSystemOptions { // Sets result based on the value of name or alt in the environment // Returns true if the name/alt exists in the environment, false otherwise - static bool GetNameFromEnvironment(char const* name, char const* alt, + static bool GetNameFromEnvironment(const char* name, const char* alt, std::string* result); - void TEST_Initialize(std::string const& name_prefix, - std::string const& object_path, - std::string const& region = ""); + void TEST_Initialize(const std::string& name_prefix, + const std::string& object_path, + const std::string& region = ""); Status Configure(const ConfigOptions& config_options, const std::string& opts_str); @@ -532,31 +528,31 @@ class CloudFileSystem : public FileSystem { virtual IOStatus PreloadCloudManifest(const std::string& local_dbname) = 0; // This method will migrate the database that is using pure RocksDB into // RocksDB-Cloud. Call this before opening the database with RocksDB-Cloud. 
- virtual IOStatus MigrateFromPureRocksDB(std::string const& local_dbname) = 0; + virtual IOStatus MigrateFromPureRocksDB(const std::string& local_dbname) = 0; // Reads a file from the cloud virtual IOStatus NewSequentialFileCloud( - std::string const& bucket_prefix, std::string const& fname, - FileOptions const& file_opts, std::unique_ptr* result, + const std::string& bucket_prefix, const std::string& fname, + const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) = 0; // Saves and retrieves the dbid->dirname mapping in cloud storage - virtual IOStatus SaveDbid(std::string const& bucket_name, - std::string const& dbid, - std::string const& dirname) = 0; - virtual IOStatus GetPathForDbid(std::string const& bucket_prefix, - std::string const& dbid, + virtual IOStatus SaveDbid(const std::string& bucket_name, + const std::string& dbid, + const std::string& dirname) = 0; + virtual IOStatus GetPathForDbid(const std::string& bucket_prefix, + const std::string& dbid, std::string* dirname) = 0; - virtual IOStatus GetDbidList(std::string const& bucket_prefix, + virtual IOStatus GetDbidList(const std::string& bucket_prefix, DbidList* dblist) = 0; - virtual IOStatus DeleteDbid(std::string const& bucket_prefix, - std::string const& dbid) = 0; + virtual IOStatus DeleteDbid(const std::string& bucket_prefix, + const std::string& dbid) = 0; // Deletes file from a destination bucket. - virtual IOStatus DeleteCloudFileFromDest(std::string const& fname) = 0; + virtual IOStatus DeleteCloudFileFromDest(const std::string& fname) = 0; // Copies a local file to a destination bucket. - virtual IOStatus CopyLocalFileToDest(std::string const& local_name, - std::string const& cloud_name) = 0; + virtual IOStatus CopyLocalFileToDest(const std::string& local_name, + const std::string& cloud_name) = 0; // Returns CloudManifest file name for a given db. virtual std::string CloudManifestFile(const std::string& dbname) = 0; @@ -584,14 +580,14 @@ class CloudFileSystem : public FileSystem { // For example, it will map 00010.sst to 00010.sst-[epoch] where [epoch] is // an epoch during which that file was created. // Files both in S3 and in the local directory have this [epoch] suffix. 
- virtual std::string RemapFilename(std::string const& logical_name) const = 0; + virtual std::string RemapFilename(const std::string& logical_name) const = 0; // Find the list of live files based on CloudManifest and Manifest in local db // // For the returned filepath in `live_sst_files` and `manifest_file`, we only // include the basename of the filepath but not the directory prefix to the // file - virtual IOStatus FindAllLiveFiles(std::string const& local_dbname, + virtual IOStatus FindAllLiveFiles(const std::string& local_dbname, std::vector* live_sst_files, std::string* manifest_file) = 0; @@ -610,7 +606,7 @@ class CloudFileSystem : public FileSystem { // // If delta has already been applied in cloud manifest, delta_applied would be // `false` - virtual IOStatus ApplyCloudManifestDelta(CloudManifestDelta const& delta, + virtual IOStatus ApplyCloudManifestDelta(const CloudManifestDelta& delta, bool* delta_applied) = 0; // This function does several things: @@ -622,16 +618,17 @@ class CloudFileSystem : public FileSystem { // // Return InvalidArgument status if the delta has been applied in current // CloudManifest - virtual IOStatus RollNewCookie(std::string const& local_dbname, - std::string const& cookie, - CloudManifestDelta const& delta) const = 0; + virtual IOStatus RollNewCookie(const std::string& local_dbname, + const std::string& cookie, + const CloudManifestDelta& delta) const = 0; virtual IOStatus GetMaxFileNumberFromCurrentManifest( - std::string const& local_dbname, uint64_t* max_file_number) = 0; + const std::string& local_dbname, uint64_t* max_file_number) = 0; // Delete both local and cloud invisble files virtual IOStatus DeleteCloudInvisibleFiles( const std::vector& active_cookies) = 0; + // Delete local invisible files. This could be helpful when there is one // single instance managing lifetime of files in cloud while the other // instances reference and download the files in cloud. The other instances @@ -702,34 +699,34 @@ class CloudFileSystemEnv { // data from cloud storage. // If dest_bucket_name is empty, then the associated db does not write any // data to cloud storage. 
- static Status NewAwsFileSystem(std::shared_ptr const& base_fs, - std::string const& src_bucket_name, - std::string const& src_object_prefix, - std::string const& src_bucket_region, - std::string const& dest_bucket_name, - std::string const& dest_object_prefix, - std::string const& dest_bucket_region, - CloudFileSystemOptions const& fs_options, - std::shared_ptr const& logger, + static Status NewAwsFileSystem(const std::shared_ptr& base_fs, + const std::string& src_bucket_name, + const std::string& src_object_prefix, + const std::string& src_bucket_region, + const std::string& dest_bucket_name, + const std::string& dest_object_prefix, + const std::string& dest_bucket_region, + const CloudFileSystemOptions& fs_options, + const std::shared_ptr& logger, CloudFileSystem** cfs); - static Status NewAwsFileSystem(std::shared_ptr const& base_fs, - CloudFileSystemOptions const& fs_options, - std::shared_ptr const& logger, + static Status NewAwsFileSystem(const std::shared_ptr& base_fs, + const CloudFileSystemOptions& fs_options, + const std::shared_ptr& logger, CloudFileSystem** cfs); - static Status NewGcpFileSystem(std::shared_ptr const& base_fs, - std::string const& src_bucket_name, - std::string const& src_object_prefix, - std::string const& src_buck_region, - std::string const& dest_bucket_name, - std::string const& dest_bucket_prefix, - std::string const& dest_bucket_region, - CloudFileSystemOptions const& fs_options, - std::shared_ptr const& logger, + static Status NewGcpFileSystem(const std::shared_ptr& base_fs, + const std::string& src_bucket_name, + const std::string& src_object_prefix, + const std::string& src_buck_region, + const std::string& dest_bucket_name, + const std::string& dest_bucket_prefix, + const std::string& dest_bucket_region, + const CloudFileSystemOptions& fs_options, + const std::shared_ptr& logger, CloudFileSystem** cfs); - static Status NewGcpFileSystem(std::shared_ptr const& base_fs, - CloudFileSystemOptions const& fs_options, - std::shared_ptr const& logger, + static Status NewGcpFileSystem(const std::shared_ptr& base_fs, + const CloudFileSystemOptions& fs_options, + const std::shared_ptr& logger, CloudFileSystem** cfs); // Creates a new Env that delegates all thread/time related From 1b0e8c5bf46b6fc706919d94fe7fd50681261eb6 Mon Sep 17 00:00:00 2001 From: githubzilla Date: Tue, 7 Nov 2023 07:35:21 +0000 Subject: [PATCH 11/16] Fix ld error - absl::variant_internal::ThrowBadVariantAccess not found --- build_tools/build_detect_platform | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 9dfa4396431..4e398037d83 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -646,7 +646,7 @@ if [ "${USE_GCP}XXX" = "1XXX" ]; then GCP_SDK=/usr/local GCI=${GCP_SDK}/include/ GCS_CCFLAGS="$GCS_CCFLAGS -I$GCI -DUSE_GCP" - GCS_LDFLAGS="$GCS_LDFLAGS -lgoogle_cloud_cpp_common -lgoogle_cloud_cpp_storage" + GCS_LDFLAGS="$GCS_LDFLAGS -lgoogle_cloud_cpp_common -lgoogle_cloud_cpp_storage -labsl_bad_variant_access" COMMON_FLAGS="$COMMON_FLAGS $GCS_CCFLAGS" PLATFORM_LDFLAGS="$GCS_LDFLAGS $PLATFORM_LDFLAGS" fi From a2356fc8cbb2e5e019907a00874a011f54fc79dd Mon Sep 17 00:00:00 2001 From: githubzilla Date: Fri, 15 Dec 2023 08:29:45 +0000 Subject: [PATCH 12/16] Adjust backoff retry policy initial setting to 500ms --- cloud/gcp/gcp_retry.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cloud/gcp/gcp_retry.cc b/cloud/gcp/gcp_retry.cc index 
2c15f862594..6409ee370b7 100644 --- a/cloud/gcp/gcp_retry.cc +++ b/cloud/gcp/gcp_retry.cc @@ -98,9 +98,10 @@ Status GcpCloudOptions::GetClientConfiguration(CloudFileSystem* fs, options.set( gcs::AlwaysRetryIdempotencyPolicy().clone()); - // Use exponential backoff with a 1ms initial delay, 1 minute maximum delay, + // Use exponential backoff with a 500ms initial delay and a 1 minute maximum + // delay; GCS only allows about one write per second per object. options.set( - gcs::ExponentialBackoffPolicy(std::chrono::milliseconds(1), + gcs::ExponentialBackoffPolicy(std::chrono::milliseconds(500), std::chrono::minutes(1), 2.0) .clone()); From d6fb3d43bd6c86f1e133252ef61fe0ada5d51418 Mon Sep 17 00:00:00 2001 From: githubzilla Date: Tue, 5 Nov 2024 19:43:40 +0800 Subject: [PATCH 13/16] fix build error of missing the lib of absl_bad_optional_access --- build_tools/build_detect_platform | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 4e398037d83..376abda8819 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -646,7 +646,7 @@ if [ "${USE_GCP}XXX" = "1XXX" ]; then GCP_SDK=/usr/local GCI=${GCP_SDK}/include/ GCS_CCFLAGS="$GCS_CCFLAGS -I$GCI -DUSE_GCP" - GCS_LDFLAGS="$GCS_LDFLAGS -lgoogle_cloud_cpp_common -lgoogle_cloud_cpp_storage -labsl_bad_variant_access" + GCS_LDFLAGS="$GCS_LDFLAGS -lgoogle_cloud_cpp_common -lgoogle_cloud_cpp_storage -labsl_bad_variant_access -labsl_bad_optional_access" COMMON_FLAGS="$COMMON_FLAGS $GCS_CCFLAGS" PLATFORM_LDFLAGS="$GCS_LDFLAGS $PLATFORM_LDFLAGS" fi @@ -842,4 +842,4 @@ if test -n "$USE_FOLLY"; then fi if test -n "$PPC_LIBC_IS_GNU"; then echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU" >> "$OUTPUT" -fi \ No newline at end of file +fi
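As a point of comparison for the retry knobs tuned in PATCH 12 above, the same policies can be assembled directly on a google-cloud-cpp Options object. A minimal standalone sketch; it substitutes the stock LimitedErrorCountRetryPolicy for the custom GcpRetryPolicy used by RocksDB-Cloud, and the budget of 10 transient errors is an arbitrary assumption:

#include <chrono>

#include "google/cloud/storage/client.h"

namespace gcs = ::google::cloud::storage;

google::cloud::Options MakeRetryOptions() {
  // Retry idempotent and non-idempotent operations alike, as the patch does.
  auto options = google::cloud::Options{}.set<gcs::IdempotencyPolicyOption>(
      gcs::AlwaysRetryIdempotencyPolicy().clone());
  // 500ms initial delay, capped at 1 minute, doubling between attempts --
  // the same values PATCH 12 chooses above.
  options.set<gcs::BackoffPolicyOption>(
      gcs::ExponentialBackoffPolicy(std::chrono::milliseconds(500),
                                    std::chrono::minutes(1), 2.0)
          .clone());
  // Stand-in for the patch's GcpRetryPolicy: stop after 10 transient errors.
  options.set<gcs::RetryPolicyOption>(
      gcs::LimitedErrorCountRetryPolicy(10).clone());
  return options;
}
// Usage: auto client = gcs::Client(MakeRetryOptions());

From 052b22ba65e8db13466a840ffdfd11209f4fae06 Mon Sep 17 00:00:00 2001 From: githubzilla Date: Wed, 6 Nov 2024 18:16:14 +0800 Subject: [PATCH 14/16] Fix build errors after rebase --- cloud/cloud_file_system.cc | 10 +++++----- cloud/gcp/gcp_cs.cc | 6 ++++-- cloud/gcp/gcp_db_cloud_test.cc | 29 +++++++++++++++-------------- cloud/gcp/gcp_file_system.cc | 11 ++++++----- cloud/gcp/gcp_file_system.h | 5 +++-- cloud/gcp/gcp_file_system_test.cc | 30 +++++++++++++++--------------- db/version_set.cc | 2 ++ 7 files changed, 50 insertions(+), 43 deletions(-) diff --git a/cloud/cloud_file_system.cc b/cloud/cloud_file_system.cc index 9386ce93978..bc9d78e3da0 100644 --- a/cloud/cloud_file_system.cc +++ b/cloud/cloud_file_system.cc @@ -10,7 +10,7 @@ #include #include "cloud/aws/aws_file_system.h" -#include "cloud/cloud_file_system_impl.h" +#include "rocksdb/cloud/cloud_file_system_impl.h" #include "cloud/cloud_log_controller_impl.h" #include "cloud/cloud_manifest.h" #include "cloud/db_cloud_impl.h" #include "cloud/filename.h" @@ -450,7 +450,7 @@ Status CloudFileSystemEnv::NewAwsFileSystem( return NewAwsFileSystem(base_fs, options, logger, cfs); } -Status CloudFileSystem::NewGcpFileSystem( +Status CloudFileSystemEnv::NewGcpFileSystem( const std::shared_ptr& base_fs, const std::string& src_cloud_bucket, const std::string& src_cloud_object, const std::string& src_cloud_region, const std::string& dest_cloud_bucket, const std::string& dest_cloud_object, const std::string& dest_cloud_region, const CloudFileSystemOptions& cloud_options, const std::shared_ptr& logger, CloudFileSystem** cfs) { @@ -669,18 +669,18 @@ Status CloudFileSystemEnv::NewAwsFileSystem( #endif #ifndef USE_GCP -Status CloudFileSystem::NewGcpFileSystem( +Status CloudFileSystemEnv::NewGcpFileSystem( const std::shared_ptr& /*base_fs*/, const CloudFileSystemOptions& /*options*/, const std::shared_ptr& /*logger*/, CloudFileSystem** /*cfs*/) { return Status::NotSupported("RocksDB Cloud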
not compiled with GCP support"); } #else -Status CloudFileSystem::NewGcpFileSystem( +Status CloudFileSystemEnv::NewGcpFileSystem( const std::shared_ptr& base_fs, const CloudFileSystemOptions& options, const std::shared_ptr& logger, CloudFileSystem** cfs) { - CloudFileSystem::RegisterCloudObjects(); + CloudFileSystemEnv::RegisterCloudObjects(); // Dump out cloud fs options options.Dump(logger.get()); diff --git a/cloud/gcp/gcp_cs.cc b/cloud/gcp/gcp_cs.cc index e0420f700f6..72f67ebd71d 100644 --- a/cloud/gcp/gcp_cs.cc +++ b/cloud/gcp/gcp_cs.cc @@ -7,12 +7,14 @@ namespace gcs = ::google::cloud::storage; namespace gcp = ::google::cloud; #endif -#include "cloud/cloud_storage_provider_impl.h" +#include "rocksdb/cloud/cloud_storage_provider_impl.h" #include "cloud/filename.h" #include "cloud/gcp/gcp_file_system.h" #include "rocksdb/cloud/cloud_file_system.h" #include "rocksdb/convenience.h" +#include "port/port_posix.h" #include +#include #ifdef _WIN32_WINNT #undef GetMessage @@ -721,4 +723,4 @@ Status CloudStorageProviderImpl::CreateGcsProvider( #endif } } // namespace ROCKSDB_NAMESPACE -#endif // ROCKSDB_LITE \ No newline at end of file +#endif // ROCKSDB_LITE diff --git a/cloud/gcp/gcp_db_cloud_test.cc b/cloud/gcp/gcp_db_cloud_test.cc index 8b2fa5b4d43..eabc493ef07 100644 --- a/cloud/gcp/gcp_db_cloud_test.cc +++ b/cloud/gcp/gcp_db_cloud_test.cc @@ -12,10 +12,11 @@ #include #include -#include "cloud/cloud_file_deletion_scheduler.h" -#include "cloud/cloud_file_system_impl.h" +#include "rocksdb/cloud/cloud_file_deletion_scheduler.h" +#include "rocksdb/cloud/cloud_file_system_impl.h" #include "cloud/cloud_scheduler.h" -#include "cloud/cloud_storage_provider_impl.h" +#include "cloud/cloud_manifest.h" +#include "rocksdb/cloud/cloud_storage_provider_impl.h" #include "cloud/db_cloud_impl.h" #include "cloud/filename.h" #include "cloud/manifest_reader.h" @@ -80,7 +81,7 @@ class CloudTest : public testing::Test { CloudFileSystem* afs; // create a dummy Gcp env - ASSERT_OK(CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(), + ASSERT_OK(CloudFileSystemEnv::NewGcpFileSystem(base_env_->GetFileSystem(), cloud_fs_options_, options_.info_log, &afs)); ASSERT_NE(afs, nullptr); @@ -140,7 +141,7 @@ class CloudTest : public testing::Test { // Cleanup the cloud bucket if (!cloud_fs_options_.src_bucket.GetBucketName().empty()) { CloudFileSystem* afs; - Status st = CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(), + Status st = CloudFileSystemEnv::NewGcpFileSystem(base_env_->GetFileSystem(), cloud_fs_options_, options_.info_log, &afs); if (st.ok()) { @@ -155,11 +156,11 @@ class CloudTest : public testing::Test { void CreateCloudEnv() { CloudFileSystem* cfs; - ASSERT_OK(CloudFileSystem::NewGcpFileSystem(base_env_->GetFileSystem(), + ASSERT_OK(CloudFileSystemEnv::NewGcpFileSystem(base_env_->GetFileSystem(), cloud_fs_options_, options_.info_log, &cfs)); std::shared_ptr fs(cfs); - aenv_ = CloudFileSystem::NewCompositeEnv(base_env_, std::move(fs)); + aenv_ = CloudFileSystemEnv::NewCompositeEnv(base_env_, std::move(fs)); } // Open database via the cloud interface @@ -245,7 +246,7 @@ class CloudTest : public testing::Test { copt.keep_local_sst_files = true; } // Create new Gcp env - Status st = CloudFileSystem::NewGcpFileSystem( + Status st = CloudFileSystemEnv::NewGcpFileSystem( base_env_->GetFileSystem(), copt, options_.info_log, &cfs); if (!st.ok()) { return st; @@ -2447,7 +2448,7 @@ TEST_F(CloudTest, DisableObsoleteFileDeletionOnOpenTest) { // obsolete files are not deleted 
EXPECT_EQ(GetAllLocalFiles().size(), 8); // obsolete files are deleted! - db_->EnableFileDeletions(false /* force */); + db_->EnableFileDeletions(); EXPECT_EQ(GetAllLocalFiles().size(), 6); CloseDB(); } @@ -2820,7 +2821,7 @@ TEST_F(CloudTest, SanitizeDirectoryTest) { EXPECT_EQ(local_files.size(), 7); EXPECT_OK( - GetCloudFileSystemImpl()->SanitizeDirectory(options_, dbname_, false)); + GetCloudFileSystemImpl()->SanitizeLocalDirectory(options_, dbname_, false)); // cleaning up during sanitization not triggered EXPECT_EQ(local_files.size(), GetAllLocalFiles().size()); @@ -2830,7 +2831,7 @@ base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */))); EXPECT_OK( - GetCloudFileSystemImpl()->SanitizeDirectory(options_, dbname_, false)); + GetCloudFileSystemImpl()->SanitizeLocalDirectory(options_, dbname_, false)); local_files = GetAllLocalFiles(); // IDENTITY file is downloaded after cleaning up, which is the only file that @@ -2852,7 +2853,7 @@ base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */))); ASSERT_OK( - GetCloudFileSystemImpl()->SanitizeDirectory(options_, dbname_, false)); + GetCloudFileSystemImpl()->SanitizeLocalDirectory(options_, dbname_, false)); // IDENTITY file + the random directory we created EXPECT_EQ(GetAllLocalFiles().size(), 2); @@ -2874,7 +2875,7 @@ base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */))); ASSERT_OK( - GetCloudFileSystemImpl()->SanitizeDirectory(options_, dbname_, false)); + GetCloudFileSystemImpl()->SanitizeLocalDirectory(options_, dbname_, false)); SyncPoint::GetInstance()->DisableProcessing(); } @@ -3145,4 +3146,4 @@ int main(int, char**) { return 0; } -#endif // !ROCKSDB_LITE \ No newline at end of file +#endif // !ROCKSDB_LITE
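Zooming out from the individual renames in this patch, the construction sequence the tests rely on reduces to the sketch below. The bucket name and object path are hypothetical placeholders, error handling collapses to a null return, and NewCompositeEnv is assumed to return a std::unique_ptr<Env>, as its use in CreateCloudEnv above suggests:

#include <memory>
#include <utility>

#include "rocksdb/cloud/cloud_file_system.h"
#include "rocksdb/env.h"

using namespace ROCKSDB_NAMESPACE;

std::unique_ptr<Env> MakeGcpEnv(const std::shared_ptr<Logger>& info_log) {
  CloudFileSystemOptions cloud_opts;
  cloud_opts.src_bucket.SetBucketName("my-bucket");    // hypothetical bucket
  cloud_opts.src_bucket.SetObjectPath("db/gcp-demo");  // hypothetical path
  cloud_opts.dest_bucket = cloud_opts.src_bucket;

  CloudFileSystem* cfs = nullptr;
  Status s = CloudFileSystemEnv::NewGcpFileSystem(
      Env::Default()->GetFileSystem(), cloud_opts, info_log, &cfs);
  if (!s.ok()) return nullptr;

  // Wrap the cloud FileSystem into an Env, as the tests in this patch do.
  std::shared_ptr<FileSystem> fs(cfs);
  return CloudFileSystemEnv::NewCompositeEnv(Env::Default(), std::move(fs));
}

diff --git a/cloud/gcp/gcp_file_system.cc b/cloud/gcp/gcp_file_system.cc index 973a30b4ec2..981510349a7 100644 --- a/cloud/gcp/gcp_file_system.cc +++ b/cloud/gcp/gcp_file_system.cc @@ -2,14 +2,14 @@ #ifdef USE_GCP +#include "cloud/gcp/gcp_file_system.h" + #include +#include "rocksdb/cloud/cloud_storage_provider_impl.h" #include "rocksdb/convenience.h" #include "rocksdb/utilities/object_registry.h" -#include "cloud/gcp/gcp_file_system.h" -#include "cloud/cloud_storage_provider_impl.h" - namespace ROCKSDB_NAMESPACE { GcpFileSystem::GcpFileSystem(std::shared_ptr const& underlying_fs, CloudFileSystemOptions const& cloud_options, @@ -28,7 +28,8 @@ Status GcpFileSystem::NewGcpFileSystem( } std::unique_ptr gfs( new GcpFileSystem(fs, cloud_options, info_log)); - auto env = gfs->NewCompositeEnvFromThis(Env::Default()); + auto env = + CloudFileSystemEnv::NewCompositeEnvFromFs(gfs.get(), Env::Default()); ConfigOptions config_options; config_options.env = env.get(); status = gfs->PrepareOptions(config_options); @@ -107,4 +108,4 @@ int CloudFileSystemImpl::RegisterGcpObjects(ObjectLibrary& library, } } // namespace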
ROCKSDB_NAMESPACE -#endif \ No newline at end of file +#endif diff --git a/cloud/gcp/gcp_file_system_test.cc b/cloud/gcp/gcp_file_system_test.cc index 9a5b0311c0e..1e2c96691a3 100644 --- a/cloud/gcp/gcp_file_system_test.cc +++ b/cloud/gcp/gcp_file_system_test.cc @@ -2,14 +2,15 @@ #ifndef ROCKSDB_LITE -#ifdef USE_GCP +#include -#include "rocksdb/cloud/cloud_file_system.h" +#ifdef USE_GCP #include "cloud/cloud_log_controller_impl.h" -#include "cloud/cloud_storage_provider_impl.h" +#include "rocksdb/cloud/cloud_file_system.h" #include "rocksdb/cloud/cloud_log_controller.h" #include "rocksdb/cloud/cloud_storage_provider.h" +#include "rocksdb/cloud/cloud_storage_provider_impl.h" #include "rocksdb/convenience.h" #include "rocksdb/env.h" #include "test_util/testharness.h" @@ -114,7 +115,7 @@ TEST(CloudFileSystemTest, ConfigureEnv) { ConfigOptions config_options; config_options.invoke_prepare_options = false; - ASSERT_OK(CloudFileSystem::CreateFromString( + ASSERT_OK(CloudFileSystemEnv::CreateFromString( config_options, "keep_local_sst_files=true", &cfs)); ASSERT_NE(cfs, nullptr); ASSERT_STREQ(cfs->Name(), "cloud"); @@ -128,7 +129,7 @@ TEST(CloudFileSystemTest, TestInitialize) { BucketOptions bucket; ConfigOptions config_options; config_options.invoke_prepare_options = false; - ASSERT_OK(CloudFileSystem::CreateFromString( + ASSERT_OK(CloudFileSystemEnv::CreateFromString( config_options, "id=cloud; TEST=cloudenvtest:/test/path", &cfs)); ASSERT_NE(cfs, nullptr); ASSERT_STREQ(cfs->Name(), "cloud"); @@ -138,7 +139,7 @@ TEST(CloudFileSystemTest, TestInitialize) { ASSERT_EQ(cfs->GetSrcObjectPath(), "/test/path"); ASSERT_TRUE(cfs->SrcMatchesDest()); - ASSERT_OK(CloudFileSystem::CreateFromString( + ASSERT_OK(CloudFileSystemEnv::CreateFromString( config_options, "id=cloud; TEST=cloudenvtest2:/test/path2?here", &cfs)); ASSERT_NE(cfs, nullptr); ASSERT_STREQ(cfs->Name(), "cloud"); @@ -148,7 +149,7 @@ TEST(CloudFileSystemTest, TestInitialize) { ASSERT_EQ(cfs->GetCloudFileSystemOptions().src_bucket.GetRegion(), "here"); ASSERT_TRUE(cfs->SrcMatchesDest()); - ASSERT_OK(CloudFileSystem::CreateFromString( + ASSERT_OK(CloudFileSystemEnv::CreateFromString( config_options, "id=cloud; TEST=cloudenvtest3:/test/path3; " "src.bucket=my_bucket; dest.object=/my_path", @@ -166,7 +167,7 @@ TEST(CloudFileSystemTest, ConfigureGcpEnv) { std::unique_ptr cfs; ConfigOptions config_options; - Status s = CloudFileSystem::CreateFromString( + Status s = CloudFileSystemEnv::CreateFromString( config_options, "id=gcp; keep_local_sst_files=true", &cfs); #ifdef USE_GCP ASSERT_OK(s); @@ -188,14 +189,14 @@ TEST(CloudFileSystemTest, ConfigureGcsProvider) { std::unique_ptr cfs; ConfigOptions config_options; - Status s = - CloudFileSystem::CreateFromString(config_options, "provider=gcs", &cfs); + Status s = CloudFileSystemEnv::CreateFromString(config_options, + "provider=gcs", &cfs); ASSERT_NOK(s); ASSERT_EQ(cfs, nullptr); #ifdef USE_GCP - ASSERT_OK(CloudFileSystem::CreateFromString(config_options, - "id=gcp; provider=gcs", &cfs)); + ASSERT_OK(CloudFileSystemEnv::CreateFromString(config_options, + "id=gcp; provider=gcs", &cfs)); ASSERT_STREQ(cfs->Name(), "gcp"); ASSERT_NE(cfs->GetStorageProvider(), nullptr); ASSERT_STREQ(cfs->GetStorageProvider()->Name(), @@ -204,7 +205,6 @@ TEST(CloudFileSystemTest, ConfigureGcsProvider) { } } // namespace ROCKSDB_NAMESPACE - int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); @@ -220,11 +220,11 @@ int main(int, char**) { } #endif // USE_GCP -#else // ROCKSDB_LITE 
+#else // ROCKSDB_LITE #include int main(int, char**) { fprintf(stderr, "SKIPPED as DBCloud is not supported in ROCKSDB_LITE.\n"); return 0; } -#endif // ROCKSDB_LITE \ No newline at end of file +#endif // ROCKSDB_LITE diff --git a/db/version_set.cc b/db/version_set.cc index f3aacbc518c..e6d066bfde0 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -5902,7 +5902,9 @@ Status VersionSet::ProcessManifestWrites( auto epoch = db_options_->replication_epoch_extractor ->EpochOfReplicationSequence( *pending_persist_replication_sequence); +#ifndef NDEBUG bool replication_epoch_set_empty = replication_epochs_.empty(); +#endif replication_epochs_.DeleteEpochsBefore(epoch); // If replication epoch set is not empty before pruning, then it won't // be empty after pruning From 88ffa6b49a3b0c5306c0deec58825ff10f56b8df Mon Sep 17 00:00:00 2001 From: githubzilla Date: Wed, 13 Nov 2024 18:57:59 +0800 Subject: [PATCH 15/16] Fix assert error of dbname != empty --- cloud/cloud_file_system_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/cloud_file_system_impl.cc b/cloud/cloud_file_system_impl.cc index 18a80669111..4817013ce91 100644 --- a/cloud/cloud_file_system_impl.cc +++ b/cloud/cloud_file_system_impl.cc @@ -2283,7 +2283,7 @@ IOStatus CloudFileSystemImpl::FindAllLiveFiles( // filename will be remapped correctly based on current_epoch of // cloud_manifest *manifest_file = - RemapFilename(ManifestFileWithEpoch("" /* dbname */, "" /* epoch */)); + RemapFilename(ManifestFileWithEpoch("" /* epoch */)); RemapFileNumbers(file_nums, live_sst_files); From 90eac149cffe5fc019b32d3880a4f7ff14d9cacf Mon Sep 17 00:00:00 2001 From: githubzilla Date: Thu, 14 Nov 2024 10:51:21 +0800 Subject: [PATCH 16/16] Fix unused parameter warning(as error) --- cloud/cloud_file_system.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cloud/cloud_file_system.cc b/cloud/cloud_file_system.cc index bc9d78e3da0..7004fcaa027 100644 --- a/cloud/cloud_file_system.cc +++ b/cloud/cloud_file_system.cc @@ -473,6 +473,7 @@ Status CloudFileSystemEnv::NewGcpFileSystem( } int DoRegisterCloudObjects(ObjectLibrary& library, const std::string& arg) { + (void) arg; // Suppress unused parameter warning int count = 0; // Register the FileSystem types library.AddFactory(