66
77#include " rocksdb/cloud/db_cloud.h"
88
9- #include < cinttypes>
109#include < algorithm>
1110#include < chrono>
11+ #include < cinttypes>
1212
1313#include " cloud/aws/aws_env.h"
1414#include " cloud/aws/aws_file.h"
1515#include " cloud/db_cloud_impl.h"
1616#include " cloud/filename.h"
1717#include " cloud/manifest_reader.h"
18+ #include " logging/logging.h"
1819#include " rocksdb/options.h"
1920#include " rocksdb/status.h"
2021#include " rocksdb/table.h"
@@ -133,11 +134,11 @@ class CloudTest : public testing::Test {
133134 }
134135
135136 // Creates and Opens a clone
136- void CloneDB (const std::string& clone_name,
137- const std::string& dest_bucket_name,
138- const std::string& dest_object_path,
139- std::unique_ptr<DBCloud>* cloud_db,
140- std::unique_ptr<CloudEnv>* cloud_env) {
137+ Status CloneDB (const std::string& clone_name,
138+ const std::string& dest_bucket_name,
139+ const std::string& dest_object_path,
140+ std::unique_ptr<DBCloud>* cloud_db,
141+ std::unique_ptr<CloudEnv>* cloud_env) {
141142 // The local directory where the clone resides
142143 std::string cname = clone_dir_ + " /" + clone_name;
143144
@@ -153,11 +154,15 @@ class CloudTest : public testing::Test {
153154 copt.dest_bucket .SetBucketName (dest_bucket_name);
154155 }
155156 copt.dest_bucket .SetObjectPath (dest_object_path);
156- if (! copt.dest_bucket .IsValid ()) {
157+ if (!copt.dest_bucket .IsValid ()) {
157158 copt.keep_local_sst_files = true ;
158159 }
159160 // Create new AWS env
160- ASSERT_OK (CloudEnv::NewAwsEnv (base_env_, copt, options_.info_log , &cenv));
161+ Status st = CloudEnv::NewAwsEnv (base_env_, copt, options_.info_log , &cenv);
162+ if (!st.ok ()) {
163+ return st;
164+ }
165+
161166 // To catch any possible file deletion bugs, we set file deletion delay to
162167 // smallest possible
163168 ((AwsEnv*)cenv)->TEST_SetFileDeletionDelay (std::chrono::seconds (0 ));
@@ -175,15 +180,20 @@ class CloudTest : public testing::Test {
175180 ColumnFamilyDescriptor (kDefaultColumnFamilyName , cfopt));
176181 std::vector<ColumnFamilyHandle*> handles;
177182
178- ASSERT_OK (DBCloud::Open (options_, cname, column_families,
179- persistent_cache_path_, persistent_cache_size_gb_,
180- &handles, &clone_db));
183+ st = DBCloud::Open (options_, cname, column_families, persistent_cache_path_,
184+ persistent_cache_size_gb_, &handles, &clone_db);
185+ if (!st.ok ()) {
186+ return st;
187+ }
188+
181189 cloud_db->reset (clone_db);
182190
183191 // Delete the handle for the default column family because the DBImpl
184192 // always holds a reference to it.
185- ASSERT_TRUE (handles.size () > 0 );
193+ assert (handles.size () > 0 );
186194 delete handles[0 ];
195+
196+ return st;
187197 }
188198
189199 void CloseDB () {
@@ -1165,6 +1175,60 @@ TEST_F(CloudTest, Ephemeral) {
11651175 }
11661176}
11671177
1178+ // This test simulates a rare race condition in which an ephemeral clone is
1179+ // started after the durable clone uploads its CLOUDMANIFEST but before it
1180+ // uploads one of the MANIFEST files. In this case, we want to verify that the
1181+ // ephemeral clone is able to reinitialize instead of crash looping.
// Test outline: open the DB, remove its MANIFEST object from the source
// bucket, check that CloneDB() reports a non-OK status, then upload the
// MANIFEST again and check that a second CloneDB() succeeds.
1182+ TEST_F (CloudTest, EphemeralOnCorruptedDB) {
1183+ cloud_env_options_.keep_local_sst_files = true ;
1184+ options_.level0_file_num_compaction_trigger = 100 ; // never compact
1185+
1186+ OpenDB ();
1187+
// List the entries of the local DB directory so the MANIFEST can be located.
// NOTE(review): the result of GetChildren() is not checked here -- confirm
// whether it should be asserted.
1188+ std::vector<std::string> files;
1189+ base_env_->GetChildren (dbname_, &files);
1190+
1191+ // Get the MANIFEST file
1192+ std::string manifest_file_name;
1193+ for (const auto & file_name : files) {
// rfind(prefix, 0) can only match at index 0, so this is a "starts with"
// test; the first matching entry wins.
1194+ if (file_name.rfind (" MANIFEST" , 0 ) == 0 ) {
1195+ manifest_file_name = file_name;
1196+ break ;
1197+ }
1198+ }
1199+
// The rest of the test is meaningless without a MANIFEST name, so stop here
// if none was found.
1200+ ASSERT_FALSE (manifest_file_name.empty ());
1201+
1202+ // Delete MANIFEST file from S3 bucket.
1203+ // This is to simulate the scenario where CLOUDMANIFEST is uploaded, but
1204+ // MANIFEST is not yet uploaded from the durable shard.
// The dynamic_cast result is asserted non-null before it is dereferenced.
1205+ auto aws_env = dynamic_cast <AwsEnv*>(aenv_.get ());
1206+ ASSERT_TRUE (aws_env != nullptr );
// NOTE(review): the result of TEST_DeletePathInS3() is not checked -- if the
// delete silently fails, the ASSERT_NOK below would fail for an unrelated
// reason; confirm whether it should be asserted.
1207+ aws_env->TEST_DeletePathInS3 (
1208+ aws_env->GetSrcBucketName (),
1209+ aws_env->GetSrcObjectPath () + " /" + manifest_file_name);
1210+
1211+ // Ephemeral clone should fail.
// Empty destination bucket name and object path are passed to CloneDB().
1212+ std::unique_ptr<DBCloud> clone_db;
1213+ std::unique_ptr<CloudEnv> cenv;
1214+ Status st = CloneDB (" clone1" , " " , " " , &clone_db, &cenv);
1215+ ASSERT_NOK (st);
1216+
1217+ // Put the MANIFEST file back
// The path under dbname_ is uploaded to the same bucket/object path that was
// deleted above (presumably arguments are local file, bucket, object path --
// TODO confirm against the PutObject declaration).
// NOTE(review): the result of PutObject() is not checked.
1218+ aws_env->PutObject (dbname_ + " /" + manifest_file_name,
1219+ aws_env->GetSrcBucketName (),
1220+ aws_env->GetSrcObjectPath () + " /" + manifest_file_name);
1221+
1222+ // Try one more time. This time it should succeed.
// Drop anything the failed attempt may have stored in the out-parameters
// before reusing them for the retry.
1223+ clone_db.reset ();
1224+ cenv.reset ();
1225+ st = CloneDB (" clone1" , " " , " " , &clone_db, &cenv);
1226+ ASSERT_OK (st);
1227+
// Close the clone first, then the DB opened by OpenDB().
// NOTE(review): the result of clone_db->Close() is not checked.
1228+ clone_db->Close ();
1229+ CloseDB ();
1230+ }
1231+
11681232//
11691233// Test Ephemeral clones with resyncOnOpen mode.
11701234// In this mode, every open of the ephemeral clone db causes its
0 commit comments