Skip to content

Commit feb393b

Browse files
authored
Add method to extract live file list from local manifest (#286)
Add a new method `FindLiveFilesFromLocalManifest` that mirrors `FindAllLiveFiles` but neither resolves the manifest filename nor fetches the manifest from the cloud. This makes it somewhat safer (at the cost of requiring the caller to ensure the manifest is present locally) and more flexible, since it can operate, for example, on a copy of the live manifest.

### Test plan

Most of the code is already covered by the tests for `FindAllLiveFiles`; a new test was added in `db_cloud_test` for retrieving live files from a manifest copy.
1 parent 587e968 commit feb393b

File tree

7 files changed

+166
-41
lines changed

7 files changed

+166
-41
lines changed

cloud/cloud_file_system_impl.cc

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2002,7 +2002,6 @@ IOStatus CloudFileSystemImpl::UploadCloudManifest(
20022002
return st;
20032003
}
20042004

2005-
20062005
IOStatus CloudFileSystemImpl::ApplyCloudManifestDelta(
20072006
const CloudManifestDelta& delta, bool* delta_applied) {
20082007
*delta_applied = cloud_manifest_->AddEpoch(delta.file_num, delta.epoch);
@@ -2271,6 +2270,19 @@ Status CloudFileSystemImpl::CheckValidity() const {
22712270
}
22722271
}
22732272

2273+
void CloudFileSystemImpl::RemapFileNumbers(
2274+
const std::set<uint64_t>& file_numbers,
2275+
std::vector<std::string>* sst_file_names) {
2276+
sst_file_names->resize(file_numbers.size());
2277+
2278+
size_t idx = 0;
2279+
for (auto num : file_numbers) {
2280+
std::string logical_path = MakeTableFileName("" /* path */, num);
2281+
(*sst_file_names)[idx] = RemapFilename(logical_path);
2282+
idx++;
2283+
}
2284+
}
2285+
22742286
IOStatus CloudFileSystemImpl::FindAllLiveFiles(
22752287
const std::string& local_dbname, std::vector<std::string>* live_sst_files,
22762288
std::string* manifest_file) {
@@ -2282,18 +2294,29 @@ IOStatus CloudFileSystemImpl::FindAllLiveFiles(
22822294
return st;
22832295
}
22842296

2285-
live_sst_files->resize(file_nums.size());
2286-
22872297
// filename will be remapped correctly based on current_epoch of
22882298
// cloud_manifest
22892299
*manifest_file =
22902300
RemapFilename(ManifestFileWithEpoch("" /* dbname */, "" /* epoch */));
2291-
size_t idx = 0;
2292-
for (auto num : file_nums) {
2293-
std::string logical_path = MakeTableFileName("" /* path */, num);
2294-
(*live_sst_files)[idx] = RemapFilename(logical_path);
2295-
idx++;
2301+
2302+
RemapFileNumbers(file_nums, live_sst_files);
2303+
2304+
return IOStatus::OK();
2305+
}
2306+
2307+
IOStatus CloudFileSystemImpl::FindLiveFilesFromLocalManifest(
2308+
const std::string& manifest_file,
2309+
std::vector<std::string>* live_sst_files) {
2310+
std::unique_ptr<LocalManifestReader> extractor(
2311+
new LocalManifestReader(info_log_, this));
2312+
std::set<uint64_t> file_nums;
2313+
auto st = extractor->GetManifestLiveFiles(manifest_file, &file_nums);
2314+
if (!st.ok()) {
2315+
return st;
22962316
}
2317+
2318+
RemapFileNumbers(file_nums, live_sst_files);
2319+
22972320
return IOStatus::OK();
22982321
}
22992322

@@ -2307,7 +2330,9 @@ void CloudFileSystemImpl::TEST_InitEmptyCloudManifest() {
23072330
}
23082331

23092332
size_t CloudFileSystemImpl::TEST_NumScheduledJobs() const {
2310-
return cloud_file_deletion_scheduler_ ? cloud_file_deletion_scheduler_->TEST_NumScheduledJobs() : 0;
2333+
return cloud_file_deletion_scheduler_
2334+
? cloud_file_deletion_scheduler_->TEST_NumScheduledJobs()
2335+
: 0;
23112336
}
23122337

23132338
#endif

cloud/cloud_file_system_impl.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <condition_variable>
66
#include <mutex>
77
#include <thread>
8+
#include <set>
89

910
#include "cloud/cloud_manifest.h"
1011
#include "port/port_posix.h"
@@ -151,13 +152,17 @@ class CloudFileSystemImpl : public CloudFileSystem {
151152

152153
// Find all live files based on cloud_manifest_ and local MANIFEST FILE
153154
// If local MANIFEST file doesn't exist, it will pull from cloud
154-
//
155+
//
155156
// REQUIRES: cloud_manifest_ is loaded
156157
// REQUIRES: cloud_manifest_ is not updated when calling this function
157158
IOStatus FindAllLiveFiles(const std::string& local_dbname,
158159
std::vector<std::string>* live_sst_files,
159160
std::string* manifest_file) override;
160161

162+
IOStatus FindLiveFilesFromLocalManifest(
163+
const std::string& manifest_file,
164+
std::vector<std::string>* live_sst_files) override;
165+
161166
IOStatus extractParents(const std::string& bucket_name_prefix,
162167
const DbidList& dbid_list, DbidParents* parents);
163168
IOStatus PreloadCloudManifest(const std::string& local_dbname) override;
@@ -256,8 +261,7 @@ class CloudFileSystemImpl : public CloudFileSystem {
256261
const CloudManifestDelta& delta) const override;
257262

258263
IOStatus GetMaxFileNumberFromCurrentManifest(
259-
const std::string& local_dbname,
260-
uint64_t* max_file_number) override;
264+
const std::string& local_dbname, uint64_t* max_file_number) override;
261265

262266
// Upload MANIFEST-epoch to the cloud
263267
IOStatus UploadManifest(const std::string& local_dbname,
@@ -374,10 +378,15 @@ class CloudFileSystemImpl : public CloudFileSystem {
374378
// 00010.sst-[epochX], but the real mapping for 00010.sst is [epochY], the
375379
// file will be treated as invisible
376380
bool IsFileInvisible(const std::vector<std::string>& active_cookies,
377-
const std::string& fname) const;
381+
const std::string& fname) const;
378382

379383
void log(InfoLogLevel level, const std::string& fname,
380384
const std::string& msg);
385+
386+
// Remap SST file numbers to file names
387+
void RemapFileNumbers(const std::set<uint64_t>& file_numbers,
388+
std::vector<std::string>* sst_file_names);
389+
381390
// Fetch the cloud manifest based on the cookie
382391
IOStatus FetchCloudManifest(const std::string& local_dbname,
383392
const std::string& cookie);
@@ -386,6 +395,7 @@ class CloudFileSystemImpl : public CloudFileSystem {
386395
IOStatus FetchManifest(const std::string& local_dbname,
387396
const std::string& epoch);
388397
std::string GenerateNewEpochId();
398+
389399
std::unique_ptr<CloudManifest> cloud_manifest_;
390400
// This runs only in tests when we want to disable cloud manifest
391401
// functionality

cloud/cloud_file_system_wrapper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,12 @@ class MockCloudFileSystem : public CloudFileSystem {
248248
return notsup_;
249249
}
250250

251+
IOStatus FindLiveFilesFromLocalManifest(
252+
const std::string& /* manifest_file */,
253+
std::vector<std::string>* /* live_sst_files */) override {
254+
return notsup_;
255+
}
256+
251257
private:
252258
IOStatus notsup_;
253259
std::string empty_;

cloud/db_cloud_test.cc

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <atomic>
1313
#include <chrono>
1414
#include <cinttypes>
15+
#include <filesystem>
1516

1617
#include "cloud/cloud_file_deletion_scheduler.h"
1718
#include "cloud/cloud_file_system_impl.h"
@@ -429,7 +430,7 @@ class CloudTest : public testing::Test {
429430
ASSERT_EQ(sst_files.size(), 1);
430431
}
431432

432-
// check that fname existsin in src bucket/object path
433+
// check that fname exists in src bucket/object path
433434
rocksdb::Status ExistsCloudObject(const std::string& filename) const {
434435
return GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
435436
GetCloudFileSystem()->GetSrcBucketName(),
@@ -593,6 +594,47 @@ TEST_F(CloudTest, GetChildrenTest) {
593594
EXPECT_EQ(sst_files, 1);
594595
}
595596

597+
TEST_F(CloudTest, FindLiveFilesFromLocalManifestTest) {
598+
OpenDB();
599+
ASSERT_OK(db_->Put(WriteOptions(), "Hello", "Universe"));
600+
ASSERT_OK(db_->Flush(FlushOptions()));
601+
602+
// wait until files are persisted into s3
603+
GetDBImpl()->TEST_WaitForBackgroundWork();
604+
605+
CloseDB();
606+
607+
// determine the manifest name and store a copy in a different location
608+
auto cfs = GetCloudFileSystem();
609+
auto manifest_file = cfs->RemapFilename("MANIFEST");
610+
auto manifest_path = std::filesystem::path(dbname_) / manifest_file;
611+
612+
auto alt_manifest_path =
613+
std::filesystem::temp_directory_path() / ("ALT-" + manifest_file);
614+
std::filesystem::copy_file(manifest_path, alt_manifest_path);
615+
616+
DestroyDir(dbname_);
617+
618+
std::vector<std::string> tablefiles;
619+
// verify the copied manifest can be processed correctly
620+
ASSERT_OK(GetCloudFileSystem()->FindLiveFilesFromLocalManifest(
621+
alt_manifest_path, &tablefiles));
622+
623+
// verify the result
624+
EXPECT_EQ(tablefiles.size(), 1);
625+
626+
for (auto name : tablefiles) {
627+
EXPECT_EQ(GetFileType(name), RocksDBFileType::kSstFile);
628+
// verify that the sst file indeed exists in cloud
629+
EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
630+
GetCloudFileSystem()->GetSrcBucketName(),
631+
GetCloudFileSystem()->GetSrcObjectPath() + pathsep + name));
632+
}
633+
634+
// clean up
635+
std::filesystem::remove(alt_manifest_path);
636+
}
637+
596638
//
597639
// Create and read from a clone.
598640
//

cloud/manifest_reader.cc

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ IOStatus LocalManifestReader::GetLiveFilesLocally(
3535
std::unique_ptr<SequentialFileReader> manifest_file_reader;
3636
IOStatus s;
3737
{
38-
// file name here doesn't matter, it will always be mapped to the correct Manifest file.
39-
// use empty epoch here so that it will be recognized as manifest file type
38+
// The file name here doesn't matter; it will always be mapped to the correct
39+
// Manifest file. Use empty epoch here so that it will be recognized as
40+
// manifest file type
4041
auto local_manifest_file = cfs_impl->RemapFilename(
4142
ManifestFileWithEpoch(local_dbname, "" /* epoch */));
4243

@@ -53,6 +54,27 @@ IOStatus LocalManifestReader::GetLiveFilesLocally(
5354
return GetLiveFilesFromFileReader(std::move(manifest_file_reader), list);
5455
}
5556

57+
IOStatus LocalManifestReader::GetManifestLiveFiles(
58+
const std::string& manifest_file, std::set<uint64_t>* list) const {
59+
auto* cfs_impl = dynamic_cast<CloudFileSystemImpl*>(cfs_);
60+
assert(cfs_impl);
61+
62+
std::unique_ptr<SequentialFileReader> manifest_file_reader;
63+
IOStatus s;
64+
{
65+
std::unique_ptr<FSSequentialFile> file;
66+
s = cfs_impl->NewSequentialFile(manifest_file, FileOptions(), &file,
67+
nullptr /*dbg*/);
68+
if (!s.ok()) {
69+
return s;
70+
}
71+
manifest_file_reader.reset(
72+
new SequentialFileReader(std::move(file), manifest_file));
73+
}
74+
75+
return GetLiveFilesFromFileReader(std::move(manifest_file_reader), list);
76+
}
77+
5678
IOStatus LocalManifestReader::GetLiveFilesFromFileReader(
5779
std::unique_ptr<SequentialFileReader> file_reader,
5880
std::set<uint64_t>* list) const {
@@ -92,8 +114,7 @@ IOStatus LocalManifestReader::GetLiveFilesFromFileReader(
92114
uint64_t num = one.second;
93115
// Deleted files should belong to some CF
94116
auto it = cf_live_files.find(edit.GetColumnFamily());
95-
if ((it == cf_live_files.end()) ||
96-
(it->second.count(level) == 0) ||
117+
if ((it == cf_live_files.end()) || (it->second.count(level) == 0) ||
97118
(it->second[level].count(num) == 0)) {
98119
return IOStatus::Corruption(
99120
"Corrupted Manifest file with unrecognized deleted file: " +
@@ -158,8 +179,8 @@ IOStatus ManifestReader::GetLiveFiles(const std::string& bucket_path,
158179
}
159180
std::unique_ptr<SequentialFileReader> file_reader;
160181
{
161-
auto manifestFile = ManifestFileWithEpoch(
162-
bucket_path, cloud_manifest->GetCurrentEpoch());
182+
auto manifestFile =
183+
ManifestFileWithEpoch(bucket_path, cloud_manifest->GetCurrentEpoch());
163184
std::unique_ptr<FSSequentialFile> file;
164185
s = cfs_->NewSequentialFileCloud(bucket_prefix_, manifestFile, file_opts,
165186
&file, dbg);

cloud/manifest_reader.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,18 @@ class LocalManifestReader {
2929
IOStatus GetLiveFilesLocally(const std::string& local_dbname,
3030
std::set<uint64_t>* list) const;
3131

32+
// Read given local manifest file and return all live files that it
33+
// references. This doesn't rely on CLOUDMANIFEST and just accepts (any valid)
34+
// manifest file.
35+
//
36+
// Provided manifest file is not updated or pulled from cloud when calling the
37+
// function.
38+
IOStatus GetManifestLiveFiles(const std::string& manifest_file,
39+
std::set<uint64_t>* list) const;
40+
3241
protected:
33-
// Get all the live sst file number by reading version_edit records from file_reader
42+
// Get all the live SST file numbers by reading version_edit records from
43+
// file_reader
3444
IOStatus GetLiveFilesFromFileReader(
3545
std::unique_ptr<SequentialFileReader> file_reader,
3646
std::set<uint64_t>* list) const;
@@ -42,7 +52,7 @@ class LocalManifestReader {
4252
//
4353
// Operates on MANIFEST files stored in the cloud bucket directly
4454
//
45-
class ManifestReader: public LocalManifestReader {
55+
class ManifestReader : public LocalManifestReader {
4656
public:
4757
ManifestReader(std::shared_ptr<Logger> info_log, CloudFileSystem* cfs,
4858
const std::string& bucket_prefix);

0 commit comments

Comments
 (0)