Skip to content

Commit 0c581ec

Browse files
author
Duc Hieu Pham
committed
Options to disable GetChildren from cloud
Summary: Add an option that short-circuits the cloud listing in `GetChildren`. This option is useful during DB opening, where `GetChildren` becomes too expensive because it is called multiple times. Only use this when opening the DB in a temporary clone. Test Plan: Run in my namespace. This cuts the DB opening time from 7s to < 800ms, which makes a huge difference during remote compaction. {F37274} Reviewers: dhruba, igor, #platform Reviewed By: dhruba, igor, #platform Differential Revision: https://rockset.phacility.com/D6035
1 parent 8a3b26f commit 0c581ec

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

cloud/aws/aws_env.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,7 @@ Status AwsEnv::GetChildren(const std::string& path,
796796

797797
// Fetch the list of children from both buckets in S3
798798
Status st;
799-
if (HasSrcBucket()) {
799+
if (HasSrcBucket() && !cloud_env_options.skip_cloud_files_in_getchildren) {
800800
st = cloud_env_options.storage_provider->ListObjects(
801801
GetSrcBucketName(), GetSrcObjectPath(), result);
802802
if (!st.ok()) {
@@ -806,7 +806,8 @@ Status AwsEnv::GetChildren(const std::string& path,
806806
return st;
807807
}
808808
}
809-
if (HasDestBucket() && !SrcMatchesDest()) {
809+
if (HasDestBucket() && !SrcMatchesDest() &&
810+
!cloud_env_options.skip_cloud_files_in_getchildren) {
810811
st = cloud_env_options.storage_provider->ListObjects(
811812
GetDestBucketName(), GetDestObjectPath(), result);
812813
if (!st.ok()) {

include/rocksdb/cloud/cloud_env_options.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,19 @@ class CloudEnvOptions {
287287
// Default: -1, means don't use this option.
288288
int64_t constant_sst_file_size_in_sst_file_manager;
289289

290+
// Skip listing files in the cloud in GetChildren. That means GetChildren
291+
// will only return files in the local directory. During DB opening, RocksDB
292+
// makes multiple GetChildren calls, which are very expensive if we list
293+
// objects in the cloud.
294+
//
295+
// This option is used in remote compaction where we open the DB in a
296+
// temporary folder, and then the folder is deleted after the RPC is done.
297+
// This requires opening DB to be really fast, and it's unnecessary to clean up
298+
// various things, which is what RocksDB calls GetChildren for.
299+
//
300+
// Default: false.
301+
bool skip_cloud_files_in_getchildren;
302+
290303
CloudEnvOptions(
291304
CloudType _cloud_type = CloudType::kCloudAws,
292305
LogType _log_type = LogType::kLogKafka,
@@ -300,7 +313,8 @@ class CloudEnvOptions {
300313
bool _skip_dbid_verification = false,
301314
bool _use_aws_transfer_manager = false,
302315
int _number_objects_listed_in_one_iteration = 5000,
303-
bool _constant_sst_file_size_in_sst_file_manager = -1)
316+
bool _constant_sst_file_size_in_sst_file_manager = -1,
317+
bool _skip_cloud_files_in_getchildren = false)
304318
: cloud_type(_cloud_type),
305319
log_type(_log_type),
306320
keep_local_sst_files(_keep_local_sst_files),
@@ -319,7 +333,8 @@ class CloudEnvOptions {
319333
number_objects_listed_in_one_iteration(
320334
_number_objects_listed_in_one_iteration),
321335
constant_sst_file_size_in_sst_file_manager(
322-
_constant_sst_file_size_in_sst_file_manager) {}
336+
_constant_sst_file_size_in_sst_file_manager),
337+
skip_cloud_files_in_getchildren(_skip_cloud_files_in_getchildren) {}
323338

324339
// print out all options to the log
325340
void Dump(Logger* log) const;

0 commit comments

Comments
 (0)