
Commit cfe7c0a

branch-4.0: [tool](filecache) support define s3 prefix in benchmark #60019 (#60174)
Cherry-picked from #60019

Co-authored-by: zhengyu <[email protected]>
1 parent 4e57f46 commit cfe7c0a

File tree: 1 file changed (+27, -6)


be/src/io/tools/file_cache_microbench.cpp

Lines changed: 27 additions & 6 deletions
@@ -94,6 +94,27 @@ const size_t BUFFER_SIZE = 1024 * 1024;
 // Just 10^9.
 static constexpr auto NS = 1000000000UL;
 
+static std::string normalize_benchmark_prefix(std::string_view raw_prefix) {
+    std::string normalized {doris::trim(raw_prefix)};
+    while (!normalized.empty() && normalized.front() == '/') {
+        normalized.erase(normalized.begin());
+    }
+    while (!normalized.empty() && normalized.back() == '/') {
+        normalized.pop_back();
+    }
+    return normalized;
+}
+
+static std::string get_prefix() {
+    std::string prefix = HIDDEN_PREFIX;
+    std::string subdir = normalize_benchmark_prefix(doris::config::test_s3_prefix);
+    if (!subdir.empty()) {
+        prefix += subdir;
+        prefix += "/";
+    }
+    return prefix;
+}
+
 DEFINE_int32(port, 8888, "Http Port of this server");
 
 static std::string build_info() {
@@ -478,7 +499,7 @@ std::string get_usage(const std::string& progname) {
         "read_iops": <limit>, // IOPS limit for reading per segment files
         "num_threads": <count>, // Number of threads in the thread pool, default 200
         "num_files": <count>, // Number of segments to write/read
-        "file_prefix": "<prefix>", // Prefix for segment files, Notice: this tools hide prefix(test_file_cache_microbench/) before file_prefix
+        "file_prefix": "<prefix>", // Prefix for segment files, key prefix is test_file_cache_microbench/<test_s3_prefix>/
         "write_batch_size": <size>, // Size of data to write in each write operation
         "cache_type": <type>, // Write or Read data enter file cache queue type, support NORMAL | TTL | INDEX | DISPOSABLE, default NORMAL
         "expiration": <timestamp>, // File cache ttl expire time, value is a unix timestamp
@@ -711,7 +732,7 @@ struct JobConfig {
                 "repeat: {}, expiration: {}, cache_type: {}, read_offset: [{}, {}), "
                 "read_length: [{}, {})",
                 size_bytes_perfile, write_iops, read_iops, num_threads, num_files,
-                HIDDEN_PREFIX + file_prefix, write_file_cache, write_batch_size, repeat, expiration,
+                get_prefix() + file_prefix, write_file_cache, write_batch_size, repeat, expiration,
                 cache_type, read_offset_left, read_offset_right, read_length_left,
                 read_length_right);
     }
@@ -997,7 +1018,7 @@ class JobManager {
         // If it's a read-only job, find the previously written files
         if (config.read_iops > 0 && config.write_iops == 0) {
             std::string old_job_id =
-                    s3_file_records.find_job_id_by_prefix(HIDDEN_PREFIX + config.file_prefix);
+                    s3_file_records.find_job_id_by_prefix(get_prefix() + config.file_prefix);
             if (old_job_id.empty()) {
                 throw std::runtime_error(
                         "Can't find previously job uploaded files. Please make sure read "
@@ -1010,7 +1031,7 @@
 
         // Generate file keys
         for (int i = 0; i < config.num_files; ++i) {
-            keys.push_back(HIDDEN_PREFIX + config.file_prefix + "/" + rewrite_job_id + "_" +
+            keys.push_back(get_prefix() + config.file_prefix + "/" + rewrite_job_id + "_" +
                            std::to_string(i));
         }
 
@@ -1137,7 +1158,7 @@ class JobManager {
         auto start_time = std::chrono::steady_clock::now();
 
         int64_t exist_job_perfile_size = s3_file_records.get_exist_job_perfile_size_by_prefix(
-                HIDDEN_PREFIX + config.file_prefix);
+                get_prefix() + config.file_prefix);
         std::vector<std::future<void>> read_futures;
         doris::io::IOContext io_ctx;
         doris::io::FileCacheStatistics total_stats;
@@ -1160,7 +1181,7 @@ class JobManager {
         std::vector<std::string> read_files;
         if (exist_job_perfile_size != -1) {
             // read exist files
-            s3_file_records.get_exist_job_files_by_prefix(HIDDEN_PREFIX + config.file_prefix,
+            s3_file_records.get_exist_job_files_by_prefix(get_prefix() + config.file_prefix,
                                                           read_files, config.num_files);
         }
 
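
Note: the following is a minimal standalone sketch of the prefix handling this patch introduces, not part of the commit itself. It assumes HIDDEN_PREFIX equals "test_file_cache_microbench/" (as stated in the get_usage() text above) and substitutes a local trim() for doris::trim; in the actual tool the subdirectory comes from doris::config::test_s3_prefix rather than a function parameter.

// prefix_sketch.cpp - illustrates how the benchmark key prefix is built.
#include <cctype>
#include <iostream>
#include <string>
#include <string_view>

// Stand-in for doris::trim: strips leading/trailing whitespace.
static std::string_view trim(std::string_view s) {
    while (!s.empty() && std::isspace(static_cast<unsigned char>(s.front()))) s.remove_prefix(1);
    while (!s.empty() && std::isspace(static_cast<unsigned char>(s.back()))) s.remove_suffix(1);
    return s;
}

static std::string normalize_benchmark_prefix(std::string_view raw_prefix) {
    std::string normalized {trim(raw_prefix)};
    // Drop leading and trailing '/' so the subdirectory splices cleanly into the key.
    while (!normalized.empty() && normalized.front() == '/') normalized.erase(normalized.begin());
    while (!normalized.empty() && normalized.back() == '/') normalized.pop_back();
    return normalized;
}

// Takes the subdirectory as a parameter; the patched tool reads doris::config::test_s3_prefix.
static std::string get_prefix(std::string_view test_s3_prefix) {
    std::string prefix = "test_file_cache_microbench/"; // assumed value of HIDDEN_PREFIX
    std::string subdir = normalize_benchmark_prefix(test_s3_prefix);
    if (!subdir.empty()) {
        prefix += subdir;
        prefix += "/";
    }
    return prefix;
}

int main() {
    std::cout << get_prefix("") << "\n";               // test_file_cache_microbench/
    std::cout << get_prefix(" /bench/run1/ ") << "\n"; // test_file_cache_microbench/bench/run1/
}

With an empty test_s3_prefix the key prefix stays test_file_cache_microbench/ (the previous behavior); a value such as " /bench/run1/ " is trimmed and normalized so the resulting key prefix is test_file_cache_microbench/bench/run1/.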
