Skip to content

Commit 92eb8ef

Browse files
committed
Faster globbing
1 parent dbbbd89 commit 92eb8ef

File tree

1 file changed

+29
-8
lines changed

1 file changed

+29
-8
lines changed

src/s3fs.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,12 +1095,16 @@ static bool Match(vector<string>::const_iterator key, vector<string>::const_iter
10951095
vector<string>::const_iterator pattern, vector<string>::const_iterator pattern_end) {
10961096

10971097
while (key != key_end && pattern != pattern_end) {
1098+
if (*key == "**") {
1099+
return true;
1100+
}
10981101
if (*pattern == "**") {
10991102
if (std::next(pattern) == pattern_end) {
11001103
return true;
11011104
}
1105+
pattern ++;
11021106
while (key != key_end) {
1103-
if (Match(key, key_end, std::next(pattern), pattern_end)) {
1107+
if (Match(key, key_end, pattern, pattern_end)) {
11041108
return true;
11051109
}
11061110
key++;
@@ -1113,6 +1117,12 @@ static bool Match(vector<string>::const_iterator key, vector<string>::const_iter
11131117
key++;
11141118
pattern++;
11151119
}
1120+
if (*pattern == "**") {
1121+
while (*pattern == "**") pattern++;
1122+
if (pattern == pattern_end) {
1123+
return true;
1124+
}
1125+
}
11161126
return key == key_end && pattern == pattern_end;
11171127
}
11181128

@@ -1183,12 +1193,23 @@ bool S3GlobResult::ExpandNextPath() const {
11831193
// we have common prefixes left to scan - perform the request
11841194
auto prefix_path = parsed_s3_url.prefix + parsed_s3_url.bucket + '/' + current_common_prefix;
11851195

1186-
auto prefix_res =
1187-
AWSListObjectV2::Request(prefix_path, *http_params, s3_auth_params, common_prefix_continuation_token);
1188-
AWSListObjectV2::ParseFileList(prefix_res, s3_keys);
1189-
auto more_prefixes = AWSListObjectV2::ParseCommonPrefix(prefix_res);
1190-
common_prefixes.insert(common_prefixes.end(), more_prefixes.begin(), more_prefixes.end());
1191-
common_prefix_continuation_token = AWSListObjectV2::ParseContinuationToken(prefix_res);
1196+
1197+
vector<string> pattern_splits = StringUtil::Split(parsed_s3_url.key, "/");
1198+
vector<string> key_splits = StringUtil::Split(current_common_prefix, "/");
1199+
//pattern_splits.resize(key_splits.size());
1200+
key_splits.push_back("**");
1201+
const bool is_match = Match(key_splits.begin(), key_splits.end(), pattern_splits.begin(), pattern_splits.end());
1202+
std::cout << current_common_prefix << "\t" << parsed_s3_url.key << "\t" << (is_match ? "MATCH" : "no" )<< "\n";
1203+
if (is_match) {
1204+
auto prefix_res = AWSListObjectV2::Request(prefix_path, *http_params, s3_auth_params,
1205+
common_prefix_continuation_token, true);
1206+
1207+
AWSListObjectV2::ParseFileList(prefix_res, s3_keys);
1208+
auto more_prefixes = AWSListObjectV2::ParseCommonPrefix(prefix_res);
1209+
common_prefixes.insert(common_prefixes.end(), more_prefixes.begin(), more_prefixes.end());
1210+
common_prefix_continuation_token = AWSListObjectV2::ParseContinuationToken(prefix_res);
1211+
}
1212+
11921213
if (common_prefix_continuation_token.empty()) {
11931214
// we are done with the current common prefix
11941215
// either move on to the next one, or finish up
@@ -1207,7 +1228,7 @@ bool S3GlobResult::ExpandNextPath() const {
12071228
}
12081229
// issue the main request
12091230
string response_str =
1210-
AWSListObjectV2::Request(shared_path, *http_params, s3_auth_params, main_continuation_token);
1231+
AWSListObjectV2::Request(shared_path, *http_params, s3_auth_params, main_continuation_token, true);
12111232
main_continuation_token = AWSListObjectV2::ParseContinuationToken(response_str);
12121233
AWSListObjectV2::ParseFileList(response_str, s3_keys);
12131234

0 commit comments

Comments
 (0)