Skip to content

Commit 269606a

Browse files
committed
Add back handling for use_delimiter, making globs like s3://bucket/*-something/somethingelse/*.csv faster
1 parent 17011bf commit 269606a

File tree

2 files changed

+20
-9
lines changed

2 files changed

+20
-9
lines changed

src/include/s3fs.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,7 @@ class S3FileSystem : public HTTPFileSystem {
290290
// Helper class that performs the S3 ListObjectsV2 API call: https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
291291
struct AWSListObjectV2 {
292292
static string Request(const string &path, HTTPParams &http_params, const S3AuthParams &s3_auth_params,
293-
string &continuation_token);
293+
string &continuation_token, bool use_delimiter = false);
294294
static void ParseFileList(string &aws_response, vector<OpenFileInfo> &result);
295295
static vector<string> ParseCommonPrefix(string &aws_response);
296296
static string ParseContinuationToken(string &aws_response);

src/s3fs.cpp

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1114,12 +1114,19 @@ bool S3GlobResult::ExpandNextPath() const {
11141114
// we have common prefixes left to scan - perform the request
11151115
auto prefix_path = parsed_s3_url.prefix + parsed_s3_url.bucket + '/' + current_common_prefix;
11161116

1117-
auto prefix_res =
1118-
AWSListObjectV2::Request(prefix_path, *http_params, s3_auth_params, common_prefix_continuation_token);
1119-
AWSListObjectV2::ParseFileList(prefix_res, s3_keys);
1120-
auto more_prefixes = AWSListObjectV2::ParseCommonPrefix(prefix_res);
1121-
common_prefixes.insert(common_prefixes.end(), more_prefixes.begin(), more_prefixes.end());
1122-
common_prefix_continuation_token = AWSListObjectV2::ParseContinuationToken(prefix_res);
1117+
vector<string> pattern_splits = StringUtil::Split(parsed_s3_url.key, "/");
1118+
vector<string> key_splits = StringUtil::Split(current_common_prefix, "/");
1119+
pattern_splits.resize(key_splits.size());
1120+
bool is_match = Match(key_splits.begin(), key_splits.end(), pattern_splits.begin(), pattern_splits.end());
1121+
if (is_match) {
1122+
auto prefix_res = AWSListObjectV2::Request(prefix_path, *http_params, s3_auth_params,
1123+
common_prefix_continuation_token, true);
1124+
1125+
AWSListObjectV2::ParseFileList(prefix_res, s3_keys);
1126+
auto more_prefixes = AWSListObjectV2::ParseCommonPrefix(prefix_res);
1127+
common_prefixes.insert(common_prefixes.end(), more_prefixes.begin(), more_prefixes.end());
1128+
common_prefix_continuation_token = AWSListObjectV2::ParseContinuationToken(prefix_res);
1129+
}
11231130
if (common_prefix_continuation_token.empty()) {
11241131
// we are done with the current common prefix
11251132
// either move on to the next one, or finish up
@@ -1138,7 +1145,7 @@ bool S3GlobResult::ExpandNextPath() const {
11381145
}
11391146
// issue the main request
11401147
string response_str =
1141-
AWSListObjectV2::Request(shared_path, *http_params, s3_auth_params, main_continuation_token);
1148+
AWSListObjectV2::Request(shared_path, *http_params, s3_auth_params, main_continuation_token, true);
11421149
main_continuation_token = AWSListObjectV2::ParseContinuationToken(response_str);
11431150
AWSListObjectV2::ParseFileList(response_str, s3_keys);
11441151

@@ -1270,7 +1277,7 @@ HTTPException S3FileSystem::GetHTTPError(FileHandle &handle, const HTTPResponse
12701277
return GetS3Error(s3_handle.auth_params, response, url);
12711278
}
12721279
string AWSListObjectV2::Request(const string &path, HTTPParams &http_params, const S3AuthParams &s3_auth_params,
1273-
string &continuation_token) {
1280+
string &continuation_token, bool use_delimiter) {
12741281
auto parsed_url = S3FileSystem::S3UrlParse(path, s3_auth_params);
12751282

12761283
// Construct the ListObjectsV2 call
@@ -1284,6 +1291,10 @@ string AWSListObjectV2::Request(const string &path, HTTPParams &http_params, con
12841291
req_params += "encoding-type=url&list-type=2";
12851292
req_params += "&prefix=" + S3FileSystem::UrlEncode(parsed_url.key, true);
12861293

1294+
if (use_delimiter) {
1295+
req_params += "&delimiter=%2F";
1296+
}
1297+
12871298
auto header_map =
12881299
CreateS3Header(req_path, req_params, parsed_url.host, "s3", "GET", s3_auth_params, "", "", "", "");
12891300

0 commit comments

Comments
 (0)