Skip to content

Commit e88935a

Browse files
committed
Add allow_asterisks_in_http_paths setting, which defaults to false, so that globs on the HTTP file system now throw
1 parent 7201bc9 commit e88935a

File tree

3 files changed

+38
-0
lines changed

3 files changed

+38
-0
lines changed

src/httpfs_extension.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ static void LoadInternal(ExtensionLoader &loader) {
6262
"http_keep_alive",
6363
"Keep alive connections. Setting this to false can help when running into connection failures",
6464
LogicalType::BOOLEAN, Value(true));
65+
config.AddExtensionOption("allow_asterisks_in_http_paths", "Allow '*' character in URLs users can query",
66+
LogicalType::BOOLEAN, Value(false));
6567
config.AddExtensionOption("enable_curl_server_cert_verification",
6668
"Enable server side certificate verification for CURL backend.", LogicalType::BOOLEAN,
6769
Value(true));

src/include/httpfs.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,14 @@ class HTTPFileSystem : public FileSystem {
139139
static bool TryParseLastModifiedTime(const string &timestamp, timestamp_t &result);
140140

141141
// Returns `path` unchanged as the single result; no pattern expansion is performed here.
// If `path` contains '*' and `opener` is non-null, the "allow_asterisks_in_http_paths" setting is
// looked up through the opener; when the lookup succeeds and the value is false, an
// InvalidInputException is thrown instead of returning.
// The check is skipped when `opener` is null or when TryGetCurrentSetting returns false.
vector<OpenFileInfo> Glob(const string &path, FileOpener *opener = nullptr) override {
142+
if (path.find('*') != std::string::npos && opener) { // guard only runs when an opener is available
143+
Value setting_val;
144+
if (FileOpener::TryGetCurrentSetting(opener, "allow_asterisks_in_http_paths", setting_val) &&
145+
!setting_val.GetValue<bool>()) { // setting found and explicitly false -> reject the path
146+
throw InvalidInputException("Globs (`*`) for generic HTTP file is are not supported.\nConsider `SET "
147+
"allow_asterisks_in_http_paths = true;` to allow this behaviour"); // NOTE(review): "file is are" is a typo; test/sql/httpfs/globbing.test matches this text, so fix both together
148+
}
149+
}
142150
return {path}; // FIXME: the path is passed through verbatim as the only entry, no expansion is done
143151
}
144152

test/sql/httpfs/globbing.test

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# name: test/sql/httpfs/globbing.test
2+
# description: Ensure globs (`*`) on generic HTTP paths are rejected unless allow_asterisks_in_http_paths is enabled
3+
# group: [httpfs]
4+
5+
require parquet
6+
7+
require httpfs
8+
9+
statement error
10+
select parse_path(filename), size, part, date from read_parquet('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/hive-partitioning/simple/*/*/test.parquet') order by filename;
11+
----
12+
Invalid Input Error: Globs (`*`) for generic HTTP file is are not supported.
13+
14+
statement ok
15+
SET allow_asterisks_in_http_paths = true;
16+
17+
statement error
18+
select parse_path(filename), size, part, date from read_parquet('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/hive-partitioning/simple/*/*/test.parquet') order by filename;
19+
----
20+
HTTP Error: Unable to connect to URL
21+
22+
statement ok
23+
SET allow_asterisks_in_http_paths = false;
24+
25+
statement error
26+
select parse_path(filename), size, part, date from read_parquet('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/hive-partitioning/simple/*/*/test.parquet') order by filename;
27+
----
28+
Invalid Input Error: Globs (`*`) for generic HTTP file is are not supported.

0 commit comments

Comments
 (0)