Skip to content

Commit 24899d5

Browse files
committed
Support ftp:// and rsync:// URL schemes in extract_host
1 parent 30f3637 commit 24899d5

File tree

2 files changed

+16
-5
lines changed

2 files changed

+16
-5
lines changed

src/functions/extract_host.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@ namespace duckdb
24 24
{
25 25
// Regex to match the host component of a URL
26 26
// Explanation:
27-
// ^ - Start of the string
28-
// (?: - Non-capturing group for the optional protocol
29-
// https?:\/\/ - Matches "http://" or "https://"
27+
// ^ - Start of the string
28+
// (?: - Non-capturing group for the optional protocol
29+
// (?:ftp|https?|rsync) - Non-capturing group for the scheme
30+
// :\/\/ - Matches "://"
30 31
// )?
31-
// ([^\/\s:?#]+) - Capturing group for the host (any characters except '/', ':', '?', '#', or whitespace)
32-
std::regex host_regex (R"(^(?:https?:\/\/)?([^\/\s:?#]+))");
32+
// ([^\/\s:?#]+) - Capturing group for the host (any characters except '/', ':', '?', '#', or whitespace)
33+
std::regex host_regex (R"(^(?:(?:ftp|https?|rsync):\/\/)?([^\/\s:?#]+))");
33 34
std::smatch host_match;
34 35

35 36
// Use regex_search to find the host component in the input string

test/sql/extract_host.test

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,13 @@ query I
53 53
SELECT extract_host('example.com.ac:443/path/path');
54 54
----
55 55
example.com.ac
56+
57+
query I
58+
SELECT extract_host('ftp://ftp.example.com/path');
59+
----
60+
ftp.example.com
61+
62+
query I
63+
SELECT extract_host('rsync://rpki.example.com/path');
64+
----
65+
rpki.example.com

0 commit comments

Comments
 (0)