File tree Expand file tree Collapse file tree 2 files changed +10
-4
lines changed Expand file tree Collapse file tree 2 files changed +10
-4
lines changed Original file line number Diff line number Diff line change @@ -36,11 +36,12 @@ namespace duckdb
3636 // Explanation:
3737 // ^ - Start of the string
3838 // (?: - Non-capturing group for the protocol and domain part
39- // (?:(?:ftp|https?|rsync):\/\/)? - Optional ftp://, http://, https://, or rsync://
40- // (?:[^\/\s]+) - Domain name (any characters except '/' or whitespace)
39+ // (?:(?:ftp|https?|rsync):\/\/)? - Optional protocol (ftp://, http://, https://, or rsync://)
40+ // (?:[^\/\s]+) - Domain name or IP address (any characters except '/' or whitespace)
4141 // )
42- // (\/[^?#]*) - Capturing group for the path (starts with '/', followed by any characters except '?' or '#')
43- std::regex path_regex (R"( ^(?:(?:(?:ftp|https?|rsync):\/\/)?(?:[^\/\s]+))(\/[^?#]*))" );
42+ // (\/[^?#]*)? - Optional capturing group for the path (starts with '/', followed by any characters except '?' or '#')
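43+ // - The trailing '?' makes the path group optional, so the regex matches URLs with or without a path; when the URL has no path, capture group 1 does not participate in the match (it is unmatched and its string is empty)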
44+ std::regex path_regex (R"( ^(?:(?:(?:ftp|https?|rsync):\/\/)?(?:[^\/\s]+))(\/[^?#]*)?)" );
4445 std::smatch path_match;
4546
4647 // Use regex_search to find the path component in the input string
Original file line number Diff line number Diff line change @@ -7,6 +7,11 @@ require netquack
77statement ok
88CREATE TABLE uri_list AS SELECT * FROM read_csv('test/data/examples.csv', header=false, columns={'uri': 'VARCHAR'});
99
10+ query I
11+ SELECT extract_path('http://example.com.ac');
12+ ----
13+ /
14+
1015query I
1116SELECT extract_path('http://example.com.ac/path');
1217----
You can’t perform that action at this time.
0 commit comments