2 files changed: +85 -2 lines changed
@@ -41,14 +41,14 @@ namespace duckdb
4141 Connection con (db);
4242
4343 // Extract the host from the URL
44- std::regex host_regex (R"( ^(?:(?:https?|ftp|rsync):\/\/)?([^\/\?:]+ ))" );
44+ std::regex host_regex (R"( ^(?:(?:https?|ftp|rsync):\/\/|mailto: )?((?: [^\/\?:#@]+@)?([^\/\?:#]+) ))" );
4545 std::smatch host_match;
4646 if (!std::regex_search (input, host_match, host_regex))
4747 {
4848 return " " ;
4949 }
5050
51- auto host = host_match[1 ].str ();
51+ auto host = host_match[host_match. size () - 1 ].str ();
5252
5353 // Split the host into parts
5454 std::vector<std::string> parts;
Original file line number Diff line number Diff line change @@ -194,3 +194,86 @@ example.com.ac
194194example.com
195195example.com
196196example.com.ac
197+
198+ # Test IP addresses
199+ query I
200+ SELECT extract_domain('http://192.168.1.1');
201+ ----
202+ (empty)
203+
204+ query I
205+ SELECT extract_domain('http://192.168.1.1:8080');
206+ ----
207+ (empty)
208+
209+ query I
210+ SELECT extract_domain('192.168.1.1');
211+ ----
212+ (empty)
213+
214+ # Test IPv6 addresses
215+ query I
216+ SELECT extract_domain('http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]');
217+ ----
218+ (empty)
219+
220+ query I
221+ SELECT extract_domain('[2001:0db8:85a3:0000:0000:8a2e:0370:7334]');
222+ ----
223+ (empty)
224+
225+ # Test URLs with query parameters
226+ query I
227+ SELECT extract_domain('https://example.com?param=value');
228+ ----
229+ example.com
230+
231+ query I
232+ SELECT extract_domain('https://example.com/path?param=value');
233+ ----
234+ example.com
235+
236+ # Test URLs with fragments
237+ query I
238+ SELECT extract_domain('https://example.com#section');
239+ ----
240+ example.com
241+
242+ query I
243+ SELECT extract_domain('https://example.com/path#section');
244+ ----
245+ example.com
246+
247+ # Test combined query parameters and fragments
248+ query I
249+ SELECT extract_domain('https://example.com?param=value#section');
250+ ----
251+ example.com
252+
253+ # Test special and edge cases
254+ query I
255+ SELECT extract_domain('');
256+ ----
257+ (empty)
258+
259+ query I
260+ SELECT extract_domain(NULL);
261+ ----
262+ NULL
263+
264+ query I
265+ SELECT extract_domain('localhost');
266+ ----
267+ (empty)
268+
269+ # Test scheme with no authority
270+ query I
271+ SELECT extract_domain('file:///path/to/file');
272+ ----
273+ (empty)
274+
275+ # Test mailto URLs
276+ query I
277+ SELECT extract_domain('mailto:user@example.com');
278+ ----
279+ example.com
You can’t perform that action at this time.
0 commit comments