@@ -57,7 +57,7 @@ namespace duckdb
5757 {
5858 email_domain = email_domain.substr (0 , end_pos);
5959 }
60-
60+
6161 // Process the email domain directly
6262 std::string tld = getEffectiveTLD (email_domain);
6363 if (tld.empty ())
@@ -71,7 +71,7 @@ namespace duckdb
7171 }
7272
7373 // Extract domain.tld from email domain
74- if (email_domain.length () > tld.length () &&
74+ if (email_domain.length () > tld.length () &&
7575 email_domain.substr (email_domain.length () - tld.length ()) == tld)
7676 {
7777 size_t tld_start = email_domain.length () - tld.length ();
@@ -103,25 +103,25 @@ namespace duckdb
103103 bool has_path = input.find (' /' ) != std::string::npos;
104104 bool has_query = input.find (' ?' ) != std::string::npos;
105105 bool has_fragment = input.find (' #' ) != std::string::npos;
106-
106+
107107 if (!has_protocol && !has_path && !has_query && !has_fragment)
108108 {
109109 // Check for IPv6 addresses in brackets - these should return empty
110110 if (input.front () == ' [' && input.back () == ' ]' )
111111 {
112112 return " " ;
113113 }
114-
114+
115115 // Treat entire input as hostname, but strip port if present
116116 size_t colon_pos = input.find (' :' );
117117 size_t host_length = (colon_pos != std::string::npos) ? colon_pos : size;
118-
118+
119119 // Reject single characters as invalid hostnames
120120 if (host_length <= 1 )
121121 {
122122 return " " ;
123123 }
124-
124+
125125 // Single-word hostnames: only accept valid TLDs (e.g., "com"), reject others (e.g., "localhost")
126126 std::string temp_host (data, host_length);
127127 if (temp_host.find (' .' ) == std::string::npos)
@@ -134,7 +134,7 @@ namespace duckdb
134134 // If it's a valid TLD, return it directly
135135 return temp_host;
136136 }
137-
137+
138138 host = std::string_view (data, host_length);
139139 }
140140 else
@@ -149,15 +149,15 @@ namespace duckdb
149149 host.remove_suffix (1 );
150150
151151 std::string host_str (host);
152-
152+
153153 // For IPv4 addresses return empty
154154 const char * last_dot = find_last_symbols_or_null<' .' >(host.data (), host.data () + host.size ());
155155 if (last_dot && isNumericASCII (last_dot[1 ]))
156156 return " " ;
157157
158158 // Apply public suffix algorithm to find longest matching TLD
159159 std::string tld = getEffectiveTLD (host_str);
160-
160+
161161 // If no TLD found, return entire host (for cases like single words)
162162 if (tld.empty ())
163163 {
@@ -175,15 +175,15 @@ namespace duckdb
175175 for (char c : host_str) {
176176 if (c == ' .' ) dot_count++;
177177 }
178-
178+
179179 // If no dots, this is not a proper domain (like "localhost")
180180 if (dot_count == 0 )
181181 {
182182 return " " ;
183183 }
184184
185185 // Find where the TLD starts in the hostname
186- if (host_str.length () > tld.length () &&
186+ if (host_str.length () > tld.length () &&
187187 host_str.substr (host_str.length () - tld.length ()) == tld)
188188 {
189189 // Check if there's a dot before the TLD
0 commit comments