Skip to content

Commit bbafe89

Browse files
committed
chore: format files
[skip ci]
1 parent 3081a3c commit bbafe89

12 files changed

+44
-44
lines changed

src/functions/extract_domain.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ namespace duckdb
5757
{
5858
email_domain = email_domain.substr(0, end_pos);
5959
}
60-
60+
6161
// Process the email domain directly
6262
std::string tld = getEffectiveTLD(email_domain);
6363
if (tld.empty())
@@ -71,7 +71,7 @@ namespace duckdb
7171
}
7272

7373
// Extract domain.tld from email domain
74-
if (email_domain.length() > tld.length() &&
74+
if (email_domain.length() > tld.length() &&
7575
email_domain.substr(email_domain.length() - tld.length()) == tld)
7676
{
7777
size_t tld_start = email_domain.length() - tld.length();
@@ -103,25 +103,25 @@ namespace duckdb
103103
bool has_path = input.find('/') != std::string::npos;
104104
bool has_query = input.find('?') != std::string::npos;
105105
bool has_fragment = input.find('#') != std::string::npos;
106-
106+
107107
if (!has_protocol && !has_path && !has_query && !has_fragment)
108108
{
109109
// Check for IPv6 addresses in brackets - these should return empty
110110
if (input.front() == '[' && input.back() == ']')
111111
{
112112
return "";
113113
}
114-
114+
115115
// Treat entire input as hostname, but strip port if present
116116
size_t colon_pos = input.find(':');
117117
size_t host_length = (colon_pos != std::string::npos) ? colon_pos : size;
118-
118+
119119
// Reject single characters as invalid hostnames
120120
if (host_length <= 1)
121121
{
122122
return "";
123123
}
124-
124+
125125
// Single-word hostnames: only accept valid TLDs (e.g., "com"), reject others (e.g., "localhost")
126126
std::string temp_host(data, host_length);
127127
if (temp_host.find('.') == std::string::npos)
@@ -134,7 +134,7 @@ namespace duckdb
134134
// If it's a valid TLD, return it directly
135135
return temp_host;
136136
}
137-
137+
138138
host = std::string_view(data, host_length);
139139
}
140140
else
@@ -149,15 +149,15 @@ namespace duckdb
149149
host.remove_suffix(1);
150150

151151
std::string host_str(host);
152-
152+
153153
// For IPv4 addresses return empty
154154
const char* last_dot = find_last_symbols_or_null<'.'>(host.data(), host.data() + host.size());
155155
if (last_dot && isNumericASCII(last_dot[1]))
156156
return "";
157157

158158
// Apply public suffix algorithm to find longest matching TLD
159159
std::string tld = getEffectiveTLD(host_str);
160-
160+
161161
// If no TLD found, return entire host (for cases like single words)
162162
if (tld.empty())
163163
{
@@ -175,15 +175,15 @@ namespace duckdb
175175
for (char c : host_str) {
176176
if (c == '.') dot_count++;
177177
}
178-
178+
179179
// If no dots, this is not a proper domain (like "localhost")
180180
if (dot_count == 0)
181181
{
182182
return "";
183183
}
184184

185185
// Find where the TLD starts in the hostname
186-
if (host_str.length() > tld.length() &&
186+
if (host_str.length() > tld.length() &&
187187
host_str.substr(host_str.length() - tld.length()) == tld)
188188
{
189189
// Check if there's a dot before the TLD

src/functions/extract_extension.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,34 +56,34 @@ namespace duckdb
5656
// Now pos points to the start of the path
5757
// Find the end of the path (before query or fragment)
5858
const char* path_end = find_first_symbols<'?', '#'>(pos, end);
59-
59+
6060
// Find the last slash in the path to get the filename
6161
const char* last_slash = find_last_symbols_or_null<'/'>(pos, path_end);
6262
const char* filename_start = last_slash ? last_slash + 1 : pos;
63-
63+
6464
// Find the last dot in the filename
6565
const char* last_dot = find_last_symbols_or_null<'.'>(filename_start, path_end);
6666
if (!last_dot || last_dot == filename_start)
6767
return "";
68-
68+
6969
// Check if there's a previous dot (avoid double dots like ..ext)
7070
if (last_dot > filename_start && *(last_dot - 1) == '.')
7171
return "";
72-
72+
7373
// Extract extension
7474
const char* ext_start = last_dot + 1;
7575
size_t ext_length = path_end - ext_start;
76-
76+
7777
// Validate extension (only alphanumeric, max 10 chars)
7878
if (ext_length == 0 || ext_length > 10)
7979
return "";
80-
80+
8181
for (size_t i = 0; i < ext_length; ++i)
8282
{
8383
if (!isAlphaNumericASCII(ext_start[i]))
8484
return "";
8585
}
86-
86+
8787
return std::string(ext_start, ext_length);
8888
}
8989
} // namespace netquack

src/functions/extract_host.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ namespace duckdb
3838
size_t size = input.size();
3939

4040
std::string_view host = getURLHost(data, size);
41-
41+
4242
return std::string(host);
4343
}
4444
} // namespace netquack

src/functions/extract_path.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,10 @@ namespace duckdb
6060
// Extract path without query string or fragment
6161
const char* query_string_or_fragment = find_first_symbols<'?', '#'>(pos, end);
6262
size_t path_size = query_string_or_fragment - pos;
63-
63+
6464
if (path_size == 0)
6565
return "/";
66-
66+
6767
return std::string(pos, path_size);
6868
}
6969
} // namespace netquack

src/functions/extract_port.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ namespace duckdb
8080
if (*p == '/' || *p == '?' || *p == '#')
8181
break;
8282
}
83-
83+
8484
if (at_pos)
8585
{
8686
pos = at_pos + 1; // skip authentication part
@@ -134,10 +134,10 @@ namespace duckdb
134134
break;
135135
if (!isNumericASCII(*p))
136136
return "";
137-
137+
138138
port += *p;
139139
}
140-
140+
141141
return port;
142142
}
143143
} // namespace netquack

src/functions/extract_query.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ namespace duckdb
4848

4949
// Find the fragment '#' character
5050
const char* fragment = find_first_symbols<'#'>(pos, end);
51-
51+
5252
size_t query_size = fragment - pos;
5353
if (query_size == 0)
5454
return "";
55-
55+
5656
return std::string(pos, query_size);
5757
}
5858
} // namespace netquack

src/functions/extract_subdomain.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ namespace duckdb
4848
host.remove_suffix(1);
4949

5050
std::string host_str(host);
51-
51+
5252
// For IPv4 addresses return empty
5353
const char* last_dot = find_last_symbols_or_null<'.'>(host.data(), host.data() + host.size());
5454
if (last_dot && isNumericASCII(last_dot[1]))
@@ -68,7 +68,7 @@ namespace duckdb
6868
}
6969

7070
// Find where the TLD starts in the hostname
71-
if (host_str.length() > tld.length() &&
71+
if (host_str.length() > tld.length() &&
7272
host_str.substr(host_str.length() - tld.length()) == tld)
7373
{
7474
// Check if there's a dot before the TLD

src/functions/extract_tld.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,19 @@ namespace duckdb
4949
bool has_path = input.find('/') != std::string::npos;
5050
bool has_query = input.find('?') != std::string::npos;
5151
bool has_fragment = input.find('#') != std::string::npos;
52-
52+
5353
if (!has_protocol && !has_path && !has_query && !has_fragment)
5454
{
5555
// Treat entire input as hostname, but strip port if present
5656
size_t colon_pos = input.find(':');
5757
size_t host_length = (colon_pos != std::string::npos) ? colon_pos : size;
58-
58+
5959
// Reject single characters as invalid hostnames
6060
if (host_length <= 1)
6161
{
6262
return "";
6363
}
64-
64+
6565
host = std::string_view(data, host_length);
6666
}
6767
else
@@ -75,15 +75,15 @@ namespace duckdb
7575
host.remove_suffix(1);
7676

7777
std::string host_str(host);
78-
78+
7979
// For IPv4 addresses return empty
8080
const char* last_dot = find_last_symbols_or_null<'.'>(host.data(), host.data() + host.size());
8181
if (last_dot && isNumericASCII(last_dot[1]))
8282
return "";
8383

8484
// Use the proper TLD lookup to get the effective TLD
8585
std::string effective_tld = getEffectiveTLD(host_str);
86-
86+
8787
// If the effective TLD is empty, try the last part
8888
if (effective_tld.empty())
8989
{
@@ -94,7 +94,7 @@ namespace duckdb
9494
}
9595
return host_str; // No dots, return entire string
9696
}
97-
97+
9898
return effective_tld;
9999
}
100100
} // namespace netquack

src/netquack_extension.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ namespace duckdb
2525
static void LoadInternal (ExtensionLoader &loader)
2626
{
2727
loader.SetDescription("Parsing, extracting, and analyzing domains, URIs, and paths with ease.");
28-
28+
2929
auto netquack_extract_domain_function = ScalarFunction (
3030
"extract_domain",
3131
{ LogicalType::VARCHAR },
@@ -155,4 +155,4 @@ extern "C" {
155155
DUCKDB_CPP_EXTENSION_ENTRY(netquack, loader) {
156156
duckdb::LoadInternal(loader);
157157
}
158-
}
158+
}

src/utils/tld_lookup.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,16 @@ namespace duckdb
2424

2525
// Implement proper public suffix algorithm:
2626
// Find the longest matching public suffix
27-
27+
2828
// First check if the entire hostname is a TLD
2929
if (isValidTLD(hostname))
3030
{
3131
return hostname;
3232
}
33-
33+
3434
// Try all possible suffixes and find the longest match
3535
std::string longest_tld;
36-
36+
3737
for (size_t pos = 0; pos < hostname.length(); ++pos)
3838
{
3939
if (hostname[pos] == '.')
@@ -49,20 +49,20 @@ namespace duckdb
4949
}
5050
}
5151
}
52-
52+
5353
// If we found a valid TLD, return it
5454
if (!longest_tld.empty())
5555
{
5656
return longest_tld;
5757
}
58-
58+
5959
// If no valid TLD found, return the last part after the last dot
6060
size_t last_dot = hostname.find_last_of('.');
6161
if (last_dot != std::string::npos)
6262
{
6363
return hostname.substr(last_dot + 1);
6464
}
65-
65+
6666
// No dots, return entire hostname
6767
return hostname;
6868
}

0 commit comments

Comments
 (0)