Skip to content

Commit 0caeae9

Browse files
lemire and Daniel Lemire authored
test: add 2-3 additional tests (#460)
* 2-3 additional tests.
* Removing non-ASCII content in the main source files + marking test files as UTF-8.
* Reformat.
* Reformat

---------

Co-authored-by: Daniel Lemire <[email protected]>
1 parent d35f5fe commit 0caeae9

17 files changed

+183
-131
lines changed

include/ada/ada_idna.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ namespace ada::idna {
110110
// this function. We also do not trim control characters. We also assume that
111111
// the input is not empty. We return "" on error.
112112
//
113-
// Example: "www.öbb.at" -> "www.xn--bb-eka.at"
114113
//
115114
// This function may accept or even produce invalid domains.
116115
std::string to_ascii(std::string_view ut8_string);

include/ada/unicode.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ namespace ada::unicode {
2424
* Given a domain, we need to identify its labels.
2525
* They are separated by label-separators:
2626
*
27-
* U+002E ( . ) FULL STOP
28-
* U+FF0E ( ． ) FULLWIDTH FULL STOP
29-
* U+3002 ( 。 ) IDEOGRAPHIC FULL STOP
30-
* U+FF61 ( ｡ ) HALFWIDTH IDEOGRAPHIC FULL STOP
27+
* U+002E (.) FULL STOP
28+
* U+FF0E FULLWIDTH FULL STOP
29+
* U+3002 IDEOGRAPHIC FULL STOP
30+
* U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP
3131
*
3232
* They are all mapped to U+002E.
3333
*

include/ada/url-inl.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ size_t url::get_pathname_length() const noexcept { return path.size(); }
8080
out.host_end = out.host_start;
8181

8282
if (!has_opaque_path && checkers::begins_with(path, "//")) {
83-
// If url’s host is null, url does not have an opaque path, url’s path’s
84-
// size is greater than 1, and url’s path[0] is the empty string, then
83+
// If url's host is null, url does not have an opaque path, url's path's
84+
// size is greater than 1, and url's path[0] is the empty string, then
8585
// append U+002F (/) followed by U+002E (.) to output.
8686
running_index = out.protocol_end + 2;
8787
} else {
@@ -195,8 +195,8 @@ inline void url::copy_scheme(const ada::url &u) {
195195
output += ":" + get_port();
196196
}
197197
} else if (!has_opaque_path && checkers::begins_with(path, "//")) {
198-
// If url’s host is null, url does not have an opaque path, url’s path’s
199-
// size is greater than 1, and url’s path[0] is the empty string, then
198+
// If url's host is null, url does not have an opaque path, url's path's
199+
// size is greater than 1, and url's path[0] is the empty string, then
200200
// append U+002F (/) followed by U+002E (.) to output.
201201
output += "/.";
202202
}

include/ada/url.h

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -45,48 +45,48 @@ struct url : url_base {
4545

4646
/**
4747
* @private
48-
* A URL’s username is an ASCII string identifying a username. It is initially
48+
* A URL's username is an ASCII string identifying a username. It is initially
4949
* the empty string.
5050
*/
5151
std::string username{};
5252

5353
/**
5454
* @private
55-
* A URL’s password is an ASCII string identifying a password. It is initially
55+
* A URL's password is an ASCII string identifying a password. It is initially
5656
* the empty string.
5757
*/
5858
std::string password{};
5959

6060
/**
6161
* @private
62-
* A URL’s host is null or a host. It is initially null.
62+
* A URL's host is null or a host. It is initially null.
6363
*/
6464
std::optional<std::string> host{};
6565

6666
/**
6767
* @private
68-
* A URL’s port is either null or a 16-bit unsigned integer that identifies a
68+
* A URL's port is either null or a 16-bit unsigned integer that identifies a
6969
* networking port. It is initially null.
7070
*/
7171
std::optional<uint16_t> port{};
7272

7373
/**
7474
* @private
75-
* A URL’s path is either an ASCII string or a list of zero or more ASCII
75+
* A URL's path is either an ASCII string or a list of zero or more ASCII
7676
* strings, usually identifying a location.
7777
*/
7878
std::string path{};
7979

8080
/**
8181
* @private
82-
* A URL’s query is either null or an ASCII string. It is initially null.
82+
* A URL's query is either null or an ASCII string. It is initially null.
8383
*/
8484
std::optional<std::string> query{};
8585

8686
/**
8787
* @private
88-
* A URL’s fragment is either null or an ASCII string that can be used for
89-
* further processing on the resource the URL’s other components identify. It
88+
* A URL's fragment is either null or an ASCII string that can be used for
89+
* further processing on the resource the URL's other components identify. It
9090
* is initially null.
9191
*/
9292
std::optional<std::string> hash{};
@@ -111,23 +111,23 @@ struct url : url_base {
111111
[[nodiscard]] ada_really_inline std::string get_href() const noexcept;
112112

113113
/**
114-
* The origin getter steps are to return the serialization of this’s URL’s
114+
* The origin getter steps are to return the serialization of this's URL's
115115
* origin. [HTML]
116116
* @return a newly allocated string.
117117
* @see https://url.spec.whatwg.org/#concept-url-origin
118118
*/
119119
[[nodiscard]] std::string get_origin() const noexcept override;
120120

121121
/**
122-
* The protocol getter steps are to return this’s URL’s scheme, followed by
122+
* The protocol getter steps are to return this's URL's scheme, followed by
123123
* U+003A (:).
124124
* @return a newly allocated string.
125125
* @see https://url.spec.whatwg.org/#dom-url-protocol
126126
*/
127127
[[nodiscard]] std::string get_protocol() const noexcept;
128128

129129
/**
130-
* Return url’s host, serialized, followed by U+003A (:) and url’s port,
130+
* Return url's host, serialized, followed by U+003A (:) and url's port,
131131
* serialized.
132132
* When there is no host, this function returns the empty string.
133133
* @return a newly allocated string.
@@ -136,7 +136,7 @@ struct url : url_base {
136136
[[nodiscard]] std::string get_host() const noexcept;
137137

138138
/**
139-
* Return this’s URL’s host, serialized.
139+
* Return this's URL's host, serialized.
140140
* When there is no host, this function returns the empty string.
141141
* @return a newly allocated string.
142142
* @see https://url.spec.whatwg.org/#dom-url-hostname
@@ -145,7 +145,7 @@ struct url : url_base {
145145

146146
/**
147147
* The pathname getter steps are to return the result of URL path serializing
148-
* this’s URL.
148+
* this's URL.
149149
* @return a newly allocated string.
150150
* @see https://url.spec.whatwg.org/#dom-url-pathname
151151
*/
@@ -160,14 +160,14 @@ struct url : url_base {
160160
ada_really_inline size_t get_pathname_length() const noexcept;
161161

162162
/**
163-
* Return U+003F (?), followed by this’s URL’s query.
163+
* Return U+003F (?), followed by this's URL's query.
164164
* @return a newly allocated string.
165165
* @see https://url.spec.whatwg.org/#dom-url-search
166166
*/
167167
[[nodiscard]] std::string get_search() const noexcept;
168168

169169
/**
170-
* The username getter steps are to return this’s URL’s username.
170+
* The username getter steps are to return this's URL's username.
171171
* @return a constant reference to the underlying string.
172172
* @see https://url.spec.whatwg.org/#dom-url-username
173173
*/
@@ -233,21 +233,21 @@ struct url : url_base {
233233
bool set_href(const std::string_view input);
234234

235235
/**
236-
* The password getter steps are to return this’s URL’s password.
236+
* The password getter steps are to return this's URL's password.
237237
* @return a constant reference to the underlying string.
238238
* @see https://url.spec.whatwg.org/#dom-url-password
239239
*/
240240
[[nodiscard]] const std::string &get_password() const noexcept;
241241

242242
/**
243-
* Return this’s URL’s port, serialized.
243+
* Return this's URL's port, serialized.
244244
* @return a newly constructed string representing the port.
245245
* @see https://url.spec.whatwg.org/#dom-url-port
246246
*/
247247
[[nodiscard]] std::string get_port() const noexcept;
248248

249249
/**
250-
* Return U+0023 (#), followed by this’s URL’s fragment.
250+
* Return U+0023 (#), followed by this's URL's fragment.
251251
* @return a newly constructed string representing the hash.
252252
* @see https://url.spec.whatwg.org/#dom-url-hash
253253
*/
@@ -333,7 +333,7 @@ struct url : url_base {
333333
[[nodiscard]] bool parse_opaque_host(std::string_view input);
334334

335335
/**
336-
* A URL’s scheme is an ASCII string that identifies the type of URL and can
336+
* A URL's scheme is an ASCII string that identifies the type of URL and can
337337
* be used to dispatch a URL for further processing after parsing. It is
338338
* initially the empty string. We only set non_special_scheme when the scheme
339339
* is non-special, otherwise we avoid constructing string.

include/ada/url_aggregator-inl.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ inline void url_aggregator::update_base_pathname(const std::string_view input) {
276276

277277
if (begins_with_dashdash && !has_opaque_path && !has_authority() &&
278278
!has_dash_dot()) {
279-
// If url’s host is null, url does not have an opaque path, url’s path’s
279+
// If url's host is null, url does not have an opaque path, url's path's
280280
// size is greater than 1, then append U+002F (/) followed by U+002E (.) to
281281
// output.
282282
buffer.insert(components.pathname_start, "/.");
@@ -809,8 +809,8 @@ inline bool url_aggregator::has_port() const noexcept {
809809
}
810810

811811
inline bool url_aggregator::has_dash_dot() const noexcept {
812-
// If url’s host is null, url does not have an opaque path, url’s path’s size
813-
// is greater than 1, and url’s path[0] is the empty string, then append
812+
// If url's host is null, url does not have an opaque path, url's path's size
813+
// is greater than 1, and url's path[0] is the empty string, then append
814814
// U+002F (/) followed by U+002E (.) to output.
815815
ada_log("url_aggregator::has_dash_dot");
816816
// Performance: instead of doing this potentially expensive check, we could

include/ada/url_aggregator.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ struct url_aggregator : url_base {
4343

4444
[[nodiscard]] bool has_valid_domain() const noexcept override;
4545
/**
46-
* The origin getter steps are to return the serialization of this’s URL’s
46+
* The origin getter steps are to return the serialization of this's URL's
4747
* origin. [HTML]
4848
* @return a newly allocated string.
4949
* @see https://url.spec.whatwg.org/#concept-url-origin
@@ -59,35 +59,35 @@ struct url_aggregator : url_base {
5959
*/
6060
inline std::string_view get_href() const noexcept;
6161
/**
62-
* The username getter steps are to return this’s URL’s username.
62+
* The username getter steps are to return this's URL's username.
6363
* This function does not allocate memory.
6464
* @return a lightweight std::string_view.
6565
* @see https://url.spec.whatwg.org/#dom-url-username
6666
*/
6767
[[nodiscard]] std::string_view get_username() const noexcept;
6868
/**
69-
* The password getter steps are to return this’s URL’s password.
69+
* The password getter steps are to return this's URL's password.
7070
* This function does not allocate memory.
7171
* @return a lightweight std::string_view.
7272
* @see https://url.spec.whatwg.org/#dom-url-password
7373
*/
7474
[[nodiscard]] std::string_view get_password() const noexcept;
7575
/**
76-
* Return this’s URL’s port, serialized.
76+
* Return this's URL's port, serialized.
7777
* This function does not allocate memory.
7878
* @return a lightweight std::string_view.
7979
* @see https://url.spec.whatwg.org/#dom-url-port
8080
*/
8181
[[nodiscard]] std::string_view get_port() const noexcept;
8282
/**
83-
* Return U+0023 (#), followed by this’s URL’s fragment.
83+
* Return U+0023 (#), followed by this's URL's fragment.
8484
* This function does not allocate memory.
8585
* @return a lightweight std::string_view.
8686
* @see https://url.spec.whatwg.org/#dom-url-hash
8787
*/
8888
[[nodiscard]] std::string_view get_hash() const noexcept;
8989
/**
90-
* Return url’s host, serialized, followed by U+003A (:) and url’s port,
90+
* Return url's host, serialized, followed by U+003A (:) and url's port,
9191
* serialized.
9292
* This function does not allocate memory.
9393
* When there is no host, this function returns the empty view.
@@ -96,7 +96,7 @@ struct url_aggregator : url_base {
9696
*/
9797
[[nodiscard]] std::string_view get_host() const noexcept;
9898
/**
99-
* Return this’s URL’s host, serialized.
99+
* Return this's URL's host, serialized.
100100
* This function does not allocate memory.
101101
* When there is no host, this function returns the empty view.
102102
* @return a lightweight std::string_view.
@@ -105,7 +105,7 @@ struct url_aggregator : url_base {
105105
[[nodiscard]] std::string_view get_hostname() const noexcept;
106106
/**
107107
* The pathname getter steps are to return the result of URL path serializing
108-
* this’s URL.
108+
* this's URL.
109109
* This function does not allocate memory.
110110
* @return a lightweight std::string_view.
111111
* @see https://url.spec.whatwg.org/#dom-url-pathname
@@ -119,14 +119,14 @@ struct url_aggregator : url_base {
119119
*/
120120
ada_really_inline uint32_t get_pathname_length() const noexcept;
121121
/**
122-
* Return U+003F (?), followed by this’s URL’s query.
122+
* Return U+003F (?), followed by this's URL's query.
123123
* This function does not allocate memory.
124124
* @return a lightweight std::string_view.
125125
* @see https://url.spec.whatwg.org/#dom-url-search
126126
*/
127127
[[nodiscard]] std::string_view get_search() const noexcept;
128128
/**
129-
* The protocol getter steps are to return this’s URL’s scheme, followed by
129+
* The protocol getter steps are to return this's URL's scheme, followed by
130130
* U+003A (:).
131131
* This function does not allocate memory.
132132
* @return a lightweight std::string_view.

include/ada/url_base.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ struct url_base {
4747
[[nodiscard]] ada_really_inline bool is_special() const noexcept;
4848

4949
/**
50-
* The origin getter steps are to return the serialization of this’s URL’s
50+
* The origin getter steps are to return the serialization of this's URL's
5151
* origin. [HTML]
5252
* @return a newly allocated string.
5353
* @see https://url.spec.whatwg.org/#concept-url-origin

src/helpers.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@ ada_really_inline bool shorten_path(std::string& path,
102102
ada::scheme::type type) noexcept {
103103
size_t first_delimiter = path.find_first_of('/', 1);
104104

105-
// Let path be url’s path.
106-
// If url’s scheme is "file", path’s size is 1, and path[0] is a normalized
105+
// Let path be url's path.
106+
// If url's scheme is "file", path's size is 1, and path[0] is a normalized
107107
// Windows drive letter, then return.
108108
if (type == ada::scheme::type::FILE &&
109109
first_delimiter == std::string_view::npos && !path.empty()) {
@@ -113,7 +113,7 @@ ada_really_inline bool shorten_path(std::string& path,
113113
}
114114
}
115115

116-
// Remove path’s last item, if any.
116+
// Remove path's last item, if any.
117117
size_t last_delimiter = path.rfind('/');
118118
if (last_delimiter != std::string::npos) {
119119
path.erase(last_delimiter);
@@ -127,8 +127,8 @@ ada_really_inline bool shorten_path(std::string_view& path,
127127
ada::scheme::type type) noexcept {
128128
size_t first_delimiter = path.find_first_of('/', 1);
129129

130-
// Let path be url’s path.
131-
// If url’s scheme is "file", path’s size is 1, and path[0] is a normalized
130+
// Let path be url's path.
131+
// If url's scheme is "file", path's size is 1, and path[0] is a normalized
132132
// Windows drive letter, then return.
133133
if (type == ada::scheme::type::FILE &&
134134
first_delimiter == std::string_view::npos && !path.empty()) {
@@ -138,7 +138,7 @@ ada_really_inline bool shorten_path(std::string_view& path,
138138
}
139139
}
140140

141-
// Remove path’s last item, if any.
141+
// Remove path's last item, if any.
142142
if (!path.empty()) {
143143
size_t slash_loc = path.rfind('/');
144144
if (slash_loc != std::string_view::npos) {
@@ -551,7 +551,7 @@ ada_really_inline void parse_prepared_path(std::string_view input,
551551
}
552552
// Otherwise, if path_buffer is not a single-dot path segment, then:
553553
else if (!unicode::is_single_dot_path_segment(path_buffer)) {
554-
// If url’s scheme is "file", url’s path is empty, and path_buffer is a
554+
// If url's scheme is "file", url's path is empty, and path_buffer is a
555555
// Windows drive letter, then replace the second code point in
556556
// path_buffer with U+003A (:).
557557
if (type == ada::scheme::type::FILE && path.empty() &&
@@ -562,7 +562,7 @@ ada_really_inline void parse_prepared_path(std::string_view input,
562562
path_buffer.remove_prefix(2);
563563
path.append(path_buffer);
564564
} else {
565-
// Append path_buffer to url’s path.
565+
// Append path_buffer to url's path.
566566
path += '/';
567567
path.append(path_buffer);
568568
}

0 commit comments

Comments
 (0)