Skip to content

Commit bb52321

Browse files
authored
Update opaque_path, ensure opaque URL paths always roundtrip (#925)
1 parent 6eeec3c commit bb52321

File tree

7 files changed

+198
-28
lines changed

7 files changed

+198
-28
lines changed

src/parser.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,10 +619,18 @@ result_type parse_url_impl(std::string_view user_input,
619619
input_position = input_size + 1;
620620
}
621621
url.has_opaque_path = true;
622+
622623
// This is a really unlikely real-world scenario. We should not seek
623624
// to optimize it.
624-
url.update_base_pathname(unicode::percent_encode(
625-
view, character_sets::C0_CONTROL_PERCENT_ENCODE));
625+
if (view.ends_with(' ')) {
626+
std::string modified_view =
627+
std::string(view.begin(), view.end() - 1) + "%20";
628+
url.update_base_pathname(unicode::percent_encode(
629+
modified_view, character_sets::C0_CONTROL_PERCENT_ENCODE));
630+
} else {
631+
url.update_base_pathname(unicode::percent_encode(
632+
view, character_sets::C0_CONTROL_PERCENT_ENCODE));
633+
}
626634
break;
627635
}
628636
case state::PORT: {

tests/basic_tests.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,8 @@ TYPED_TEST(basic_tests, nodejs2) {
196196
ASSERT_EQ(url->get_search(), "?test");
197197
url->set_search("");
198198
ASSERT_EQ(url->get_search(), "");
199-
ASSERT_EQ(url->get_pathname(), "space");
200-
ASSERT_EQ(url->get_href(), "data:space");
199+
ASSERT_EQ(url->get_pathname(), "space %20");
200+
ASSERT_EQ(url->get_href(), "data:space %20");
201201
SUCCEED();
202202
}
203203

@@ -206,8 +206,8 @@ TYPED_TEST(basic_tests, nodejs3) {
206206
ASSERT_EQ(url->get_search(), "?test");
207207
url->set_search("");
208208
ASSERT_EQ(url->get_search(), "");
209-
ASSERT_EQ(url->get_pathname(), "space ");
210-
ASSERT_EQ(url->get_href(), "data:space #test");
209+
ASSERT_EQ(url->get_pathname(), "space %20");
210+
ASSERT_EQ(url->get_href(), "data:space %20#test");
211211
SUCCEED();
212212
}
213213

tests/wpt/ada_extra_setters_tests.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@
127127
"new_value": "",
128128
"expected": {
129129
"search": "",
130-
"pathname": "space"
130+
"pathname": "space %20"
131131
}
132132
}
133133
]

tests/wpt/ada_extra_urltestdata.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,15 +224,15 @@
224224
{
225225
"input": "data:space ?test#test",
226226
"base": "about:blank",
227-
"href": "data:space ?test#test",
227+
"href": "data:space %20?test#test",
228228
"origin": "null",
229229
"protocol": "data:",
230230
"username": "",
231231
"password": "",
232232
"host": "",
233233
"hostname": "",
234234
"port": "",
235-
"pathname": "space ",
235+
"pathname": "space %20",
236236
"search": "?test",
237237
"hash": "#test"
238238
},

tests/wpt/setters_tests.json

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,6 +1177,24 @@
11771177
"host": "test.invalid",
11781178
"hostname": "test.invalid"
11791179
}
1180+
},
1181+
{
1182+
"href": "https://test.invalid/",
1183+
"new_value": "test/@aaa",
1184+
"expected": {
1185+
"href": "https://test/",
1186+
"host": "test",
1187+
"hostname": "test"
1188+
}
1189+
},
1190+
{
1191+
"href": "https://test.invalid/",
1192+
"new_value": "test/:aaa",
1193+
"expected": {
1194+
"href": "https://test/",
1195+
"host": "test",
1196+
"hostname": "test"
1197+
}
11801198
}
11811199
],
11821200
"hostname": [
@@ -1624,6 +1642,24 @@
16241642
"host": "test.invalid",
16251643
"hostname": "test.invalid"
16261644
}
1645+
},
1646+
{
1647+
"href": "https://test.invalid/",
1648+
"new_value": "test/@aaa",
1649+
"expected": {
1650+
"href": "https://test/",
1651+
"host": "test",
1652+
"hostname": "test"
1653+
}
1654+
},
1655+
{
1656+
"href": "https://test.invalid/",
1657+
"new_value": "test/:aaa",
1658+
"expected": {
1659+
"href": "https://test/",
1660+
"host": "test",
1661+
"hostname": "test"
1662+
}
16271663
}
16281664
],
16291665
"port": [
@@ -2241,38 +2277,38 @@
22412277
}
22422278
},
22432279
{
2244-
"comment": "Drop trailing spaces from trailing opaque paths",
2280+
"comment": "Trailing spaces and opaque paths",
22452281
"href": "data:space ?query",
22462282
"new_value": "",
22472283
"expected": {
2248-
"href": "data:space",
2249-
"pathname": "space",
2284+
"href": "data:space%20",
2285+
"pathname": "space%20",
22502286
"search": ""
22512287
}
22522288
},
22532289
{
22542290
"href": "sc:space ?query",
22552291
"new_value": "",
22562292
"expected": {
2257-
"href": "sc:space",
2258-
"pathname": "space",
2293+
"href": "sc:space%20",
2294+
"pathname": "space%20",
22592295
"search": ""
22602296
}
22612297
},
22622298
{
2263-
"comment": "Do not drop trailing spaces from non-trailing opaque paths",
2299+
"comment": "Trailing spaces and opaque paths",
22642300
"href": "data:space ?query#fragment",
22652301
"new_value": "",
22662302
"expected": {
2267-
"href": "data:space #fragment",
2303+
"href": "data:space %20#fragment",
22682304
"search": ""
22692305
}
22702306
},
22712307
{
22722308
"href": "sc:space ?query#fragment",
22732309
"new_value": "",
22742310
"expected": {
2275-
"href": "sc:space #fragment",
2311+
"href": "sc:space %20#fragment",
22762312
"search": ""
22772313
}
22782314
},
@@ -2429,38 +2465,38 @@
24292465
}
24302466
},
24312467
{
2432-
"comment": "Drop trailing spaces from trailing opaque paths",
2468+
"comment": "Trailing spaces and opaque paths",
24332469
"href": "data:space #fragment",
24342470
"new_value": "",
24352471
"expected": {
2436-
"href": "data:space",
2437-
"pathname": "space",
2472+
"href": "data:space %20",
2473+
"pathname": "space %20",
24382474
"hash": ""
24392475
}
24402476
},
24412477
{
24422478
"href": "sc:space #fragment",
24432479
"new_value": "",
24442480
"expected": {
2445-
"href": "sc:space",
2446-
"pathname": "space",
2481+
"href": "sc:space %20",
2482+
"pathname": "space %20",
24472483
"hash": ""
24482484
}
24492485
},
24502486
{
2451-
"comment": "Do not drop trailing spaces from non-trailing opaque paths",
2487+
"comment": "Trailing spaces and opaque paths",
24522488
"href": "data:space ?query#fragment",
24532489
"new_value": "",
24542490
"expected": {
2455-
"href": "data:space ?query",
2491+
"href": "data:space %20?query",
24562492
"hash": ""
24572493
}
24582494
},
24592495
{
24602496
"href": "sc:space ?query#fragment",
24612497
"new_value": "",
24622498
"expected": {
2463-
"href": "sc:space ?query",
2499+
"href": "sc:space %20?query",
24642500
"hash": ""
24652501
}
24662502
},

tests/wpt/toascii.json

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
[
22
"This contains assorted IDNA tests that IdnaTestV2 might not cover.",
33
"Feel free to deduplicate with a clear commit message.",
4+
"",
5+
"If the test only applies to the URL Standard's 'domain to ASCII', ",
6+
"and not to TR46's ToASCII, then tag it with `urlStandardOnly`",
47
{
58
"comment": "Label with hyphens in 3rd and 4th position",
69
"input": "aa--",
@@ -239,19 +242,22 @@
239242
},
240243
{
241244
"input": "www.lookout.net\u2A7480",
242-
"output": null
245+
"output": null,
246+
"urlStandardOnly": true
243247
},
244248
{
245249
"input": "www\u00A0.lookout.net",
246-
"output": null
250+
"output": null,
251+
"urlStandardOnly": true
247252
},
248253
{
249254
"input": "\u1680lookout.net",
250255
"output": null
251256
},
252257
{
253258
"input": "\u001flookout.net",
254-
"output": null
259+
"output": null,
260+
"urlStandardOnly": true
255261
},
256262
{
257263
"input": "look\u06DDout.net",

tests/wpt/urltestdata.json

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3778,6 +3778,126 @@
37783778
"search": "",
37793779
"hash": ""
37803780
},
3781+
{
3782+
"input": "non-special:opaque ",
3783+
"base": null,
3784+
"href": "non-special:opaque",
3785+
"origin": "null",
3786+
"protocol": "non-special:",
3787+
"username": "",
3788+
"password": "",
3789+
"host": "",
3790+
"hostname": "",
3791+
"port": "",
3792+
"pathname": "opaque",
3793+
"search": "",
3794+
"hash": ""
3795+
},
3796+
{
3797+
"input": "non-special:opaque ?hi",
3798+
"base": null,
3799+
"href": "non-special:opaque %20?hi",
3800+
"origin": "null",
3801+
"protocol": "non-special:",
3802+
"username": "",
3803+
"password": "",
3804+
"host": "",
3805+
"hostname": "",
3806+
"port": "",
3807+
"pathname": "opaque %20",
3808+
"search": "?hi",
3809+
"hash": ""
3810+
},
3811+
{
3812+
"input": "non-special:opaque #hi",
3813+
"base": null,
3814+
"href": "non-special:opaque %20#hi",
3815+
"origin": "null",
3816+
"protocol": "non-special:",
3817+
"username": "",
3818+
"password": "",
3819+
"host": "",
3820+
"hostname": "",
3821+
"port": "",
3822+
"pathname": "opaque %20",
3823+
"search": "",
3824+
"hash": "#hi"
3825+
},
3826+
{
3827+
"input": "non-special:opaque x?hi",
3828+
"base": null,
3829+
"href": "non-special:opaque x?hi",
3830+
"origin": "null",
3831+
"protocol": "non-special:",
3832+
"username": "",
3833+
"password": "",
3834+
"host": "",
3835+
"hostname": "",
3836+
"port": "",
3837+
"pathname": "opaque x",
3838+
"search": "?hi",
3839+
"hash": ""
3840+
},
3841+
{
3842+
"input": "non-special:opaque x#hi",
3843+
"base": null,
3844+
"href": "non-special:opaque x#hi",
3845+
"origin": "null",
3846+
"protocol": "non-special:",
3847+
"username": "",
3848+
"password": "",
3849+
"host": "",
3850+
"hostname": "",
3851+
"port": "",
3852+
"pathname": "opaque x",
3853+
"search": "",
3854+
"hash": "#hi"
3855+
},
3856+
{
3857+
"input": "non-special:opaque \t\t \t#hi",
3858+
"base": null,
3859+
"href": "non-special:opaque %20#hi",
3860+
"origin": "null",
3861+
"protocol": "non-special:",
3862+
"username": "",
3863+
"password": "",
3864+
"host": "",
3865+
"hostname": "",
3866+
"port": "",
3867+
"pathname": "opaque %20",
3868+
"search": "",
3869+
"hash": "#hi"
3870+
},
3871+
{
3872+
"input": "non-special:opaque \t\t #hi",
3873+
"base": null,
3874+
"href": "non-special:opaque %20#hi",
3875+
"origin": "null",
3876+
"protocol": "non-special:",
3877+
"username": "",
3878+
"password": "",
3879+
"host": "",
3880+
"hostname": "",
3881+
"port": "",
3882+
"pathname": "opaque %20",
3883+
"search": "",
3884+
"hash": "#hi"
3885+
},
3886+
{
3887+
"input": "non-special:opaque\t\t \r #hi",
3888+
"base": null,
3889+
"href": "non-special:opaque %20#hi",
3890+
"origin": "null",
3891+
"protocol": "non-special:",
3892+
"username": "",
3893+
"password": "",
3894+
"host": "",
3895+
"hostname": "",
3896+
"port": "",
3897+
"pathname": "opaque %20",
3898+
"search": "",
3899+
"hash": "#hi"
3900+
},
37813901
"Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)",
37823902
{
37833903
"input": "http://www.foo。bar.com",

0 commit comments

Comments
 (0)