Skip to content

Commit f65e8d9

Browse files
committed
Add a shorten_path helper.
1 parent ca879d8 commit f65e8d9

File tree

3 files changed

+88
-11
lines changed

3 files changed

+88
-11
lines changed

src/host.rs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,52 @@ pub(crate) enum HostInternal {
2424
Ipv6(Ipv6Addr),
2525
}
2626

27-
impl<S> From<Host<S>> for HostInternal {
27+
#[cfg(feature = "serde")]
28+
impl ::serde::Serialize for HostInternal {
29+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
30+
where
31+
S: ::serde::Serializer,
32+
{
33+
// This doesn’t use `derive` because that involves
34+
// large dependencies (that take a long time to build), and
35+
// either Macros 1.1 which are not stable yet or a cumbersome build script.
36+
//
37+
// Implementing `Serialize` correctly for an enum is tricky,
38+
// so let’s use existing enums that already do.
39+
use std::net::IpAddr;
40+
match *self {
41+
HostInternal::None => None,
42+
HostInternal::Domain => Some(None),
43+
HostInternal::Ipv4(addr) => Some(Some(IpAddr::V4(addr))),
44+
HostInternal::Ipv6(addr) => Some(Some(IpAddr::V6(addr))),
45+
}
46+
.serialize(serializer)
47+
}
48+
}
49+
50+
#[cfg(feature = "serde")]
51+
impl<'de> ::serde::Deserialize<'de> for HostInternal {
52+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
53+
where
54+
D: ::serde::Deserializer<'de>,
55+
{
56+
use std::net::IpAddr;
57+
Ok(match ::serde::Deserialize::deserialize(deserializer)? {
58+
None => HostInternal::None,
59+
Some(None) => HostInternal::Domain,
60+
Some(Some(IpAddr::V4(addr))) => HostInternal::Ipv4(addr),
61+
Some(Some(IpAddr::V6(addr))) => HostInternal::Ipv6(addr),
62+
})
63+
}
64+
}
65+
66+
impl<S> From<Host<S>> for HostInternal
67+
where
68+
S: ToString,
69+
{
2870
fn from(host: Host<S>) -> HostInternal {
2971
match host {
72+
Host::Domain(ref s) if s.to_string().is_empty() => HostInternal::None,
3073
Host::Domain(_) => HostInternal::Domain,
3174
Host::Ipv4(address) => HostInternal::Ipv4(address),
3275
Host::Ipv6(address) => HostInternal::Ipv6(address),

src/parser.rs

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ impl<'a> Parser<'a> {
638638
(Some(i), _) | (None, Some(i)) => base_url.slice(..i),
639639
};
640640
self.serialization.push_str(before_query);
641-
self.pop_path(SchemeType::File, base_url.path_start as usize);
641+
self.shorten_path(SchemeType::File, base_url.path_start as usize);
642642
let remaining = self.parse_path(
643643
SchemeType::File,
644644
&mut true,
@@ -967,7 +967,7 @@ impl<'a> Parser<'a> {
967967
host_str = &input_str[..bytes]
968968
}
969969
}
970-
if scheme_type.is_special() && host_str.is_empty() {
970+
if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
971971
return Err(ParseError::EmptyHost);
972972
}
973973
if !scheme_type.is_special() {
@@ -1157,15 +1157,15 @@ impl<'a> Parser<'a> {
11571157
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
11581158
| ".%2E" => {
11591159
debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
1160-
// We dont want to truncate beyond the path start:
1161-
if segment_start - 1 > path_start {
1162-
self.serialization.truncate(segment_start - 1); // Truncate "/.."
1160+
self.serialization.truncate(segment_start);
1161+
// Do not remove the root slash
1162+
if self.serialization.ends_with("/") && path_start + 1 < segment_start {
1163+
self.serialization.pop();
1164+
self.shorten_path(scheme_type, path_start);
11631165
} else {
1164-
self.serialization.truncate(segment_start); // Truncate ".."
1166+
self.shorten_path(scheme_type, path_start);
11651167
}
11661168

1167-
self.pop_path(scheme_type, path_start);
1168-
11691169
// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
11701170
if ends_with_slash && !self.serialization.ends_with("/") {
11711171
self.serialization.push('/');
@@ -1209,16 +1209,36 @@ impl<'a> Parser<'a> {
12091209
input
12101210
}
12111211

1212+
/// https://url.spec.whatwg.org/#shorten-a-urls-path
1213+
fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1214+
// If path is empty, then return.
1215+
if self.serialization.len() <= path_start {
1216+
return;
1217+
}
1218+
// If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
1219+
let segments: Vec<&str> = self.serialization[path_start..]
1220+
.split('/')
1221+
.filter(|s| !s.is_empty())
1222+
.collect();
1223+
if scheme_type.is_file()
1224+
&& segments.len() == 1
1225+
&& is_normalized_windows_drive_letter(segments[0])
1226+
{
1227+
return;
1228+
}
1229+
// Remove path’s last item.
1230+
self.pop_path(scheme_type, path_start);
1231+
}
1232+
12121233
/// https://url.spec.whatwg.org/#pop-a-urls-path
12131234
fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
12141235
if self.serialization.len() > path_start {
12151236
let slash_position = self.serialization[path_start..].rfind('/').unwrap();
12161237
// + 1 since rfind returns the index of the slash itself; the last segment starts right after it.
12171238
let segment_start = path_start + slash_position + 1;
12181239
// Don’t pop a Windows drive letter
1219-
// FIXME: *normalized* Windows drive letter
12201240
if !(scheme_type.is_file()
1221-
&& is_windows_drive_letter(&self.serialization[segment_start..]))
1241+
&& is_normalized_windows_drive_letter(&self.serialization[segment_start..]))
12221242
{
12231243
self.serialization.truncate(segment_start);
12241244
}

src/quirks.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,20 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
122122
Err(_) => return Err(()),
123123
}
124124
}
125+
// Make sure we don't set an empty host on a URL that has a username or a port
126+
if host == Host::Domain("".to_string()) {
127+
if !username(&url).is_empty() {
128+
return Err(());
129+
}
130+
if let Some(p) = opt_port {
131+
if let Some(_) = p {
132+
return Err(());
133+
}
134+
}
135+
if url.port().is_some() {
136+
return Err(());
137+
}
138+
}
125139
url.set_host_internal(host, opt_port);
126140
Ok(())
127141
}

0 commit comments

Comments
 (0)