Skip to content

Commit cd5f0ae

Browse files
committed
tryouts to rewrite parse path
1 parent 7c70578 commit cd5f0ae

File tree

3 files changed

+106
-15
lines changed

3 files changed

+106
-15
lines changed

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -490,7 +490,7 @@ impl Url {
490490
Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
491491
);
492492
}
493-
assert_eq!(self.byte_at(self.path_start), b'/');
493+
//assert!(self.path_start as usize == self.serialization.len() || self.byte_at(self.path_start) == b'/');
494494
} else {
495495
// Anarchist URL (no authority)
496496
assert_eq!(self.username_end, self.scheme_end + 1);

src/parser.rs

Lines changed: 96 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1042,20 +1042,100 @@ impl<'a> Parser<'a> {
10421042
has_host: &mut bool,
10431043
mut input: Input<'i>,
10441044
) -> Input<'i> {
1045-
// Path start state
1046-
match input.split_first() {
1047-
(Some('/'), remaining) => input = remaining,
1048-
(Some('\\'), remaining) => {
1049-
if scheme_type.is_special() {
1050-
self.log_violation(SyntaxViolation::Backslash);
1051-
input = remaining
1045+
let path_start = self.serialization.len();
1046+
let (maybe_c, remaining) = input.split_first();
1047+
// If url is special, then:
1048+
if scheme_type.is_special() {
1049+
// If c is U+005C (\), validation error.
1050+
if maybe_c == Some('\\') {
1051+
self.log_violation(SyntaxViolation::Backslash);
1052+
}
1053+
// If c is neither U+002F (/) nor U+005C (\), then decrease pointer by one.
1054+
if maybe_c == Some('/') || maybe_c == Some('\\') {
1055+
input = remaining;
1056+
}
1057+
// Set state to path state.
1058+
return self.parse_path(scheme_type, has_host, path_start, input);
1059+
} else if maybe_c == Some('?') {
1060+
// Otherwise, if state override is not given and c is U+003F (?),
1061+
// set url’s query to the empty string and state to query state.
1062+
return self.parse_query_2(scheme_type, remaining);
1063+
} else if maybe_c == Some('#') {
1064+
// Otherwise, if state override is not given and c is U+0023 (#),
1065+
// set url’s fragment to the empty string and state to fragment state.
1066+
return self.parse_fragment_2(remaining);
1067+
}
1068+
// Otherwise, if c is not the EOF code point:
1069+
if !remaining.is_empty() {
1070+
if maybe_c == Some('/') {
1071+
return self.parse_path(scheme_type, has_host, path_start, input);
1072+
} else {
1073+
// If c is not U+002F (/), then decrease pointer by one.
1074+
return self.parse_path(scheme_type, has_host, path_start, remaining);
1075+
}
1076+
}
1077+
input
1078+
}
1079+
1080+
pub fn parse_query_2<'i>(
1081+
&mut self,
1082+
scheme_type: SchemeType,
1083+
mut input: Input<'i>,
1084+
) -> Input<'i> {
1085+
let mut query = String::new(); // FIXME: use a streaming decoder instead
1086+
1087+
while let Some((c, _)) = input.next_utf8() {
1088+
match c {
1089+
// If state override is not given and c is U+0023 (#),
1090+
// then set url’s fragment to the empty string and state to fragment state.
1091+
'#' => return self.parse_fragment_2(input),
1092+
c => {
1093+
// If c is not a URL code point and not U+0025 (%), validation error.
1094+
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1095+
self.check_url_code_point(c, &input);
1096+
query.push(c);
10521097
}
10531098
}
1054-
_ => {}
10551099
}
1056-
let path_start = self.serialization.len();
1057-
self.serialization.push('/');
1058-
self.parse_path(scheme_type, has_host, path_start, input)
1100+
1101+
// If encoding is not UTF-8 and one of the following is true
1102+
// url is not special
1103+
// url’s scheme is "ws" or "wss"
1104+
let encoding = if !scheme_type.is_special()
1105+
|| self.serialization.starts_with("ws")
1106+
|| self.serialization.starts_with("wss")
1107+
{
1108+
self.query_encoding_override
1109+
} else {
1110+
None
1111+
};
1112+
let query_bytes = ::query_encoding::encode(encoding, &query);
1113+
let set = if scheme_type.is_special() {
1114+
SPECIAL_QUERY
1115+
} else {
1116+
QUERY
1117+
};
1118+
self.serialization.extend(percent_encode(&query_bytes, set));
1119+
input
1120+
}
1121+
1122+
pub fn parse_fragment_2<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1123+
while let Some((c, _)) = input.next_utf8() {
1124+
match c {
1125+
// U+0000 NULL: Validation error.
1126+
'\0' => self.log_violation(SyntaxViolation::NullInFragment),
1127+
c => {
1128+
// If c is not a URL code point and not U+0025 (%), validation error.
1129+
// If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1130+
self.check_url_code_point(c, &input);
1131+
// UTF-8 percent encode c using the fragment percent-encode set
1132+
// and append the result to url’s fragment.
1133+
self.serialization
1134+
.extend(utf8_percent_encode(&c.to_string(), FRAGMENT));
1135+
}
1136+
}
1137+
}
1138+
input
10591139
}
10601140

10611141
pub fn parse_path<'i>(
@@ -1065,8 +1145,10 @@ impl<'a> Parser<'a> {
10651145
path_start: usize,
10661146
mut input: Input<'i>,
10671147
) -> Input<'i> {
1148+
if !self.serialization.ends_with('/') && scheme_type.is_special() && !input.is_empty() {
1149+
self.serialization.push('/');
1150+
}
10681151
// Relative path state
1069-
debug_assert!(self.serialization.ends_with('/'));
10701152
loop {
10711153
let segment_start = self.serialization.len();
10721154
let mut ends_with_slash = false;
@@ -1079,13 +1161,15 @@ impl<'a> Parser<'a> {
10791161
};
10801162
match c {
10811163
'/' if self.context != Context::PathSegmentSetter => {
1164+
self.serialization.push(c);
10821165
ends_with_slash = true;
10831166
break;
10841167
}
10851168
'\\' if self.context != Context::PathSegmentSetter
10861169
&& scheme_type.is_special() =>
10871170
{
10881171
self.log_violation(SyntaxViolation::Backslash);
1172+
self.serialization.push(c);
10891173
ends_with_slash = true;
10901174
break;
10911175
}

src/quirks.rs

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -186,8 +186,15 @@ pub fn pathname(url: &Url) -> &str {
186186

187187
/// Setter for https://url.spec.whatwg.org/#dom-url-pathname
188188
pub fn set_pathname(url: &mut Url, new_pathname: &str) {
189-
if !url.cannot_be_a_base() {
190-
url.set_path(new_pathname)
189+
if !url.cannot_be_a_base() && !new_pathname.is_empty() {
190+
if !SchemeType::from(url.scheme()).is_special() || Some('/') == new_pathname.chars().nth(0)
191+
{
192+
url.set_path(new_pathname)
193+
} else {
194+
let mut path_to_set = String::from("/");
195+
path_to_set.push_str(new_pathname);
196+
url.set_path(&path_to_set)
197+
}
191198
}
192199
}
193200

0 commit comments

Comments
 (0)