@@ -1042,20 +1042,100 @@ impl<'a> Parser<'a> {
1042
1042
has_host : & mut bool ,
1043
1043
mut input : Input < ' i > ,
1044
1044
) -> Input < ' i > {
1045
- // Path start state
1046
- match input. split_first ( ) {
1047
- ( Some ( '/' ) , remaining) => input = remaining,
1048
- ( Some ( '\\' ) , remaining) => {
1049
- if scheme_type. is_special ( ) {
1050
- self . log_violation ( SyntaxViolation :: Backslash ) ;
1051
- input = remaining
1045
+ let path_start = self . serialization . len ( ) ;
1046
+ let ( maybe_c, remaining) = input. split_first ( ) ;
1047
+ // If url is special, then:
1048
+ if scheme_type. is_special ( ) {
1049
+ // If c is U+005C (\), validation error.
1050
+ if maybe_c == Some ( '\\' ) {
1051
+ self . log_violation ( SyntaxViolation :: Backslash ) ;
1052
+ }
1053
+ // If c is neither U+002F (/) nor U+005C (\), then decrease pointer by one.
1054
+ if maybe_c == Some ( '/' ) || maybe_c == Some ( '\\' ) {
1055
+ input = remaining;
1056
+ }
1057
+ // Set state to path state.
1058
+ return self . parse_path ( scheme_type, has_host, path_start, input) ;
1059
+ } else if maybe_c == Some ( '?' ) {
1060
+ // Otherwise, if state override is not given and c is U+003F (?),
1061
+ // set url’s query to the empty string and state to query state.
1062
+ return self . parse_query_2 ( scheme_type, remaining) ;
1063
+ } else if maybe_c == Some ( '#' ) {
1064
+ // Otherwise, if state override is not given and c is U+0023 (#),
1065
+ // set url’s fragment to the empty string and state to fragment state.
1066
+ return self . parse_fragment_2 ( remaining) ;
1067
+ }
1068
+ // Otherwise, if c is not the EOF code point:
1069
+ if !remaining. is_empty ( ) {
1070
+ if maybe_c == Some ( '/' ) {
1071
+ return self . parse_path ( scheme_type, has_host, path_start, input) ;
1072
+ } else {
1073
+ // If c is not U+002F (/), then decrease pointer by one.
1074
+ return self . parse_path ( scheme_type, has_host, path_start, remaining) ;
1075
+ }
1076
+ }
1077
+ input
1078
+ }
1079
+
1080
+ pub fn parse_query_2 < ' i > (
1081
+ & mut self ,
1082
+ scheme_type : SchemeType ,
1083
+ mut input : Input < ' i > ,
1084
+ ) -> Input < ' i > {
1085
+ let mut query = String :: new ( ) ; // FIXME: use a streaming decoder instead
1086
+
1087
+ while let Some ( ( c, _) ) = input. next_utf8 ( ) {
1088
+ match c {
1089
+ // If state override is not given and c is U+0023 (#),
1090
+ // then set url’s fragment to the empty string and state to fragment state.
1091
+ '#' => return self . parse_fragment_2 ( input) ,
1092
+ c => {
1093
+ // If c is not a URL code point and not U+0025 (%), validation error.
1094
+ // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1095
+ self . check_url_code_point ( c, & input) ;
1096
+ query. push ( c) ;
1052
1097
}
1053
1098
}
1054
- _ => { }
1055
1099
}
1056
- let path_start = self . serialization . len ( ) ;
1057
- self . serialization . push ( '/' ) ;
1058
- self . parse_path ( scheme_type, has_host, path_start, input)
1100
+
1101
+ // If encoding is not UTF-8 and one of the following is true
1102
+ // url is not special
1103
+ // url’s scheme is "ws" or "wss"
1104
+ let encoding = if !scheme_type. is_special ( )
1105
+ || self . serialization . starts_with ( "ws" )
1106
+ || self . serialization . starts_with ( "wss" )
1107
+ {
1108
+ self . query_encoding_override
1109
+ } else {
1110
+ None
1111
+ } ;
1112
+ let query_bytes = :: query_encoding:: encode ( encoding, & query) ;
1113
+ let set = if scheme_type. is_special ( ) {
1114
+ SPECIAL_QUERY
1115
+ } else {
1116
+ QUERY
1117
+ } ;
1118
+ self . serialization . extend ( percent_encode ( & query_bytes, set) ) ;
1119
+ input
1120
+ }
1121
+
1122
+ pub fn parse_fragment_2 < ' i > ( & mut self , mut input : Input < ' i > ) -> Input < ' i > {
1123
+ while let Some ( ( c, _) ) = input. next_utf8 ( ) {
1124
+ match c {
1125
+ // U+0000 NULL: Validation error.
1126
+ '\0' => self . log_violation ( SyntaxViolation :: NullInFragment ) ,
1127
+ c => {
1128
+ // If c is not a URL code point and not U+0025 (%), validation error.
1129
+ // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
1130
+ self . check_url_code_point ( c, & input) ;
1131
+ // UTF-8 percent encode c using the fragment percent-encode set
1132
+ // and append the result to url’s fragment.
1133
+ self . serialization
1134
+ . extend ( utf8_percent_encode ( & c. to_string ( ) , FRAGMENT ) ) ;
1135
+ }
1136
+ }
1137
+ }
1138
+ input
1059
1139
}
1060
1140
1061
1141
pub fn parse_path < ' i > (
@@ -1065,8 +1145,10 @@ impl<'a> Parser<'a> {
1065
1145
path_start : usize ,
1066
1146
mut input : Input < ' i > ,
1067
1147
) -> Input < ' i > {
1148
+ if !self . serialization . ends_with ( '/' ) && scheme_type. is_special ( ) && !input. is_empty ( ) {
1149
+ self . serialization . push ( '/' ) ;
1150
+ }
1068
1151
// Relative path state
1069
- debug_assert ! ( self . serialization. ends_with( '/' ) ) ;
1070
1152
loop {
1071
1153
let segment_start = self . serialization . len ( ) ;
1072
1154
let mut ends_with_slash = false ;
@@ -1079,13 +1161,15 @@ impl<'a> Parser<'a> {
1079
1161
} ;
1080
1162
match c {
1081
1163
'/' if self . context != Context :: PathSegmentSetter => {
1164
+ self . serialization . push ( c) ;
1082
1165
ends_with_slash = true ;
1083
1166
break ;
1084
1167
}
1085
1168
'\\' if self . context != Context :: PathSegmentSetter
1086
1169
&& scheme_type. is_special ( ) =>
1087
1170
{
1088
1171
self . log_violation ( SyntaxViolation :: Backslash ) ;
1172
+ self . serialization . push ( c) ;
1089
1173
ends_with_slash = true ;
1090
1174
break ;
1091
1175
}
0 commit comments