@@ -6,7 +6,7 @@ use crate::{
66 pct_enc:: {
77 self ,
88 encoder:: { Data , IData } ,
9- Decode , DecodedChunk , DecodedUtf8Chunk , Encode , EncodedChunk , Encoder , Table ,
9+ Decode , DecodedChunk , DecodedUtf8Chunk , Encode , EncodedChunk , Encoder ,
1010 } ,
1111 resolve,
1212} ;
@@ -149,13 +149,6 @@ pub(crate) fn normalize(
149149 // For "a://[::ffff:5:9]/" the capacity is not enough,
150150 // but it's fine since this rarely happens.
151151 let mut buf = String :: with_capacity ( r. as_str ( ) . len ( ) ) ;
152-
153- let data_table = if ascii_only {
154- Data :: TABLE
155- } else {
156- IData :: TABLE
157- } ;
158-
159152 let mut meta = Meta :: default ( ) ;
160153
161154 if let Some ( scheme) = r. scheme_opt ( ) {
@@ -169,7 +162,7 @@ pub(crate) fn normalize(
169162 buf. push_str ( "//" ) ;
170163
171164 if let Some ( userinfo) = auth. userinfo ( ) {
172- normalize_estr ( & mut buf, userinfo. as_str ( ) , false , data_table ) ;
165+ normalize_estr ( & mut buf, userinfo. as_str ( ) , false , ascii_only ) ;
173166 buf. push ( '@' ) ;
174167 }
175168
@@ -195,7 +188,7 @@ pub(crate) fn normalize(
195188 HostMeta :: RegName => {
196189 let start = buf. len ( ) ;
197190 let host = auth. host ( ) ;
198- normalize_estr ( & mut buf, host, true , data_table ) ;
191+ normalize_estr ( & mut buf, host, true , ascii_only ) ;
199192
200193 if buf. len ( ) < start + host. len ( ) {
201194 // Only reparse when the length is less than before.
@@ -229,7 +222,7 @@ pub(crate) fn normalize(
229222
230223 if r. has_scheme ( ) && path. starts_with ( '/' ) {
231224 let mut path_buf = String :: with_capacity ( path. len ( ) ) ;
232- normalize_estr ( & mut path_buf, path, false , data_table ) ;
225+ normalize_estr ( & mut path_buf, path, false , ascii_only ) ;
233226
234227 let underflow_occurred = resolve:: remove_dot_segments ( & mut buf, & [ & path_buf] ) ;
235228 if underflow_occurred && !allow_path_underflow {
@@ -242,31 +235,58 @@ pub(crate) fn normalize(
242235 }
243236 } else {
244237 // Don't remove dot segments from relative reference or rootless path.
245- normalize_estr ( & mut buf, path, false , data_table ) ;
238+ normalize_estr ( & mut buf, path, false , ascii_only ) ;
246239 }
247240
248241 meta. path_bounds . 1 = buf. len ( ) ;
249242
250243 if let Some ( query) = r. query ( ) {
244+ // We might as well decode percent-encoded private characters in query,
245+ // but Section 5.3.2.3 of RFC 3987 says:
246+ //
247+ // These IRIs should be normalized by decoding any
248+ // percent-encoded octet sequence that corresponds to an unreserved
249+ // character, as described in section 2.3 of [RFC3986].
250+ //
251+ // Also, the whole spec is quite ambiguous on whether private characters
252+ // are unreserved or not. So we just leave them percent-encoded for now.
251253 buf. push ( '?' ) ;
252-
253- const IQUERY_DATA : Table = IData :: TABLE . or_iprivate ( ) ;
254- let query_data_table = if ascii_only { Data :: TABLE } else { IQUERY_DATA } ;
255-
256- normalize_estr ( & mut buf, query. as_str ( ) , false , query_data_table) ;
254+ normalize_estr ( & mut buf, query. as_str ( ) , false , ascii_only) ;
257255 meta. query_end = NonZeroUsize :: new ( buf. len ( ) ) ;
258256 }
259257
260258 if let Some ( fragment) = r. fragment ( ) {
261259 buf. push ( '#' ) ;
262- normalize_estr ( & mut buf, fragment. as_str ( ) , false , data_table ) ;
260+ normalize_estr ( & mut buf, fragment. as_str ( ) , false , ascii_only ) ;
263261 }
264262
265263 Ok ( ( buf, meta) )
266264}
267265
268- fn normalize_estr ( buf : & mut String , s : & str , to_ascii_lowercase : bool , table : Table ) {
269- if table. allows_non_ascii ( ) {
266+ fn normalize_estr ( buf : & mut String , s : & str , to_ascii_lowercase : bool , ascii_only : bool ) {
267+ if ascii_only {
268+ for chunk in Decode :: new ( s) {
269+ match chunk {
270+ DecodedChunk :: Unencoded ( s) => {
271+ let i = buf. len ( ) ;
272+ buf. push_str ( s) ;
273+ if to_ascii_lowercase {
274+ buf[ i..] . make_ascii_lowercase ( ) ;
275+ }
276+ }
277+ DecodedChunk :: PctDecoded ( mut x) => {
278+ if Data :: TABLE . allows_ascii ( x) {
279+ if to_ascii_lowercase {
280+ x. make_ascii_lowercase ( ) ;
281+ }
282+ buf. push ( x as char ) ;
283+ } else {
284+ buf. push_str ( pct_enc:: encode_byte ( x) ) ;
285+ }
286+ }
287+ }
288+ }
289+ } else {
270290 Decode :: new ( s) . decode_utf8 ( |chunk| match chunk {
271291 DecodedUtf8Chunk :: Unencoded ( s) => {
272292 let i = buf. len ( ) ;
@@ -276,7 +296,7 @@ fn normalize_estr(buf: &mut String, s: &str, to_ascii_lowercase: bool, table: Ta
276296 }
277297 }
278298 DecodedUtf8Chunk :: Decoded { valid, invalid } => {
279- for chunk in Encode :: new ( table , valid) {
299+ for chunk in Encode :: new ( IData :: TABLE , valid) {
280300 match chunk {
281301 EncodedChunk :: Unencoded ( s) => {
282302 let i = buf. len ( ) ;
@@ -293,28 +313,6 @@ fn normalize_estr(buf: &mut String, s: &str, to_ascii_lowercase: bool, table: Ta
293313 }
294314 }
295315 } ) ;
296- } else {
297- for chunk in Decode :: new ( s) {
298- match chunk {
299- DecodedChunk :: Unencoded ( s) => {
300- let i = buf. len ( ) ;
301- buf. push_str ( s) ;
302- if to_ascii_lowercase {
303- buf[ i..] . make_ascii_lowercase ( ) ;
304- }
305- }
306- DecodedChunk :: PctDecoded ( mut x) => {
307- if table. allows_ascii ( x) {
308- if to_ascii_lowercase {
309- x. make_ascii_lowercase ( ) ;
310- }
311- buf. push ( x as char ) ;
312- } else {
313- buf. push_str ( pct_enc:: encode_byte ( x) ) ;
314- }
315- }
316- }
317- }
318316 }
319317}
320318
0 commit comments