11use crate :: {
22 imp:: { AuthMeta , Constraints , HostMeta , Meta } ,
3- pct_enc:: { self , table :: * , Table } ,
3+ pct_enc:: { self , encoder :: * , Encoder } ,
44 utf8,
55} ;
66use core:: {
7+ marker:: PhantomData ,
78 num:: NonZeroUsize ,
89 ops:: { Deref , DerefMut } ,
910 str,
@@ -110,7 +111,7 @@ impl DerefMut for Parser<'_> {
110111 }
111112}
112113
113- #[ derive( Clone , Copy ) ]
114+ #[ derive( Clone , Copy , PartialEq , Eq ) ]
114115enum PathKind {
115116 General ,
116117 AbEmpty ,
@@ -152,15 +153,8 @@ impl<'a> Reader<'a> {
152153 debug_assert ! ( self . pos <= self . len( ) ) ;
153154 }
154155
155- // Returns `true` iff any byte is read.
156- fn read ( & mut self , table : Table ) -> Result < bool > {
157- let start = self . pos ;
158- self . read_with ( table, |_, _| { } ) ?;
159- Ok ( self . pos > start)
160- }
161-
162156 #[ cold]
163- fn invalid_pct ( & self ) -> Result < ( ) > {
157+ fn invalid_pct ( & self ) -> Result < bool > {
164158 let mut i = self . pos + 1 ;
165159 if let Some ( & x) = self . bytes . get ( i) {
166160 if pct_enc:: is_hexdig ( x) {
@@ -170,7 +164,18 @@ impl<'a> Reader<'a> {
170164 err ! ( i, UnexpectedCharOrEnd ) ;
171165 }
172166
173- fn read_with ( & mut self , table : Table , mut f : impl FnMut ( usize , u32 ) ) -> Result < ( ) > {
167+ #[ inline( always) ]
168+ fn read < E : Encoder > ( & mut self ) -> Result < bool > {
169+ struct Helper < E : Encoder > {
170+ _marker : PhantomData < E > ,
171+ }
172+
173+ impl < E : Encoder > Helper < E > {
174+ const ALLOWS_PCT_ENCODED : bool = E :: TABLE . allows_pct_encoded ( ) ;
175+ const ALLOWS_NON_ASCII : bool = E :: TABLE . allows_non_ascii ( ) ;
176+ }
177+
178+ let start = self . pos ;
174179 let mut i = self . pos ;
175180
176181 macro_rules! do_loop {
@@ -187,40 +192,34 @@ impl<'a> Reader<'a> {
187192 i += 3 ;
188193 } else if $allow_non_ascii {
189194 let ( x, len) = utf8:: next_code_point( self . bytes, i) ;
190- if !table . allows_code_point( x) {
195+ if !E :: TABLE . allows_code_point( x) {
191196 break ;
192197 }
193- f( i, x) ;
194198 i += len;
195199 } else {
196- if !table . allows_ascii( x) {
200+ if !E :: TABLE . allows_ascii( x) {
197201 break ;
198202 }
199- f( i, x as u32 ) ;
200203 i += 1 ;
201204 }
202205 }
203206 } ;
204207 }
205208
206- // This expansion alone doesn't help much, but combined with
207- // `#[inline(always)]` on `utf8::next_code_point`,
208- // it improves performance significantly for non-ASCII case.
209- if table. allows_pct_encoded ( ) {
210- if table. allows_non_ascii ( ) {
209+ if Helper :: < E > :: ALLOWS_PCT_ENCODED {
210+ if Helper :: < E > :: ALLOWS_NON_ASCII {
211211 do_loop ! ( true , true ) ;
212212 } else {
213213 do_loop ! ( true , false ) ;
214214 }
215- } else if table. allows_non_ascii ( ) {
216- do_loop ! ( false , true ) ;
217215 } else {
216+ assert ! ( !Helper :: <E >:: ALLOWS_NON_ASCII ) ;
218217 do_loop ! ( false , false ) ;
219218 }
220219
221220 // INVARIANT: `i` is non-decreasing.
222221 self . pos = i;
223- Ok ( ( ) )
222+ Ok ( self . pos > start )
224223 }
225224
226225 fn read_str ( & mut self , s : & str ) -> bool {
@@ -411,7 +410,7 @@ impl<'a> Reader<'a> {
411410 if let Some ( b'v' | b'V' ) = self . peek ( 0 ) {
412411 // INVARIANT: Skipping "v" or "V" is fine.
413412 self . skip ( 1 ) ;
414- if self . read ( HEXDIG ) ? && self . read_str ( "." ) && self . read ( IPV_FUTURE ) ? {
413+ if self . read :: < Hexdig > ( ) ? && self . read_str ( "." ) && self . read :: < IpvFuture > ( ) ? {
415414 return Ok ( ( ) ) ;
416415 }
417416 }
@@ -436,23 +435,25 @@ pub(crate) fn parse_v6(bytes: &[u8]) -> [u16; 8] {
436435}
437436
438437impl Parser < ' _ > {
439- fn select < T > ( & self , for_uri : T , for_iri : T ) -> T {
438+ #[ inline( always) ]
439+ fn select_read < U : Encoder , I : Encoder > ( & mut self ) -> Result < bool > {
440440 if self . constraints . ascii_only {
441- for_uri
441+ self . read :: < U > ( )
442442 } else {
443- for_iri
443+ self . read :: < I > ( )
444444 }
445445 }
446446
447447 fn read_v4_or_reg_name ( & mut self ) -> Result < HostMeta > {
448- let reg_name_table = self . select ( REG_NAME , IREG_NAME ) ;
449- Ok ( match ( self . read_v4 ( ) , self . read ( reg_name_table) ?) {
450- ( Some ( _addr) , false ) => HostMeta :: Ipv4 (
451- #[ cfg( feature = "net" ) ]
452- _addr. into ( ) ,
453- ) ,
454- _ => HostMeta :: RegName ,
455- } )
448+ Ok (
449+ match ( self . read_v4 ( ) , self . select_read :: < RegName , IRegName > ( ) ?) {
450+ ( Some ( _addr) , false ) => HostMeta :: Ipv4 (
451+ #[ cfg( feature = "net" ) ]
452+ _addr. into ( ) ,
453+ ) ,
454+ _ => HostMeta :: RegName ,
455+ } ,
456+ )
456457 }
457458
458459 fn read_host ( & mut self ) -> Result < HostMeta > {
@@ -463,7 +464,7 @@ impl Parser<'_> {
463464 }
464465
465466 fn parse_from_scheme ( & mut self ) -> Result < ( ) > {
466- self . read ( SCHEME ) ?;
467+ self . read :: < Scheme > ( ) ?;
467468
468469 if self . peek ( 0 ) == Some ( b':' ) {
469470 // Scheme starts with a letter.
@@ -493,110 +494,78 @@ impl Parser<'_> {
493494 }
494495
495496 fn parse_from_authority ( & mut self ) -> Result < ( ) > {
496- let host;
497+ // We first try to read host and port, noting that
498+ // a reg-name or IPv4address can also be part of userinfo.
499+ let host_start = self . pos ;
500+ let host_meta = self . read_host ( ) ?;
501+
502+ let mut auth_meta = AuthMeta {
503+ host_bounds : ( host_start, self . pos ) ,
504+ host_meta,
505+ } ;
497506
498- let mut colon_cnt = 0 ;
499- let mut colon_idx = 0 ;
507+ self . read_port ( ) ;
500508
501- let auth_start = self . pos ;
509+ if let HostMeta :: Ipv4 ( ..) | HostMeta :: RegName = host_meta {
510+ let userinfo_read = self . select_read :: < Userinfo , IUserinfo > ( ) ?;
502511
503- let userinfo_table = self . select ( USERINFO , IUSERINFO ) ;
504- // `userinfo_table` contains userinfo, registered name, ':', and port.
505- self . read_with ( userinfo_table, |i, x| {
506- if x == ':' as u32 {
507- colon_cnt += 1 ;
508- colon_idx = i;
509- }
510- } ) ?;
512+ if self . peek ( 0 ) == Some ( b'@' ) {
513+ // Userinfo present.
514+ // INVARIANT: Skipping "@" is fine.
515+ self . skip ( 1 ) ;
511516
512- if self . peek ( 0 ) == Some ( b'@' ) {
513- // Userinfo present.
514- // INVARIANT: Skipping "@" is fine.
515- self . skip ( 1 ) ;
517+ let host_start = self . pos ;
518+ let host_meta = self . read_host ( ) ?;
516519
517- let host_start = self . pos ;
518- let meta = self . read_host ( ) ?;
519- host = ( host_start, self . pos , meta) ;
520+ auth_meta = AuthMeta {
521+ host_bounds : ( host_start, self . pos ) ,
522+ host_meta,
523+ } ;
520524
521- self . read_port ( ) ;
522- } else if self . pos == auth_start {
523- // Nothing read. We're now at the start of an IP literal or the path.
524- if let Some ( meta) = self . read_ip_literal ( ) ? {
525- host = ( auth_start, self . pos , meta) ;
526525 self . read_port ( ) ;
527- } else {
528- // Empty authority.
529- host = ( self . pos , self . pos , HostMeta :: RegName ) ;
526+ } else if userinfo_read {
527+ err ! ( self . pos, UnexpectedCharOrEnd ) ;
530528 }
531- } else {
532- // The whole authority read. Try to parse the host and port.
533- let host_end = match colon_cnt {
534- // All host.
535- 0 => self . pos ,
536- // Host and port.
537- 1 => {
538- for i in colon_idx + 1 ..self . pos {
539- if !self . bytes [ i] . is_ascii_digit ( ) {
540- err ! ( i, UnexpectedCharOrEnd ) ;
541- }
542- }
543- colon_idx
544- }
545- // Multiple colons.
546- _ => err ! ( colon_idx, UnexpectedCharOrEnd ) ,
547- } ;
548-
549- let meta = parse_v4_or_reg_name ( & self . bytes [ auth_start..host_end] ) ;
550- host = ( auth_start, host_end, meta) ;
551529 }
552530
553- self . out . auth_meta = Some ( AuthMeta {
554- host_bounds : ( host. 0 , host. 1 ) ,
555- host_meta : host. 2 ,
556- } ) ;
531+ self . out . auth_meta = Some ( auth_meta) ;
557532 self . parse_from_path ( PathKind :: AbEmpty )
558533 }
559534
560535 fn parse_from_path ( & mut self , kind : PathKind ) -> Result < ( ) > {
561- let path_table = self . select ( PATH , IPATH ) ;
562- self . out . path_bounds = match kind {
563- PathKind :: General => {
564- let start = self . pos ;
565- self . read ( path_table) ?;
566- ( start, self . pos )
567- }
568- PathKind :: AbEmpty => {
569- let start = self . pos ;
570- // Either empty or starting with '/'.
571- if self . read ( path_table) ? && self . bytes [ start] != b'/' {
572- err ! ( start, UnexpectedCharOrEnd ) ;
573- }
574- ( start, self . pos )
575- }
536+ let path_start;
537+
538+ match kind {
539+ PathKind :: General | PathKind :: AbEmpty => path_start = self . pos ,
576540 PathKind :: ContinuedNoScheme => {
577- let segment_table = self . select ( SEGMENT_NZ_NC , ISEGMENT_NZ_NC ) ;
578- self . read ( segment_table) ?;
541+ path_start = 0 ;
542+
543+ self . select_read :: < SegmentNzNc , ISegmentNzNc > ( ) ?;
579544
580545 if self . peek ( 0 ) == Some ( b':' ) {
581546 // In a relative reference, the first path
582547 // segment cannot contain a colon character.
583548 err ! ( self . pos, UnexpectedCharOrEnd ) ;
584549 }
585-
586- self . read ( path_table) ?;
587- ( 0 , self . pos )
588550 }
589551 } ;
590552
553+ if self . select_read :: < Path , IPath > ( ) ?
554+ && kind == PathKind :: AbEmpty
555+ && self . bytes [ path_start] != b'/'
556+ {
557+ err ! ( path_start, UnexpectedCharOrEnd ) ;
558+ }
559+
560+ self . out . path_bounds = ( path_start, self . pos ) ;
561+
591562 if self . read_str ( "?" ) {
592- let query_table = self . select ( QUERY , IQUERY ) ;
593- self . read ( query_table) ?;
563+ self . select_read :: < Query , IQuery > ( ) ?;
594564 self . out . query_end = NonZeroUsize :: new ( self . pos ) ;
595565 }
596566
597567 if self . read_str ( "#" ) {
598- let fragment_table = self . select ( FRAGMENT , IFRAGMENT ) ;
599- self . read ( fragment_table) ?;
568+ self . select_read :: < Fragment , IFragment > ( ) ?;
600569 }
601570
602571 if self . has_remaining ( ) {
0 commit comments