@@ -364,9 +364,90 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
364364// Impl for char
365365/////////////////////////////////////////////////////////////////////////////
366366
/// Searcher for a needle that is a single byte, driven over the raw bytes of
/// the haystack.
///
/// The `Pattern for char` implementation builds this when the needle's UTF-8
/// encoding is exactly one byte long.
#[derive(Clone, Debug)]
pub struct AsciiCharSearcher<'a> {
    /// The string being searched.
    haystack: &'a str,
    /// The byte to look for.
    needle: u8,
    /// Byte index of the front cursor: everything before it has already been
    /// consumed by forward searching. Starts at `0`.
    finger: usize,
    /// Byte index of the back cursor: everything from it onwards has already
    /// been consumed by reverse searching. Starts at `haystack.len()`.
    finger_back: usize,
}
375+
376+ unsafe impl < ' a > Searcher < ' a > for AsciiCharSearcher < ' a > {
377+ fn haystack ( & self ) -> & ' a str {
378+ self . haystack
379+ }
380+
381+ #[ inline]
382+ fn next ( & mut self ) -> SearchStep {
383+ let byte = self . haystack . as_bytes ( ) . get ( self . finger ) ;
384+ if let Some ( & byte) = byte {
385+ self . finger += 1 ;
386+ if byte == self . needle {
387+ SearchStep :: Match ( self . finger - 1 , self . finger )
388+ } else {
389+ SearchStep :: Reject ( self . finger - 1 , self . finger )
390+ }
391+ } else {
392+ SearchStep :: Done
393+ }
394+ }
395+
396+ #[ inline( always) ]
397+ fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
398+ match memchr:: memchr ( self . needle , unsafe {
399+ & self . haystack . as_bytes ( ) . get_unchecked ( self . finger ..self . finger_back )
400+ } ) {
401+ Some ( x) => {
402+ self . finger += x + 1 ;
403+ Some ( ( self . finger - 1 , self . finger ) )
404+ }
405+ None => None ,
406+ }
407+ }
408+
409+ // let next_reject use the default implementation from the Searcher trait
410+ }
411+
412+ unsafe impl < ' a > ReverseSearcher < ' a > for AsciiCharSearcher < ' a > {
413+ #[ inline]
414+ fn next_back ( & mut self ) -> SearchStep {
415+ let old_finger = self . finger_back ;
416+ let slice = unsafe { self . haystack . get_unchecked ( self . finger ..old_finger) } ;
417+
418+ let mut iter = slice. as_bytes ( ) . iter ( ) ;
419+ let old_len = iter. len ( ) ;
420+ if let Some ( ch) = iter. next_back ( ) {
421+ self . finger_back -= old_len - iter. len ( ) ;
422+ if * ch == self . needle {
423+ SearchStep :: Match ( self . finger_back , old_finger)
424+ } else {
425+ SearchStep :: Reject ( self . finger_back , old_finger)
426+ }
427+ } else {
428+ SearchStep :: Done
429+ }
430+ }
431+
432+ #[ inline]
433+ fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
434+ match memchr:: memrchr ( self . needle , self . haystack [ self . finger ..self . finger_back ] . as_bytes ( ) )
435+ {
436+ Some ( x) => {
437+ let index = self . finger + x;
438+ self . finger_back = index;
439+ Some ( ( self . finger_back , self . finger_back + 1 ) )
440+ }
441+ None => None ,
442+ }
443+ }
444+
445+ // let next_reject use the default implementation from the Searcher trait
446+ }
447+
367448/// Associated type for `<char as Pattern>::Searcher<'a>`.
368449#[ derive( Clone , Debug ) ]
369- pub struct CharSearcher < ' a > {
450+ pub struct UnicodeCharSearcher < ' a > {
370451 haystack : & ' a str ,
371452 // safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
372453 // This invariant can be broken *within* next_match and next_match_back, however
@@ -391,13 +472,13 @@ pub struct CharSearcher<'a> {
391472 utf8_encoded : [ u8 ; 4 ] ,
392473}
393474
394- impl CharSearcher < ' _ > {
475+ impl UnicodeCharSearcher < ' _ > {
395476 fn utf8_size ( & self ) -> usize {
396477 self . utf8_size . into ( )
397478 }
398479}
399480
400- unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
481+ unsafe impl < ' a > Searcher < ' a > for UnicodeCharSearcher < ' a > {
401482 #[ inline]
402483 fn haystack ( & self ) -> & ' a str {
403484 self . haystack
@@ -450,7 +531,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
450531 //
451532 // However, this is totally okay. While we have the invariant that
452533 // self.finger is on a UTF8 boundary, this invariant is not relied upon
453- // within this method (it is relied upon in CharSearcher ::next()).
534+ // within this method (it is relied upon in UnicodeCharSearcher ::next()).
454535 //
455536 // We only exit this method when we reach the end of the string, or if we
456537 // find something. When we find something the `finger` will be set
@@ -475,7 +556,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
475556 // let next_reject use the default implementation from the Searcher trait
476557}
477558
478- unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
559+ unsafe impl < ' a > ReverseSearcher < ' a > for UnicodeCharSearcher < ' a > {
479560 #[ inline]
480561 fn next_back ( & mut self ) -> SearchStep {
481562 let old_finger = self . finger_back ;
@@ -550,6 +631,57 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
550631}
551632
552633impl < ' a > DoubleEndedSearcher < ' a > for CharSearcher < ' a > { }
634+ #[ derive( Clone , Debug ) ]
635+ ///h
636+ pub enum CharSearcher < ' a > {
637+ ///h
638+ AsciiCharSearcher ( AsciiCharSearcher < ' a > ) ,
639+ ///h
640+ UnicodeCharSearcher ( UnicodeCharSearcher < ' a > ) ,
641+ }
642+ unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
643+ #[ inline]
644+
645+ fn haystack ( & self ) -> & ' a str {
646+ let ( Self :: UnicodeCharSearcher ( UnicodeCharSearcher { haystack, .. } )
647+ | Self :: AsciiCharSearcher ( AsciiCharSearcher { haystack, .. } ) ) = self ;
648+ haystack
649+ }
650+ #[ inline]
651+
652+ fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
653+ match self {
654+ CharSearcher :: AsciiCharSearcher ( x) => x. next_match ( ) ,
655+ CharSearcher :: UnicodeCharSearcher ( x) => x. next_match ( ) ,
656+ }
657+ }
658+ #[ inline]
659+
660+ fn next ( & mut self ) -> SearchStep {
661+ match self {
662+ CharSearcher :: AsciiCharSearcher ( x) => x. next ( ) ,
663+ CharSearcher :: UnicodeCharSearcher ( x) => x. next ( ) ,
664+ }
665+ }
666+ }
667+ unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
668+ #[ inline]
669+
670+ fn next_back ( & mut self ) -> SearchStep {
671+ match self {
672+ CharSearcher :: AsciiCharSearcher ( x) => x. next_back ( ) ,
673+ CharSearcher :: UnicodeCharSearcher ( x) => x. next_back ( ) ,
674+ }
675+ }
676+ #[ inline]
677+
678+ fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
679+ match self {
680+ CharSearcher :: AsciiCharSearcher ( x) => x. next_match_back ( ) ,
681+ CharSearcher :: UnicodeCharSearcher ( x) => x. next_match_back ( ) ,
682+ }
683+ }
684+ }
553685
554686/// Searches for chars that are equal to a given [`char`].
555687///
@@ -563,20 +695,31 @@ impl Pattern for char {
563695
564696 #[ inline]
565697 fn into_searcher < ' a > ( self , haystack : & ' a str ) -> Self :: Searcher < ' a > {
698+ if ( self as u32 ) < 128 { }
566699 let mut utf8_encoded = [ 0 ; MAX_LEN_UTF8 ] ;
567700 let utf8_size = self
568701 . encode_utf8 ( & mut utf8_encoded)
569702 . len ( )
570703 . try_into ( )
571704 . expect ( "char len should be less than 255" ) ;
572-
573- CharSearcher {
574- haystack,
575- finger : 0 ,
576- finger_back : haystack. len ( ) ,
577- needle : self ,
578- utf8_size,
579- utf8_encoded,
705+ if utf8_size == 1 {
706+ CharSearcher :: AsciiCharSearcher ( AsciiCharSearcher {
707+ haystack,
708+ needle : utf8_encoded[ 0 ] ,
709+ finger : 0 ,
710+ finger_back : haystack. len ( ) ,
711+ // available: None,
712+ // available_back: None,
713+ } )
714+ } else {
715+ CharSearcher :: UnicodeCharSearcher ( UnicodeCharSearcher {
716+ haystack,
717+ finger : 0 ,
718+ finger_back : haystack. len ( ) ,
719+ needle : self ,
720+ utf8_size,
721+ utf8_encoded,
722+ } )
580723 }
581724 }
582725
0 commit comments