@@ -4,7 +4,7 @@ use std::simd::{
4
4
simd_swizzle, u8x16, LaneCount , Simd , SupportedLaneCount ,
5
5
} ;
6
6
7
- use crate :: { basic, implementation:: helpers:: SIMD_CHUNK_SIZE } ;
7
+ use crate :: { basic, compat , implementation:: helpers:: SIMD_CHUNK_SIZE } ;
8
8
9
9
#[ cfg( all(
10
10
any( target_arch = "aarch64" , target_arch = "arm" ) ,
@@ -578,6 +578,7 @@ where
578
578
}
579
579
let rem = chunks. remainder ( ) ;
580
580
if !rem. is_ascii ( ) {
581
+ // FIXME: simd???
581
582
let simd_input = SimdInput :: < N , O > :: new_partial ( rem) ;
582
583
algorithm. check_utf8 ( & simd_input) ;
583
584
}
@@ -588,9 +589,87 @@ where
588
589
Ok ( ( ) )
589
590
}
590
591
}
592
+
593
+ #[ inline]
594
+ #[ expect( clippy:: redundant_else) ] // more readable
595
+ fn validate_utf8_compat_simd0 ( input : & [ u8 ] ) -> core:: result:: Result < ( ) , usize > {
596
+ use crate :: implementation:: helpers:: SIMD_CHUNK_SIZE ;
597
+ let mut algorithm = Self :: new ( ) ;
598
+ let mut idx = 0 ;
599
+ let mut chunks = input. chunks_exact ( SIMD_CHUNK_SIZE ) ;
600
+ let mut only_ascii = true ;
601
+
602
+ ' outer: loop {
603
+ if only_ascii {
604
+ while let Some ( chunk) = chunks. next ( ) {
605
+ let simd_input = SimdInput :: new ( chunk) ;
606
+ if !simd_input. is_ascii ( ) {
607
+ algorithm. check_block ( & simd_input) ;
608
+ if algorithm. has_error ( ) {
609
+ return Err ( idx) ;
610
+ } else {
611
+ only_ascii = false ;
612
+ idx += SIMD_CHUNK_SIZE ;
613
+ continue ' outer;
614
+ }
615
+ }
616
+ idx += SIMD_CHUNK_SIZE ;
617
+ }
618
+ } else {
619
+ while let Some ( chunk) = chunks. next ( ) {
620
+ let simd_input = SimdInput :: new ( chunk) ;
621
+ if simd_input. is_ascii ( ) {
622
+ algorithm. check_incomplete_pending ( ) ;
623
+ if algorithm. has_error ( ) {
624
+ return Err ( idx) ;
625
+ } else {
626
+ // we are in pure ASCII territory again
627
+ only_ascii = true ;
628
+ idx += SIMD_CHUNK_SIZE ;
629
+ continue ' outer;
630
+ }
631
+ } else {
632
+ algorithm. check_block ( & simd_input) ;
633
+ if algorithm. has_error ( ) {
634
+ return Err ( idx) ;
635
+ }
636
+ }
637
+ idx += SIMD_CHUNK_SIZE ;
638
+ }
639
+ }
640
+ break ;
641
+ }
642
+ let rem = chunks. remainder ( ) ;
643
+ if !rem. is_ascii ( ) {
644
+ // FIXME: simd???
645
+ let simd_input = SimdInput :: < N , O > :: new_partial ( rem) ;
646
+ algorithm. check_utf8 ( & simd_input) ;
647
+ }
648
+ algorithm. check_incomplete_pending ( ) ;
649
+ if algorithm. has_error ( ) {
650
+ Err ( idx)
651
+ } else {
652
+ Ok ( ( ) )
653
+ }
654
+ }
591
655
}
592
656
593
657
#[ inline]
594
658
pub fn validate_utf8_basic ( input : & [ u8 ] ) -> core:: result:: Result < ( ) , basic:: Utf8Error > {
595
659
Utf8CheckAlgorithm :: < 16 , 4 > :: validate_utf8_basic ( input)
596
660
}
661
+
662
+ /// Validation implementation for CPUs supporting the SIMD extension (see module).
663
+ ///
664
+ /// # Errors
665
+ /// Returns [`compat::Utf8Error`] with detailed error information on failure.
666
+ ///
667
+ /// # Safety
668
+ /// This function is inherently unsafe because it is compiled with SIMD extensions
669
+ /// enabled. Make sure that the CPU supports it before calling.
670
+ ///
671
+ #[ inline]
672
+ pub fn validate_utf8_compat ( input : & [ u8 ] ) -> core:: result:: Result < ( ) , compat:: Utf8Error > {
673
+ Utf8CheckAlgorithm :: < 16 , 4 > :: validate_utf8_compat_simd0 ( input)
674
+ . map_err ( |idx| crate :: implementation:: helpers:: get_compat_error ( input, idx) )
675
+ }
0 commit comments