Skip to content

Commit 17fb65c

Browse files
committed
compat impl
1 parent 556d85e commit 17fb65c

File tree

1 file changed

+80
-1
lines changed

1 file changed

+80
-1
lines changed

portable/src/implementation/portable/algorithm_new.rs

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::simd::{
44
simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount,
55
};
66

7-
use crate::{basic, implementation::helpers::SIMD_CHUNK_SIZE};
7+
use crate::{basic, compat, implementation::helpers::SIMD_CHUNK_SIZE};
88

99
#[cfg(all(
1010
any(target_arch = "aarch64", target_arch = "arm"),
@@ -578,6 +578,7 @@ where
578578
}
579579
let rem = chunks.remainder();
580580
if !rem.is_ascii() {
581+
// FIXME: simd???
581582
let simd_input = SimdInput::<N, O>::new_partial(rem);
582583
algorithm.check_utf8(&simd_input);
583584
}
@@ -588,9 +589,87 @@ where
588589
Ok(())
589590
}
590591
}
592+
593+
#[inline]
594+
#[expect(clippy::redundant_else)] // more readable
595+
fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> {
596+
use crate::implementation::helpers::SIMD_CHUNK_SIZE;
597+
let mut algorithm = Self::new();
598+
let mut idx = 0;
599+
let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE);
600+
let mut only_ascii = true;
601+
602+
'outer: loop {
603+
if only_ascii {
604+
while let Some(chunk) = chunks.next() {
605+
let simd_input = SimdInput::new(chunk);
606+
if !simd_input.is_ascii() {
607+
algorithm.check_block(&simd_input);
608+
if algorithm.has_error() {
609+
return Err(idx);
610+
} else {
611+
only_ascii = false;
612+
idx += SIMD_CHUNK_SIZE;
613+
continue 'outer;
614+
}
615+
}
616+
idx += SIMD_CHUNK_SIZE;
617+
}
618+
} else {
619+
while let Some(chunk) = chunks.next() {
620+
let simd_input = SimdInput::new(chunk);
621+
if simd_input.is_ascii() {
622+
algorithm.check_incomplete_pending();
623+
if algorithm.has_error() {
624+
return Err(idx);
625+
} else {
626+
// we are in pure ASCII territory again
627+
only_ascii = true;
628+
idx += SIMD_CHUNK_SIZE;
629+
continue 'outer;
630+
}
631+
} else {
632+
algorithm.check_block(&simd_input);
633+
if algorithm.has_error() {
634+
return Err(idx);
635+
}
636+
}
637+
idx += SIMD_CHUNK_SIZE;
638+
}
639+
}
640+
break;
641+
}
642+
let rem = chunks.remainder();
643+
if !rem.is_ascii() {
644+
// FIXME: simd???
645+
let simd_input = SimdInput::<N, O>::new_partial(rem);
646+
algorithm.check_utf8(&simd_input);
647+
}
648+
algorithm.check_incomplete_pending();
649+
if algorithm.has_error() {
650+
Err(idx)
651+
} else {
652+
Ok(())
653+
}
654+
}
591655
}
592656

593657
#[inline]
594658
pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> {
595659
Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input)
596660
}
661+
662+
/// Validation implementation for CPUs supporting the SIMD extension (see module).
663+
///
664+
/// # Errors
665+
/// Returns [`compat::Utf8Error`] with detailed error information on failure.
666+
///
667+
/// # Safety
668+
/// This function is inherently unsafe because it is compiled with SIMD extensions
669+
/// enabled. Make sure that the CPU supports it before calling.
670+
///
671+
#[inline]
672+
pub fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Utf8Error> {
673+
Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input)
674+
.map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx))
675+
}

0 commit comments

Comments
 (0)