Skip to content

Commit a562a70

Browse files
committed
public imp
1 parent 6f47f13 commit a562a70

File tree

4 files changed

+107
-154
lines changed

4 files changed

+107
-154
lines changed

portable/src/basic.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ pub mod imp {
7474
///
7575
/// General usage:
7676
/// ```rust
77-
/// use simdutf8::basic::imp::Utf8Validator;
77+
/// use simdutf8_portable::basic::imp::Utf8Validator;
7878
/// use std::io::{stdin, Read, Result};
7979
///
8080
/// # #[cfg(target_arch = "x86_64")]
@@ -198,16 +198,16 @@ pub mod imp {
198198
pub mod portable {
199199
/// Includes the validation implementation using 128-bit portable SIMD.
200200
pub mod simd128 {
201+
pub use crate::implementation::portable::simd128::validate_utf8_basic as validate_utf8;
201202
pub use crate::implementation::portable::simd128::ChunkedUtf8ValidatorImp;
202203
pub use crate::implementation::portable::simd128::Utf8ValidatorImp;
203-
pub use crate::implementation::portable::simd128::validate_utf8_basic as validate_utf8;
204204
}
205205

206206
/// Includes the validation implementation using 256-bit portable SIMD.
207207
pub mod simd256 {
208+
pub use crate::implementation::portable::simd256::validate_utf8_basic as validate_utf8;
208209
pub use crate::implementation::portable::simd256::ChunkedUtf8ValidatorImp;
209210
pub use crate::implementation::portable::simd256::Utf8ValidatorImp;
210-
pub use crate::implementation::portable::simd256::validate_utf8_basic as validate_utf8;
211211
}
212212
}
213213
}

portable/src/compat.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> {
102102
/// Allows direct access to the platform-specific unsafe validation implementations.
103103
#[cfg(feature = "public_imp")]
104104
pub mod imp {
105+
/// FIXME: add docs
105106
pub mod portable {
106107
/// Includes the validation implementation for 128-bit portable SIMD.
107108
pub mod simd128 {

portable/src/implementation/portable/algorithm_safe.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,3 +673,73 @@ pub fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Ut
673673
Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input)
674674
.map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx))
675675
}
676+
677+
/// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait.
678+
///
679+
/// This implementation requires CPU SIMD features specified by the module it resides in.
680+
/// It is undefined behavior to call it if the required CPU features are not
681+
/// available.
682+
#[cfg(feature = "public_imp")]
683+
pub struct ChunkedUtf8ValidatorImp {
684+
algorithm: Utf8CheckAlgorithm<16, 4>,
685+
}
686+
687+
#[cfg(feature = "public_imp")]
688+
impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp {
689+
#[inline]
690+
#[must_use]
691+
unsafe fn new() -> Self {
692+
Self {
693+
algorithm: Utf8CheckAlgorithm::<16, 4>::new(),
694+
}
695+
}
696+
697+
#[inline]
698+
unsafe fn update_from_chunks(&mut self, input: &[u8]) {
699+
use crate::implementation::helpers::SIMD_CHUNK_SIZE;
700+
701+
assert!(
702+
input.len() % SIMD_CHUNK_SIZE == 0,
703+
"Input size must be a multiple of 64."
704+
);
705+
for chunk in input.chunks_exact(SIMD_CHUNK_SIZE) {
706+
let input = SimdInput::new(chunk);
707+
self.algorithm.check_utf8(&input);
708+
}
709+
}
710+
711+
#[inline]
712+
unsafe fn finalize(
713+
mut self,
714+
remaining_input: core::option::Option<&[u8]>,
715+
) -> core::result::Result<(), basic::Utf8Error> {
716+
use crate::implementation::helpers::SIMD_CHUNK_SIZE;
717+
718+
if let Some(mut remaining_input) = remaining_input {
719+
if !remaining_input.is_empty() {
720+
let len = remaining_input.len();
721+
let chunks_lim = len - (len % SIMD_CHUNK_SIZE);
722+
if chunks_lim > 0 {
723+
self.update_from_chunks(&remaining_input[..chunks_lim]);
724+
}
725+
let rem = len - chunks_lim;
726+
if rem > 0 {
727+
remaining_input = &remaining_input[chunks_lim..];
728+
let mut tmpbuf = TempSimdChunk::new();
729+
tmpbuf
730+
.0
731+
.as_mut_ptr()
732+
.copy_from_nonoverlapping(remaining_input.as_ptr(), remaining_input.len());
733+
let simd_input = SimdInput::new(&tmpbuf.0);
734+
self.algorithm.check_utf8(&simd_input);
735+
}
736+
}
737+
}
738+
self.algorithm.check_incomplete_pending();
739+
if self.algorithm.has_error() {
740+
Err(basic::Utf8Error {})
741+
} else {
742+
Ok(())
743+
}
744+
}
745+
}

portable/tests/tests.rs

Lines changed: 33 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -68,155 +68,72 @@ mod public_imp {
6868

6969
#[allow(unused_variables)] // nothing to do if no SIMD implementation is available
7070
pub(super) fn test_valid(input: &[u8]) {
71-
if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
72-
#[cfg(target_feature = "avx2")]
73-
unsafe {
74-
assert!(simdutf8::basic::imp::x86::avx2::validate_utf8(input).is_ok());
75-
assert!(simdutf8::compat::imp::x86::avx2::validate_utf8(input).is_ok());
76-
77-
test_streaming::<simdutf8::basic::imp::x86::avx2::Utf8ValidatorImp>(input, true);
78-
test_chunked_streaming::<simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp>(
79-
input, true,
80-
);
81-
}
82-
83-
#[cfg(target_feature = "sse4.2")]
84-
unsafe {
85-
assert!(simdutf8::basic::imp::x86::sse42::validate_utf8(input).is_ok());
86-
assert!(simdutf8::compat::imp::x86::sse42::validate_utf8(input).is_ok());
87-
88-
test_streaming::<simdutf8::basic::imp::x86::sse42::Utf8ValidatorImp>(input, true);
89-
test_chunked_streaming::<simdutf8::basic::imp::x86::sse42::ChunkedUtf8ValidatorImp>(
90-
input, true,
91-
);
92-
}
93-
}
94-
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
71+
#[cfg(feature = "public_imp")]
9572
unsafe {
96-
assert!(simdutf8::basic::imp::aarch64::neon::validate_utf8(input).is_ok());
97-
assert!(simdutf8::compat::imp::aarch64::neon::validate_utf8(input).is_ok());
98-
99-
test_streaming::<simdutf8::basic::imp::aarch64::neon::Utf8ValidatorImp>(input, true);
100-
test_chunked_streaming::<simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp>(
101-
input, true,
73+
assert!(simdutf8_portable::basic::imp::portable::simd128::validate_utf8(input).is_ok());
74+
assert!(
75+
simdutf8_portable::compat::imp::portable::simd128::validate_utf8(input).is_ok()
10276
);
103-
}
104-
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
105-
unsafe {
106-
assert!(simdutf8::basic::imp::wasm32::simd128::validate_utf8(input).is_ok());
107-
assert!(simdutf8::compat::imp::wasm32::simd128::validate_utf8(input).is_ok());
10877

109-
test_streaming::<simdutf8::basic::imp::wasm32::simd128::Utf8ValidatorImp>(input, true);
110-
test_chunked_streaming::<simdutf8::basic::imp::wasm32::simd128::ChunkedUtf8ValidatorImp>(
111-
input, true,
112-
);
113-
}
114-
#[cfg(feature = "portable_public_imp")]
115-
unsafe {
116-
assert!(simdutf8::basic::imp::portable::simd128::validate_utf8(input).is_ok());
117-
assert!(simdutf8::compat::imp::portable::simd128::validate_utf8(input).is_ok());
118-
119-
test_streaming::<simdutf8::basic::imp::portable::simd128::Utf8ValidatorImp>(
78+
test_streaming::<simdutf8_portable::basic::imp::portable::simd128::Utf8ValidatorImp>(
12079
input, true,
12180
);
12281
test_chunked_streaming::<
123-
simdutf8::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp,
82+
simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp,
12483
>(input, true);
12584

126-
assert!(simdutf8::basic::imp::portable::simd256::validate_utf8(input).is_ok());
127-
assert!(simdutf8::compat::imp::portable::simd256::validate_utf8(input).is_ok());
85+
assert!(simdutf8_portable::basic::imp::portable::simd256::validate_utf8(input).is_ok());
86+
assert!(
87+
simdutf8_portable::compat::imp::portable::simd256::validate_utf8(input).is_ok()
88+
);
12889

129-
test_streaming::<simdutf8::basic::imp::portable::simd256::Utf8ValidatorImp>(
90+
test_streaming::<simdutf8_portable::basic::imp::portable::simd256::Utf8ValidatorImp>(
13091
input, true,
13192
);
13293
test_chunked_streaming::<
133-
simdutf8::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp,
94+
simdutf8_portable::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp,
13495
>(input, true);
13596
}
13697
}
13798

13899
#[allow(unused_variables)] // nothing to do if no SIMD implementation is available
139100
pub(super) fn test_invalid(input: &[u8], valid_up_to: usize, error_len: Option<usize>) {
140-
if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
141-
#[cfg(target_feature = "avx2")]
142-
unsafe {
143-
assert!(simdutf8::basic::imp::x86::avx2::validate_utf8(input).is_err());
144-
let err = simdutf8::compat::imp::x86::avx2::validate_utf8(input).unwrap_err();
145-
assert_eq!(err.valid_up_to(), valid_up_to);
146-
assert_eq!(err.error_len(), error_len);
147-
148-
test_streaming::<simdutf8::basic::imp::x86::avx2::Utf8ValidatorImp>(input, false);
149-
test_chunked_streaming::<simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp>(
150-
input, false,
151-
);
152-
}
153-
#[cfg(target_feature = "sse4.2")]
154-
unsafe {
155-
assert!(simdutf8::basic::imp::x86::sse42::validate_utf8(input).is_err());
156-
let err = simdutf8::compat::imp::x86::sse42::validate_utf8(input).unwrap_err();
157-
assert_eq!(err.valid_up_to(), valid_up_to);
158-
assert_eq!(err.error_len(), error_len);
159-
160-
test_streaming::<simdutf8::basic::imp::x86::sse42::Utf8ValidatorImp>(input, false);
161-
test_chunked_streaming::<simdutf8::basic::imp::x86::sse42::ChunkedUtf8ValidatorImp>(
162-
input, false,
163-
);
164-
}
165-
}
166-
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
167-
unsafe {
168-
assert!(simdutf8::basic::imp::aarch64::neon::validate_utf8(input).is_err());
169-
let err = simdutf8::compat::imp::aarch64::neon::validate_utf8(input).unwrap_err();
170-
assert_eq!(err.valid_up_to(), valid_up_to);
171-
assert_eq!(err.error_len(), error_len);
172-
173-
test_streaming::<simdutf8::basic::imp::aarch64::neon::Utf8ValidatorImp>(input, false);
174-
test_chunked_streaming::<simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp>(
175-
input, false,
176-
);
177-
}
178-
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
101+
#[cfg(feature = "public_imp")]
179102
unsafe {
180-
assert!(simdutf8::basic::imp::wasm32::simd128::validate_utf8(input).is_err());
181-
let err = simdutf8::compat::imp::wasm32::simd128::validate_utf8(input).unwrap_err();
182-
assert_eq!(err.valid_up_to(), valid_up_to);
183-
assert_eq!(err.error_len(), error_len);
184-
185-
test_streaming::<simdutf8::basic::imp::wasm32::simd128::Utf8ValidatorImp>(input, false);
186-
test_chunked_streaming::<simdutf8::basic::imp::wasm32::simd128::ChunkedUtf8ValidatorImp>(
187-
input, false,
103+
assert!(
104+
simdutf8_portable::basic::imp::portable::simd128::validate_utf8(input).is_err()
188105
);
189-
}
190-
#[cfg(feature = "portable_public_imp")]
191-
unsafe {
192-
assert!(simdutf8::basic::imp::portable::simd128::validate_utf8(input).is_err());
193-
let err = simdutf8::compat::imp::portable::simd128::validate_utf8(input).unwrap_err();
106+
let err = simdutf8_portable::compat::imp::portable::simd128::validate_utf8(input)
107+
.unwrap_err();
194108
assert_eq!(err.valid_up_to(), valid_up_to);
195109
assert_eq!(err.error_len(), error_len);
196110

197-
test_streaming::<simdutf8::basic::imp::portable::simd128::Utf8ValidatorImp>(
111+
test_streaming::<simdutf8_portable::basic::imp::portable::simd128::Utf8ValidatorImp>(
198112
input, false,
199113
);
200114
test_chunked_streaming::<
201-
simdutf8::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp,
115+
simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp,
202116
>(input, false);
203117

204-
assert!(simdutf8::basic::imp::portable::simd256::validate_utf8(input).is_err());
205-
let err = simdutf8::compat::imp::portable::simd256::validate_utf8(input).unwrap_err();
118+
assert!(
119+
simdutf8_portable::basic::imp::portable::simd256::validate_utf8(input).is_err()
120+
);
121+
let err = simdutf8_portable::compat::imp::portable::simd256::validate_utf8(input)
122+
.unwrap_err();
206123
assert_eq!(err.valid_up_to(), valid_up_to);
207124
assert_eq!(err.error_len(), error_len);
208125

209-
test_streaming::<simdutf8::basic::imp::portable::simd256::Utf8ValidatorImp>(
126+
test_streaming::<simdutf8_portable::basic::imp::portable::simd256::Utf8ValidatorImp>(
210127
input, false,
211128
);
212129
test_chunked_streaming::<
213-
simdutf8::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp,
130+
simdutf8_portable::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp,
214131
>(input, false);
215132
}
216133
}
217134

218135
#[allow(unused)] // not used if no SIMD implementation is available
219-
fn test_streaming<T: simdutf8::basic::imp::Utf8Validator>(input: &[u8], ok: bool) {
136+
fn test_streaming<T: simdutf8_portable::basic::imp::Utf8Validator>(input: &[u8], ok: bool) {
220137
unsafe {
221138
let mut validator = T::new();
222139
validator.update(input);
@@ -228,7 +145,7 @@ mod public_imp {
228145
}
229146

230147
#[allow(unused)] // not used if no SIMD implementation is available
231-
fn test_streaming_blocks<T: simdutf8::basic::imp::Utf8Validator>(
148+
fn test_streaming_blocks<T: simdutf8_portable::basic::imp::Utf8Validator>(
232149
input: &[u8],
233150
block_size: usize,
234151
ok: bool,
@@ -243,7 +160,7 @@ mod public_imp {
243160
}
244161

245162
#[allow(unused)] // not used if no SIMD implementation is available
246-
fn test_chunked_streaming<T: simdutf8::basic::imp::ChunkedUtf8Validator>(
163+
fn test_chunked_streaming<T: simdutf8_portable::basic::imp::ChunkedUtf8Validator>(
247164
input: &[u8],
248165
ok: bool,
249166
) {
@@ -253,7 +170,9 @@ mod public_imp {
253170
}
254171

255172
#[allow(unused)] // not used if no SIMD implementation is available
256-
fn test_chunked_streaming_with_chunk_size<T: simdutf8::basic::imp::ChunkedUtf8Validator>(
173+
fn test_chunked_streaming_with_chunk_size<
174+
T: simdutf8_portable::basic::imp::ChunkedUtf8Validator,
175+
>(
257176
input: &[u8],
258177
chunk_size: usize,
259178
ok: bool,
@@ -270,46 +189,9 @@ mod public_imp {
270189

271190
#[test]
272191
#[should_panic]
273-
#[cfg(all(
274-
any(target_arch = "x86", target_arch = "x86_64"),
275-
target_feature = "avx2"
276-
))]
277-
fn test_avx2_chunked_panic() {
278-
test_chunked_streaming_with_chunk_size::<
279-
simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp,
280-
>(b"abcd", 1, true);
281-
}
282-
283-
#[test]
284-
#[should_panic]
285-
#[cfg(all(
286-
any(target_arch = "x86", target_arch = "x86_64"),
287-
target_feature = "sse4.2"
288-
))]
289-
fn test_sse42_chunked_panic() {
290-
test_chunked_streaming_with_chunk_size::<
291-
simdutf8::basic::imp::x86::sse42::ChunkedUtf8ValidatorImp,
292-
>(b"abcd", 1, true);
293-
}
294-
295-
#[test]
296-
#[should_panic]
297-
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
298192
fn test_neon_chunked_panic() {
299193
test_chunked_streaming_with_chunk_size::<
300-
simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp,
301-
>(b"abcd", 1, true);
302-
}
303-
304-
// the test runner will ignore this test probably due to limitations of panic handling/threading
305-
// of that target--keeping this here so that when it can be tested properly, it will
306-
// FIXME: remove this comment once this works properly.
307-
#[test]
308-
#[should_panic]
309-
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
310-
fn test_simd128_chunked_panic() {
311-
test_chunked_streaming_with_chunk_size::<
312-
simdutf8::basic::imp::wasm32::simd128::ChunkedUtf8ValidatorImp,
194+
simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp,
313195
>(b"abcd", 1, true);
314196
}
315197
}

0 commit comments

Comments
 (0)