Skip to content

Commit 5fda078

Browse files
authored
Make sure there are no memcpy calls (#108)
Apparently, even with the hand-rolled `memcpy_unaligned_nonoverlapping_inline_opt_lt_64`, LLVM still replaced the code that copies the last < 8 bytes with a call to `memcpy`. This fixes it. The CI inlining tests now check all symbols that are present, including undefined (referenced) ones. The rationale is that any referenced symbol that is present must be called from simdutf8 code. As a drive-by change, this commit uses `get_unchecked` to get rid of panicking code in `validate_utf8_at_offset` and `get_compat_error`.
1 parent 5d531be commit 5fda078

File tree

3 files changed

+66
-17
lines changed

3 files changed

+66
-17
lines changed

inlining/check-inlining.sh

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,30 @@ build_args="${3:-}"
77
cargo clean --quiet
88
cargo build --quiet --release --target $target $build_args
99
LLVM_NM=$(rustc --print sysroot)/lib/rustlib/$(rustc -vV | sed -n 's|host: ||p')/bin/llvm-nm
10-
nm_output=$($LLVM_NM --defined-only ../target/$target/release/libsimdutf8.rlib)
10+
nm_output=$($LLVM_NM ../target/$target/release/libsimdutf8.rlib)
1111
if [[ $target == *darwin* ]]; then
12-
pattern=" (t|T) _"
12+
pattern=" (t|T|U) _"
1313
cut_arg=21
1414
elif [[ $target == *armv7* ]]; then
15-
pattern=" (t|T) "
15+
pattern=" (t|T|U) "
1616
cut_arg=12
1717
else
18-
pattern=" (t|T) "
18+
pattern=" (t|T|U) "
1919
cut_arg=20
2020
fi
21-
inline_ignore_pattern='drop_in_place|::fmt::|^\$x\.|^<T as core::convert::From<T>>::from$|^core::result::Result<T,E>::map_err$'
22-
echo "$nm_output" | rustfilt | egrep "$pattern" | cut -c "$cut_arg"- | grep -Ev "$inline_ignore_pattern" | sort | diff -u $expected_fns -
21+
inline_ignore_pattern=\
22+
'__aeabi_unwind_cpp_pr(0|1)|'\
23+
'drop_in_place|'\
24+
'core::str::converts::from_utf8|'\
25+
'std_detect::detect::|'\
26+
'::fmt::|'\
27+
'^\$x\.|'\
28+
'^<T as core::convert::From<T>>::from$|'\
29+
'^core::str::Utf8Error::error_len$|'\
30+
'^core::str::Utf8Error::valid_up_to$|'\
31+
'^core::str::from_utf8$|'\
32+
'^core::result::Result<T,E>::map_err$'
33+
if [[ $target == *wasm* ]]; then
34+
inline_ignore_pattern="$inline_ignore_pattern|ct_function_table|pointer|r::converts::from_utf8|t::Formatter::write_str|t::write"
35+
fi
36+
echo "$nm_output" | rustfilt | egrep "$pattern" | cut -c "$cut_arg"- | grep -Ev "$inline_ignore_pattern" | sort -u | diff -u $expected_fns -

src/implementation/helpers.rs

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,21 @@
1+
use core::hint::unreachable_unchecked;
2+
13
type Utf8ErrorCompat = crate::compat::Utf8Error;
24

5+
/// Uses core::str::from_utf8 to validate that the subslice
6+
/// starting at `offset` is valid UTF-8.
7+
///
8+
/// # Safety
9+
/// Caller has to ensure that `offset` is in bounds.
10+
///
311
#[inline]
412
#[flexpect::e(clippy::cast_possible_truncation)]
5-
pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> {
6-
match core::str::from_utf8(&input[offset..]) {
13+
pub(crate) unsafe fn validate_utf8_at_offset(
14+
input: &[u8],
15+
offset: usize,
16+
) -> Result<(), Utf8ErrorCompat> {
17+
let input = input.get_unchecked(offset..);
18+
match core::str::from_utf8(input) {
719
Ok(_) => Ok(()),
820
Err(err) => Err(Utf8ErrorCompat {
921
valid_up_to: err.valid_up_to() + offset,
@@ -15,8 +27,17 @@ pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(),
1527
}
1628
}
1729

30+
/// Necessary for 1.38 compatibility
31+
#[inline]
32+
unsafe fn unwrap_err_unchecked<O, E>(r: Result<O, E>) -> E {
33+
match r {
34+
// SAFETY: the safety contract must be upheld by the caller.
35+
Ok(_) => unreachable_unchecked(),
36+
Err(e) => e,
37+
}
38+
}
39+
1840
#[cold]
19-
#[flexpect::e(clippy::unwrap_used)]
2041
#[allow(dead_code)] // only used if there is a SIMD implementation
2142
pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat {
2243
let offset = if failing_block_pos == 0 {
@@ -29,12 +50,14 @@ pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8Er
2950
// three bytes are all continuation bytes then the previous block ends with a four byte
3051
// UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the
3152
// current block in that case.
53+
//
54+
// SAFETY: safe because failing_block_pos is in bounds.
3255
(1..=3)
33-
.find(|i| input[failing_block_pos - i] >> 6 != 0b10)
56+
.find(|i| *(unsafe { input.get_unchecked(failing_block_pos - i) }) >> 6 != 0b10)
3457
.map_or(failing_block_pos, |i| failing_block_pos - i)
3558
};
36-
// UNWRAP: safe because the SIMD UTF-8 validation found an error
37-
validate_utf8_at_offset(input, offset).unwrap_err()
59+
// SAFETY: safe because the SIMD UTF-8 validation found an error and offset is in bounds.
60+
unsafe { unwrap_err_unchecked(validate_utf8_at_offset(input, offset)) }
3861
}
3962

4063
#[allow(dead_code)] // only used if there is a SIMD implementation
@@ -70,11 +93,22 @@ pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
7093
memcpy_u64(&mut src, &mut dest);
7194
len -= 8;
7295
}
73-
while len > 0 {
96+
if len >= 4 {
97+
dest.cast::<u32>()
98+
.write_unaligned(src.cast::<u32>().read_unaligned());
99+
src = src.offset(4);
100+
dest = dest.offset(4);
101+
len -= 4;
102+
}
103+
if len >= 2 {
104+
dest.cast::<u16>()
105+
.write_unaligned(src.cast::<u16>().read_unaligned());
106+
src = src.offset(2);
107+
dest = dest.offset(2);
108+
len -= 2;
109+
}
110+
if len == 1 {
74111
*dest = *src;
75-
src = src.offset(1);
76-
dest = dest.offset(1);
77-
len -= 1;
78112
}
79113
}
80114

src/implementation/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,6 @@ pub(crate) fn validate_utf8_basic_fallback(input: &[u8]) -> Result<(), crate::ba
108108

109109
#[inline]
110110
pub(crate) fn validate_utf8_compat_fallback(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
111-
helpers::validate_utf8_at_offset(input, 0)
111+
// SAFETY: 0 is always in bounds
112+
unsafe { helpers::validate_utf8_at_offset(input, 0) }
112113
}

0 commit comments

Comments
 (0)