Skip to content

Commit 0b44a36

Browse files
authored
Port ASCII optimization from simdjson: Check whole block at once (#18)
Port the ASCII optimization from simdjson: check the whole block at once by OR-ing its SIMD chunks together and then testing the combined result for any byte with the high bit set.
1 parent a6a2bd6 commit 0b44a36

File tree

3 files changed

+44
-29
lines changed

3 files changed

+44
-29
lines changed

src/implementation/macros.rs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,23 +37,34 @@ macro_rules! static_cast_i8 {
3737
};
3838
}
3939

40+
macro_rules! check_utf8 {
41+
($feat:expr, $t:ident) => {
42+
#[target_feature(enable = $feat)]
43+
#[inline]
44+
unsafe fn check_utf8(&self, previous: &mut Utf8CheckingState<$t>) {
45+
if likely!(self.is_ascii()) {
46+
previous.error =
47+
Utf8CheckingState::<$t>::check_eof(previous.error, previous.incomplete)
48+
} else {
49+
self.check_block(previous);
50+
}
51+
}
52+
};
53+
}
54+
4055
/// check_bytes() strategy
4156
macro_rules! check_bytes {
4257
($feat:expr, $t:ident) => {
4358
#[target_feature(enable = $feat)]
4459
#[inline]
4560
unsafe fn check_bytes(current: $t, previous: &mut Utf8CheckingState<$t>) {
46-
if likely!(Self::is_ascii(current)) {
47-
previous.error = Self::check_eof(previous.error, previous.incomplete)
48-
} else {
49-
let prev1 = Self::prev1(current, previous.prev);
50-
let sc = Self::check_special_cases(current, prev1);
51-
previous.error = Self::or(
52-
previous.error,
53-
Self::check_multibyte_lengths(current, previous.prev, sc),
54-
);
55-
previous.incomplete = Self::is_incomplete(current);
56-
}
61+
let prev1 = Self::prev1(current, previous.prev);
62+
let sc = Self::check_special_cases(current, prev1);
63+
previous.error = Self::or(
64+
previous.error,
65+
Self::check_multibyte_lengths(current, previous.prev, sc),
66+
);
67+
previous.incomplete = Self::is_incomplete(current);
5768
previous.prev = current
5869
}
5970
};

src/implementation/x86/avx2.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,6 @@ impl Utf8CheckingState<__m256i> {
3737
_mm256_or_si256(a, b)
3838
}
3939

40-
#[target_feature(enable = "avx2")]
41-
#[inline]
42-
unsafe fn is_ascii(input: __m256i) -> bool {
43-
_mm256_movemask_epi8(input) == 0
44-
}
45-
4640
#[target_feature(enable = "avx2")]
4741
#[inline]
4842
unsafe fn check_eof(error: __m256i, incomplete: __m256i) -> __m256i {
@@ -272,8 +266,6 @@ impl Utf8CheckingState<__m256i> {
272266
_mm256_testz_si256(error, error) != 1
273267
}
274268

275-
#[target_feature(enable = "avx2")]
276-
#[inline]
277269
check_bytes!("avx2", __m256i);
278270
}
279271

@@ -302,11 +294,18 @@ impl SimdInput {
302294

303295
#[target_feature(enable = "avx2")]
304296
#[inline]
305-
unsafe fn check_utf8(&self, state: &mut Utf8CheckingState<__m256i>) {
297+
unsafe fn check_block(&self, state: &mut Utf8CheckingState<__m256i>) {
306298
Utf8CheckingState::<__m256i>::check_bytes(self.v0, state);
307299
Utf8CheckingState::<__m256i>::check_bytes(self.v1, state);
308300
}
309301

302+
#[target_feature(enable = "avx2")]
303+
#[inline]
304+
unsafe fn is_ascii(&self) -> bool {
305+
let res = _mm256_or_si256(self.v0, self.v1);
306+
_mm256_movemask_epi8(res) == 0
307+
}
308+
310309
#[target_feature(enable = "avx2")]
311310
#[inline]
312311
unsafe fn check_eof(state: &mut Utf8CheckingState<__m256i>) {
@@ -318,6 +317,8 @@ impl SimdInput {
318317
unsafe fn check_utf8_errors(state: &Utf8CheckingState<__m256i>) -> bool {
319318
Utf8CheckingState::<__m256i>::has_error(state.error)
320319
}
320+
321+
check_utf8!("avx2", __m256i);
321322
}
322323

323324
use crate::implementation::Temp2x64A32;

src/implementation/x86/sse42.rs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,6 @@ impl Utf8CheckingState<__m128i> {
3636
_mm_or_si128(a, b)
3737
}
3838

39-
#[target_feature(enable = "sse4.2")]
40-
#[inline]
41-
unsafe fn is_ascii(input: __m128i) -> bool {
42-
_mm_movemask_epi8(input) == 0
43-
}
44-
4539
#[target_feature(enable = "sse4.2")]
4640
#[inline]
4741
unsafe fn check_eof(error: __m128i, incomplete: __m128i) -> __m128i {
@@ -205,8 +199,6 @@ impl Utf8CheckingState<__m128i> {
205199
_mm_testz_si128(error, error) != 1
206200
}
207201

208-
#[target_feature(enable = "sse4.2")]
209-
#[inline]
210202
check_bytes!("sse4.2", __m128i);
211203
}
212204

@@ -239,13 +231,22 @@ impl SimdInput {
239231

240232
#[target_feature(enable = "sse4.2")]
241233
#[inline]
242-
unsafe fn check_utf8(&self, state: &mut Utf8CheckingState<__m128i>) {
234+
unsafe fn check_block(&self, state: &mut Utf8CheckingState<__m128i>) {
243235
Utf8CheckingState::<__m128i>::check_bytes(self.v0, state);
244236
Utf8CheckingState::<__m128i>::check_bytes(self.v1, state);
245237
Utf8CheckingState::<__m128i>::check_bytes(self.v2, state);
246238
Utf8CheckingState::<__m128i>::check_bytes(self.v3, state);
247239
}
248240

241+
#[target_feature(enable = "sse4.2")]
242+
#[inline]
243+
unsafe fn is_ascii(&self) -> bool {
244+
let r1 = _mm_or_si128(self.v0, self.v1);
245+
let r2 = _mm_or_si128(self.v2, self.v3);
246+
let r = _mm_or_si128(r1, r2);
247+
_mm_movemask_epi8(r) == 0
248+
}
249+
249250
#[target_feature(enable = "sse4.2")]
250251
#[inline]
251252
unsafe fn check_eof(state: &mut Utf8CheckingState<__m128i>) {
@@ -257,6 +258,8 @@ impl SimdInput {
257258
unsafe fn check_utf8_errors(state: &Utf8CheckingState<__m128i>) -> bool {
258259
Utf8CheckingState::<__m128i>::has_error(state.error)
259260
}
261+
262+
check_utf8!("sse4.2", __m128i);
260263
}
261264

262265
use crate::implementation::Temp2x64A16;

0 commit comments

Comments
 (0)