Skip to content

Commit a64209a

Browse files
committed
A little optimization
1 parent 7db47b5 commit a64209a

File tree

2 files changed

+21
-14
lines changed

2 files changed

+21
-14
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ serde_json = "1"
2424

2525
[profile.bench]
2626
lto = true
27+
codegen-units = 1
2728

2829
[profile.instruments]
2930
inherits = "release"

src/aarch64.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
use std::arch::aarch64::{
2-
uint8x16_t, vaddq_u8, vaddvq_u8, vceqq_u8, vdupq_n_u8, vld1q_u8, vld1q_u8_x4, vqtbl4q_u8,
2+
uint8x16_t, // lane type
3+
vceqq_u8,
4+
vdupq_n_u8, // comparisons / splat
5+
vld1q_u8,
6+
vld1q_u8_x4, // loads
7+
vmaxvq_u8, // horizontal-max reduction
8+
vorrq_u8, // bit-wise OR
9+
vqtbl4q_u8, // table-lookup
310
};
411
use std::mem::transmute;
512

@@ -17,36 +24,35 @@ pub fn encode_str<S: AsRef<str>>(input: S) -> String {
1724
// Safety: SIMD instructions
1825
unsafe {
1926
let mut start = 0;
27+
let table_low = vld1q_u8_x4(ESCAPE[0..64].as_ptr());
28+
let table_high = vdupq_n_u8(b'\\');
2029
while start + CHUNK_SIZE < len {
21-
let next_chunk = start + CHUNK_SIZE;
22-
let current_chunk_slice = &bytes[start..next_chunk];
23-
let table_low = vld1q_u8_x4(ESCAPE[0..64].as_ptr());
24-
let table_high = vdupq_n_u8(b'\\');
25-
let chunk = vld1q_u8(current_chunk_slice.as_ptr());
30+
let current = &bytes[start..start + CHUNK_SIZE];
31+
32+
let chunk = vld1q_u8(current.as_ptr());
2633
let low_mask = vqtbl4q_u8(table_low, chunk);
2734
let high_mask = vceqq_u8(table_high, chunk);
28-
if vaddvq_u8(low_mask) == 0 && vaddvq_u8(high_mask) == 0 {
29-
writer.extend_from_slice(current_chunk_slice);
30-
start = next_chunk;
35+
if vmaxvq_u8(low_mask) == 0 && vmaxvq_u8(high_mask) == 0 {
36+
writer.extend_from_slice(current);
37+
start += CHUNK_SIZE;
3138
continue;
3239
}
3340

3441
// Bitwise-OR the two masks together to get a single combined mask
35-
let escape_table_mask = vaddq_u8(low_mask, high_mask);
36-
let escape_table_mask_slice = transmute::<uint8x16_t, [u8; 16]>(escape_table_mask);
42+
let escape_mask = vorrq_u8(low_mask, high_mask);
43+
let escape_table_mask_slice = transmute::<uint8x16_t, [u8; 16]>(escape_mask);
3744
for (index, value) in escape_table_mask_slice.into_iter().enumerate() {
3845
if value == 0 {
3946
writer.push(bytes[start + index]);
4047
} else if value == 255 {
4148
// value is in the high table mask, which means it's `\`
4249
writer.extend_from_slice(REVERSE_SOLIDUS);
4350
} else {
44-
let char_escape =
45-
CharEscape::from_escape_table(value, current_chunk_slice[index]);
51+
let char_escape = CharEscape::from_escape_table(value, current[index]);
4652
write_char_escape(writer, char_escape);
4753
}
4854
}
49-
start = next_chunk;
55+
start += CHUNK_SIZE;
5056
}
5157

5258
if start < len {

0 commit comments

Comments
 (0)