Skip to content

Commit e21ab98

Browse files
committed
Reduce placeholder allocation
1 parent 35c21dd commit e21ab98

File tree

1 file changed

+23
-43
lines changed

1 file changed

+23
-43
lines changed

src/aarch64.rs

Lines changed: 23 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,43 +10,36 @@ const CHUNK: usize = 64;
1010
pub fn encode_str<S: AsRef<str>>(input: S) -> String {
1111
let s = input.as_ref();
1212
let mut out = Vec::with_capacity(s.len() + 2);
13-
let b = s.as_bytes();
14-
let n = b.len();
13+
let bytes = s.as_bytes();
14+
let n = bytes.len();
1515
out.push(b'"');
1616

1717
unsafe {
1818
let tbl = vld1q_u8_x4(ESCAPE.as_ptr()); // first 64 B of the escape table
1919
let slash = vdupq_n_u8(b'\\');
2020
let mut i = 0;
21+
let mut placeholder: [u8; 16] = core::mem::zeroed();
2122

2223
while i + CHUNK <= n {
23-
let ptr = b.as_ptr().add(i);
24+
let ptr = bytes.as_ptr().add(i);
2425

2526
/* ---- L1 prefetch: CHUNK size ahead ---- */
2627
core::arch::asm!("prfm pldl1keep, [{0}, #64]", in(reg) ptr);
2728
/* ------------------------------------------ */
2829

2930
// load 64 B (four q-regs)
3031
let a = vld1q_u8(ptr);
31-
let m1 = vqtbl4q_u8(tbl, a);
32-
let m2 = vceqq_u8(slash, a);
3332

34-
let b2 = vld1q_u8(ptr.add(16));
35-
let m3 = vqtbl4q_u8(tbl, b2);
36-
let m4 = vceqq_u8(slash, b2);
33+
let b = vld1q_u8(ptr.add(16));
3734

3835
let c = vld1q_u8(ptr.add(32));
39-
let m5 = vqtbl4q_u8(tbl, c);
40-
let m6 = vceqq_u8(slash, c);
4136

4237
let d = vld1q_u8(ptr.add(48));
43-
let m7 = vqtbl4q_u8(tbl, d);
44-
let m8 = vceqq_u8(slash, d);
4538

46-
let mask_1 = vorrq_u8(m1, m2);
47-
let mask_2 = vorrq_u8(m3, m4);
48-
let mask_3 = vorrq_u8(m5, m6);
49-
let mask_4 = vorrq_u8(m7, m8);
39+
let mask_1 = vorrq_u8(vqtbl4q_u8(tbl, a), vceqq_u8(slash, a));
40+
let mask_2 = vorrq_u8(vqtbl4q_u8(tbl, b), vceqq_u8(slash, b));
41+
let mask_3 = vorrq_u8(vqtbl4q_u8(tbl, c), vceqq_u8(slash, c));
42+
let mask_4 = vorrq_u8(vqtbl4q_u8(tbl, d), vceqq_u8(slash, d));
5043

5144
let mask_r_1 = vmaxvq_u8(mask_1);
5245
let mask_r_2 = vmaxvq_u8(mask_2);
@@ -59,40 +52,27 @@ pub fn encode_str<S: AsRef<str>>(input: S) -> String {
5952
i += CHUNK;
6053
continue;
6154
}
62-
let mut tmp: [u8; 16] = core::mem::zeroed();
6355

64-
if mask_r_1 == 0 {
65-
out.extend_from_slice(std::slice::from_raw_parts(ptr, 16));
66-
} else {
67-
vst1q_u8(tmp.as_mut_ptr(), mask_1);
68-
handle_block(&b[i..i + 16], &tmp, &mut out);
56+
macro_rules! handle {
57+
($mask:expr, $mask_r:expr, $off:expr) => {
58+
if $mask_r == 0 {
59+
out.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
60+
} else {
61+
vst1q_u8(placeholder.as_mut_ptr(), $mask);
62+
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, &mut out);
63+
}
64+
};
6965
}
7066

71-
if mask_r_2 == 0 {
72-
out.extend_from_slice(std::slice::from_raw_parts(ptr.add(16), 16));
73-
} else {
74-
vst1q_u8(tmp.as_mut_ptr(), mask_2);
75-
handle_block(&b[i + 16..i + 32], &tmp, &mut out);
76-
}
77-
78-
if mask_r_3 == 0 {
79-
out.extend_from_slice(std::slice::from_raw_parts(ptr.add(32), 16));
80-
} else {
81-
vst1q_u8(tmp.as_mut_ptr(), mask_3);
82-
handle_block(&b[i + 32..i + 48], &tmp, &mut out);
83-
}
84-
85-
if mask_r_4 == 0 {
86-
out.extend_from_slice(std::slice::from_raw_parts(ptr.add(48), 16));
87-
} else {
88-
vst1q_u8(tmp.as_mut_ptr(), mask_4);
89-
handle_block(&b[i + 48..i + 64], &tmp, &mut out);
90-
}
67+
handle!(mask_1, mask_r_1, 0);
68+
handle!(mask_2, mask_r_2, 16);
69+
handle!(mask_3, mask_r_3, 32);
70+
handle!(mask_4, mask_r_4, 48);
9171

9272
i += CHUNK;
9373
}
9474
if i < n {
95-
encode_str_inner(&b[i..], &mut out);
75+
encode_str_inner(&bytes[i..], &mut out);
9676
}
9777
}
9878
out.push(b'"');

0 commit comments

Comments
 (0)