Skip to content

Commit 2e662c8

Browse files
committed
feat: add escape_into
1 parent 2df28ec commit 2e662c8

File tree

3 files changed

+87
-24
lines changed

3 files changed

+87
-24
lines changed

src/aarch64.rs

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,9 @@ const PREFETCH_DISTANCE: usize = CHUNK * 2;
1010
const SLASH_SENTINEL: u8 = 0xFF;
1111

1212
#[inline]
13-
pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
14-
let s = input.as_ref();
15-
let bytes = s.as_bytes();
13+
pub fn escape_neon(bytes: &[u8], output: &mut Vec<u8>) {
1614
let n = bytes.len();
1715

18-
let mut out = Vec::with_capacity(n + 2);
19-
out.push(b'"');
20-
2116
unsafe {
2217
let tbl = vld1q_u8_x4(ESCAPE.as_ptr());
2318
let slash = vdupq_n_u8(b'\\');
@@ -53,18 +48,18 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
5348
let mask_r_4 = vmaxvq_u8(mask_4);
5449

5550
if mask_r_1 | mask_r_2 | mask_r_3 | mask_r_4 == 0 {
56-
out.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
51+
output.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
5752
i += CHUNK;
5853
continue;
5954
}
6055

6156
macro_rules! handle {
6257
($mask:expr, $mask_r:expr, $off:expr) => {
6358
if $mask_r == 0 {
64-
out.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
59+
output.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
6560
} else {
6661
vst1q_u8(placeholder.as_mut_ptr(), $mask);
67-
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, &mut out);
62+
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, output);
6863
}
6964
};
7065
}
@@ -78,13 +73,9 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
7873
}
7974

8075
if i < n {
81-
handle_tail(&bytes[i..], &mut out);
76+
handle_tail(&bytes[i..], output);
8277
}
8378
}
84-
85-
out.push(b'"');
86-
87-
unsafe { String::from_utf8_unchecked(out) }
8879
}
8980

9081
#[inline(always)]

src/generic.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
1313
unsafe { String::from_utf8_unchecked(result) }
1414
}
1515

16+
#[inline]
17+
pub fn escape_into_generic<S: AsRef<str>>(s: S, output: &mut Vec<u8>) {
18+
let s = s.as_ref();
19+
let bytes = s.as_bytes();
20+
output.push(b'"');
21+
escape_inner(bytes, output);
22+
output.push(b'"');
23+
}
24+
1625
#[inline]
1726
// Slightly modified version of
1827
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>

src/lib.rs

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -114,19 +114,19 @@ mod generic;
114114
#[cfg(target_arch = "x86_64")]
115115
mod x86;
116116

117-
pub use generic::escape_generic;
117+
pub use generic::{escape_generic, escape_into_generic};
118118

119119
/// Main entry point for JSON string escaping with SIMD acceleration
120120
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
121121
pub fn escape<S: AsRef<str>>(input: S) -> String {
122+
use generic::escape_inner;
123+
124+
let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
125+
result.push(b'"');
126+
let s = input.as_ref();
127+
let bytes = s.as_bytes();
122128
#[cfg(target_arch = "x86_64")]
123129
{
124-
use generic::escape_inner;
125-
126-
let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
127-
result.push(b'"');
128-
let s = input.as_ref();
129-
let bytes = s.as_bytes();
130130
let len = bytes.len();
131131
// Runtime CPU feature detection for x86_64
132132
if is_x86_feature_detected!("avx512f")
@@ -153,7 +153,7 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
153153
{
154154
#[cfg(feature = "force_aarch64_neon")]
155155
{
156-
return aarch64::escape_neon(input);
156+
return aarch64::escape_neon(bytes, &mut result);
157157
}
158158
#[cfg(not(feature = "force_aarch64_neon"))]
159159
{
@@ -162,17 +162,74 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
162162
// TODO: add support for sve2 chips with wider registers
163163
// github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
164164
if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
165-
return aarch64::escape_neon(input);
165+
aarch64::escape_neon(bytes, &mut result);
166166
} else {
167-
return escape_generic(input);
167+
escape_inner(bytes, &mut result);
168168
}
169+
result.push(b'"');
170+
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
171+
unsafe { String::from_utf8_unchecked(result) }
169172
}
170173
}
171174

172175
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
173176
escape_generic(input)
174177
}
175178

179+
/// Main entry point for JSON string escaping with SIMD acceleration
180+
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
181+
pub fn escape_into<S: AsRef<str>>(input: S, output: &mut Vec<u8>) {
182+
use generic::escape_inner;
183+
184+
output.push(b'"');
185+
let s = input.as_ref();
186+
let bytes = s.as_bytes();
187+
#[cfg(target_arch = "x86_64")]
188+
{
189+
let len = bytes.len();
190+
// Runtime CPU feature detection for x86_64
191+
if is_x86_feature_detected!("avx512f")
192+
&& is_x86_feature_detected!("avx512bw")
193+
&& len >= x86::LOOP_SIZE_AVX512
194+
{
195+
unsafe { x86::escape_avx512(bytes, output) }
196+
} else if is_x86_feature_detected!("avx2") && len >= x86::LOOP_SIZE_AVX2 {
197+
unsafe { x86::escape_avx2(bytes, output) }
198+
} else if is_x86_feature_detected!("sse2")
199+
&& /* if len < 128, no need to use simd */
200+
len >= x86::LOOP_SIZE_AVX2
201+
{
202+
unsafe { x86::escape_sse2(bytes, output) }
203+
} else {
204+
escape_inner(bytes, output);
205+
}
206+
output.push(b'"');
207+
}
208+
209+
#[cfg(target_arch = "aarch64")]
210+
{
211+
#[cfg(feature = "force_aarch64_neon")]
212+
{
213+
return aarch64::escape_neon(bytes, output);
214+
}
215+
#[cfg(not(feature = "force_aarch64_neon"))]
216+
{
217+
// on Apple M2 and later, the `bf16` feature is available
218+
// it means they have more registers and can significantly benefit from the SIMD path
219+
// TODO: add support for sve2 chips with wider registers
220+
// github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
221+
if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
222+
aarch64::escape_neon(bytes, output);
223+
} else {
224+
escape_inner(bytes, output);
225+
}
226+
}
227+
}
228+
229+
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
230+
escape_into_generic(input, output);
231+
}
232+
176233
#[test]
177234
fn test_escape_ascii_json_string() {
178235
let fixture = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
@@ -377,6 +434,9 @@ fn test_rxjs() {
377434
assert!(!sources.is_empty());
378435
for source in sources {
379436
assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
437+
let mut output = String::new();
438+
escape_into(&source, unsafe { output.as_mut_vec() });
439+
assert_eq!(output, serde_json::to_string(&source).unwrap());
380440
}
381441
}
382442

@@ -402,5 +462,8 @@ fn test_sources() {
402462
assert!(!sources.is_empty());
403463
for source in sources {
404464
assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
465+
let mut output = String::new();
466+
escape_into(&source, unsafe { output.as_mut_vec() });
467+
assert_eq!(output, serde_json::to_string(&source).unwrap());
405468
}
406469
}

0 commit comments

Comments
 (0)