Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 5 additions & 14 deletions src/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,9 @@ const PREFETCH_DISTANCE: usize = CHUNK * 2;
const SLASH_SENTINEL: u8 = 0xFF;

#[inline]
pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
let s = input.as_ref();
let bytes = s.as_bytes();
pub fn escape_neon(bytes: &[u8], output: &mut Vec<u8>) {
let n = bytes.len();

let mut out = Vec::with_capacity(n + 2);
out.push(b'"');

unsafe {
let tbl = vld1q_u8_x4(ESCAPE.as_ptr());
let slash = vdupq_n_u8(b'\\');
Expand Down Expand Up @@ -53,18 +48,18 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
let mask_r_4 = vmaxvq_u8(mask_4);

if mask_r_1 | mask_r_2 | mask_r_3 | mask_r_4 == 0 {
out.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
output.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
i += CHUNK;
continue;
}

macro_rules! handle {
($mask:expr, $mask_r:expr, $off:expr) => {
if $mask_r == 0 {
out.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
output.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
} else {
vst1q_u8(placeholder.as_mut_ptr(), $mask);
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, &mut out);
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, output);
}
};
}
Expand All @@ -78,13 +73,9 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
}

if i < n {
handle_tail(&bytes[i..], &mut out);
handle_tail(&bytes[i..], output);
}
}

out.push(b'"');

unsafe { String::from_utf8_unchecked(out) }
}

#[inline(always)]
Expand Down
9 changes: 9 additions & 0 deletions src/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
unsafe { String::from_utf8_unchecked(result) }
}

/// Appends `s` to `output` as a double-quoted string using the generic (non-SIMD) routine.
///
/// Three things are written, in order: an opening `"`, whatever `escape_inner` emits for the
/// bytes of `s`, and a closing `"`. Because this function adds both quotes itself, callers
/// must not wrap the call in quotes of their own.
#[inline]
pub fn escape_into_generic<S: AsRef<str>>(s: S, output: &mut Vec<u8>) {
    const QUOTE: u8 = b'"';

    let raw = s.as_ref().as_bytes();
    output.push(QUOTE);
    escape_inner(raw, output);
    output.push(QUOTE);
}

#[inline]
// Slightly modified version of
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
Expand Down
92 changes: 78 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,19 @@ mod generic;
#[cfg(target_arch = "x86_64")]
mod x86;

pub use generic::escape_generic;
pub use generic::{escape_generic, escape_into_generic};

/// Main entry point for JSON string escaping with SIMD acceleration.
///
/// If the platform is supported, the SIMD path is used; otherwise the generic fallback is used.
pub fn escape<S: AsRef<str>>(input: S) -> String {
use generic::escape_inner;

let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
result.push(b'"');
let s = input.as_ref();
let bytes = s.as_bytes();
#[cfg(target_arch = "x86_64")]
{
use generic::escape_inner;

let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
result.push(b'"');
let s = input.as_ref();
let bytes = s.as_bytes();
let len = bytes.len();
// Runtime CPU feature detection for x86_64
if is_x86_feature_detected!("avx512f")
Expand All @@ -144,16 +144,71 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
} else {
escape_inner(bytes, &mut result);
}
result.push(b'"');
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
unsafe { String::from_utf8_unchecked(result) }
}

#[cfg(target_arch = "aarch64")]
{
#[cfg(feature = "force_aarch64_neon")]
{
return aarch64::escape_neon(input);
aarch64::escape_neon(bytes, &mut result);
}
#[cfg(not(feature = "force_aarch64_neon"))]
{
// on Apple M2 and later, the `bf16` feature is available
// it means they have more registers and can significantly benefit from the SIMD path
// TODO: add support for sve2 chips with wider registers
// github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
aarch64::escape_neon(bytes, &mut result);
} else {
escape_inner(bytes, &mut result);
}
}
}

#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
{
escape_inner(bytes, &mut result);
}
result.push(b'"');
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
unsafe { String::from_utf8_unchecked(result) }
}

/// Main entry point for JSON string escaping with SIMD acceleration
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
pub fn escape_into<S: AsRef<str>>(input: S, output: &mut Vec<u8>) {
use generic::escape_inner;

output.push(b'"');
let s = input.as_ref();
let bytes = s.as_bytes();
#[cfg(target_arch = "x86_64")]
{
let len = bytes.len();
// Runtime CPU feature detection for x86_64
if is_x86_feature_detected!("avx512f")
&& is_x86_feature_detected!("avx512bw")
&& len >= x86::LOOP_SIZE_AVX512
{
unsafe { x86::escape_avx512(bytes, output) }
} else if is_x86_feature_detected!("avx2") && len >= x86::LOOP_SIZE_AVX2 {
unsafe { x86::escape_avx2(bytes, output) }
} else if is_x86_feature_detected!("sse2")
&& /* if len < 128, no need to use simd */
len >= x86::LOOP_SIZE_AVX2
{
unsafe { x86::escape_sse2(bytes, output) }
} else {
escape_inner(bytes, output);
}
}

#[cfg(target_arch = "aarch64")]
{
#[cfg(feature = "force_aarch64_neon")]
{
return aarch64::escape_neon(bytes, output);
}
#[cfg(not(feature = "force_aarch64_neon"))]
{
Expand All @@ -162,15 +217,18 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
// TODO: add support for sve2 chips with wider registers
// github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
return aarch64::escape_neon(input);
aarch64::escape_neon(bytes, output);
} else {
return escape_generic(input);
escape_inner(bytes, output);
}
}
}

#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
escape_generic(input)
{
escape_into_generic(input, output);
}
output.push(b'"');
}

#[test]
Expand Down Expand Up @@ -377,6 +435,9 @@ fn test_rxjs() {
assert!(!sources.is_empty());
for source in sources {
assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
let mut output = String::new();
escape_into(&source, unsafe { output.as_mut_vec() });
assert_eq!(output, serde_json::to_string(&source).unwrap());
}
}

Expand All @@ -402,5 +463,8 @@ fn test_sources() {
assert!(!sources.is_empty());
for source in sources {
assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
let mut output = String::new();
escape_into(&source, unsafe { output.as_mut_vec() });
assert_eq!(output, serde_json::to_string(&source).unwrap());
}
}
Loading