diff --git a/src/aarch64.rs b/src/aarch64.rs
index 914e06e..03a9f4e 100644
--- a/src/aarch64.rs
+++ b/src/aarch64.rs
@@ -10,14 +10,9 @@ const PREFETCH_DISTANCE: usize = CHUNK * 2;
 const SLASH_SENTINEL: u8 = 0xFF;
 
 #[inline]
-pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
-    let s = input.as_ref();
-    let bytes = s.as_bytes();
+pub fn escape_neon(bytes: &[u8], output: &mut Vec<u8>) {
     let n = bytes.len();
 
-    let mut out = Vec::with_capacity(n + 2);
-    out.push(b'"');
-
     unsafe {
         let tbl = vld1q_u8_x4(ESCAPE.as_ptr());
         let slash = vdupq_n_u8(b'\\');
@@ -53,7 +48,7 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
             let mask_r_4 = vmaxvq_u8(mask_4);
 
             if mask_r_1 | mask_r_2 | mask_r_3 | mask_r_4 == 0 {
-                out.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
+                output.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
                 i += CHUNK;
                 continue;
             }
@@ -61,10 +56,10 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
             macro_rules! handle {
                 ($mask:expr, $mask_r:expr, $off:expr) => {
                     if $mask_r == 0 {
-                        out.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
+                        output.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
                     } else {
                         vst1q_u8(placeholder.as_mut_ptr(), $mask);
-                        handle_block(&bytes[i + $off..i + $off + 16], &placeholder, &mut out);
+                        handle_block(&bytes[i + $off..i + $off + 16], &placeholder, output);
                     }
                 };
             }
@@ -78,13 +73,9 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
         }
 
         if i < n {
-            handle_tail(&bytes[i..], &mut out);
+            handle_tail(&bytes[i..], output);
         }
     }
-
-    out.push(b'"');
-
-    unsafe { String::from_utf8_unchecked(out) }
 }
 
 #[inline(always)]
diff --git a/src/generic.rs b/src/generic.rs
index ec0db08..8e73929 100644
--- a/src/generic.rs
+++ b/src/generic.rs
@@ -13,6 +13,16 @@ pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
     unsafe { String::from_utf8_unchecked(result) }
 }
 
+/// Appends the JSON-escaped form of `s`, wrapped in double quotes, to `output` via the generic fallback.
+#[inline]
+pub fn escape_into_generic<S: AsRef<str>>(s: S, output: &mut Vec<u8>) {
+    let s = s.as_ref();
+    let bytes = s.as_bytes();
+    output.push(b'"');
+    escape_inner(bytes, output);
+    output.push(b'"');
+}
+
 #[inline]
 // Slightly modified version of
 //
diff --git a/src/lib.rs b/src/lib.rs
index 73138f1..1fe6e58 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -114,19 +114,19 @@ mod generic;
 #[cfg(target_arch = "x86_64")]
 mod x86;
 
-pub use generic::escape_generic;
+pub use generic::{escape_generic, escape_into_generic};
 
 /// Main entry point for JSON string escaping with SIMD acceleration
 /// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
 pub fn escape<S: AsRef<str>>(input: S) -> String {
+    use generic::escape_inner;
+
+    let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
+    result.push(b'"');
+    let s = input.as_ref();
+    let bytes = s.as_bytes();
     #[cfg(target_arch = "x86_64")]
     {
-        use generic::escape_inner;
-
-        let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
-        result.push(b'"');
-        let s = input.as_ref();
-        let bytes = s.as_bytes();
         let len = bytes.len();
         // Runtime CPU feature detection for x86_64
         if is_x86_feature_detected!("avx512f")
@@ -144,16 +144,71 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
         } else {
             escape_inner(bytes, &mut result);
         }
-        result.push(b'"');
-        // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
-        unsafe { String::from_utf8_unchecked(result) }
     }
 
     #[cfg(target_arch = "aarch64")]
     {
         #[cfg(feature = "force_aarch64_neon")]
         {
-            return aarch64::escape_neon(input);
+            aarch64::escape_neon(bytes, &mut result);
+        }
+        #[cfg(not(feature = "force_aarch64_neon"))]
+        {
+            // on Apple M2 and later, the `bf16` feature is available
+            // it means they have more registers and can significantly benefit from the SIMD path
+            // TODO: add support for sve2 chips with wider registers
+            // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
+            if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
+                aarch64::escape_neon(bytes, &mut result);
+            } else {
+                escape_inner(bytes, &mut result);
+            }
+        }
+    }
+
+    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
+    {
+        escape_inner(bytes, &mut result);
+    }
+    result.push(b'"');
+    // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
+    unsafe { String::from_utf8_unchecked(result) }
+}
+
+/// Appends the JSON-escaped form of `input`, wrapped in double quotes, to `output`.
+/// Uses the same SIMD dispatch as [`escape`]; otherwise the generic fallback is used.
+pub fn escape_into<S: AsRef<str>>(input: S, output: &mut Vec<u8>) {
+    use generic::escape_inner;
+
+    output.push(b'"');
+    let s = input.as_ref();
+    let bytes = s.as_bytes();
+    #[cfg(target_arch = "x86_64")]
+    {
+        let len = bytes.len();
+        // Runtime CPU feature detection for x86_64
+        if is_x86_feature_detected!("avx512f")
+            && is_x86_feature_detected!("avx512bw")
+            && len >= x86::LOOP_SIZE_AVX512
+        {
+            unsafe { x86::escape_avx512(bytes, output) }
+        } else if is_x86_feature_detected!("avx2") && len >= x86::LOOP_SIZE_AVX2 {
+            unsafe { x86::escape_avx2(bytes, output) }
+        } else if is_x86_feature_detected!("sse2")
+            && /* if len < 128, no need to use simd */
+            len >= x86::LOOP_SIZE_AVX2
+        {
+            unsafe { x86::escape_sse2(bytes, output) }
+        } else {
+            escape_inner(bytes, output);
+        }
+    }
+
+    #[cfg(target_arch = "aarch64")]
+    {
+        #[cfg(feature = "force_aarch64_neon")]
+        {
+            aarch64::escape_neon(bytes, output);
         }
         #[cfg(not(feature = "force_aarch64_neon"))]
         {
@@ -162,15 +217,18 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
             // TODO: add support for sve2 chips with wider registers
             // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
             if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
-                return aarch64::escape_neon(input);
+                aarch64::escape_neon(bytes, output);
             } else {
-                return escape_generic(input);
+                escape_inner(bytes, output);
             }
         }
     }
 
     #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
-    escape_generic(input)
+    {
+        escape_inner(bytes, output);
+    }
+    output.push(b'"');
 }
 
 #[test]
@@ -377,6 +435,9 @@ fn test_rxjs() {
     assert!(!sources.is_empty());
     for source in sources {
         assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
+        let mut output = String::new();
+        escape_into(&source, unsafe { output.as_mut_vec() });
+        assert_eq!(output, serde_json::to_string(&source).unwrap());
     }
 }
 
@@ -402,5 +463,8 @@ fn test_sources() {
     assert!(!sources.is_empty());
     for source in sources {
         assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
+        let mut output = String::new();
+        escape_into(&source, unsafe { output.as_mut_vec() });
+        assert_eq!(output, serde_json::to_string(&source).unwrap());
     }
 }