diff --git a/README.md b/README.md index dad318c..a146a0c 100644 --- a/README.md +++ b/README.md @@ -27,21 +27,21 @@ Numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion | Implementation | Median time | vs fastest | | --------------------- | ------------- | ---------- | -| **`escape simd`** | **345.06 µs** | **1.00×** | -| `escape v_jsonescape` | 576.25 µs | 1.67× | -| `escape generic` | 657.94 µs | 1.91× | -| `serde_json` | 766.72 µs | 2.22× | -| `json-escape` | 782.65 µs | 2.27× | +| **`escape simd`** | **341.18 µs** | **1.00×** | +| `escape v_jsonescape` | 555.47 µs | 1.63× | +| `escape generic` | 656.85 µs | 1.93× | +| `serde_json` | 744.75 µs | 2.18× | +| `json-escape` | 777.15 µs | 2.28× | **Fixtures payload (~300 iterations)** | Implementation | Median time | vs fastest | | --------------------- | ------------ | ---------- | -| **`escape simd`** | **12.84 ms** | **1.00×** | -| `escape v_jsonescape` | 19.66 ms | 1.53× | -| `escape generic` | 22.53 ms | 1.75× | -| `serde_json` | 24.65 ms | 1.92× | -| `json-escape` | 26.64 ms | 2.07× | +| **`escape simd`** | **12.67 ms** | **1.00×** | +| `escape v_jsonescape` | 20.58 ms | 1.62× | +| `escape generic` | 22.57 ms | 1.78× | +| `serde_json` | 24.52 ms | 1.94× | +| `json-escape` | 26.97 ms | 2.13× | ### GitHub Actions aarch64 (`ubuntu-24.04-arm`) diff --git a/src/generic.rs b/src/generic.rs index 40dea78..ec0db08 100644 --- a/src/generic.rs +++ b/src/generic.rs @@ -1,19 +1,24 @@ #[inline] -// Slightly modified version of -// -// Borrowed from: -// pub fn escape_generic>(s: S) -> String { let s = s.as_ref(); let bytes = s.as_bytes(); - // Estimate capacity - most strings don't need much escaping // Add some padding for potential escapes let estimated_capacity = bytes.len() + bytes.len() / 2 + 2; let mut result = Vec::with_capacity(estimated_capacity); - result.push(b'"'); + escape_inner(bytes, &mut result); + result.push(b'"'); + // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences) + unsafe { String::from_utf8_unchecked(result) } +} +#[inline] +// Slightly modified version of +// +// Borrowed from: +// +pub(crate) fn escape_inner(bytes: &[u8], result: &mut Vec) { let mut start = 0; let mut i = 0; @@ -55,11 +60,6 @@ pub fn escape_generic>(s: S) -> String { if start < bytes.len() { result.extend_from_slice(&bytes[start..]); } - - result.push(b'"'); - - // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences) - unsafe { String::from_utf8_unchecked(result) } } const BB: u8 = b'b'; // \x08 diff --git a/src/lib.rs b/src/lib.rs index 03a2137..3e3e1ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,21 +22,21 @@ //! //! | Implementation | Median time | vs fastest | //! | --------------------- | ------------- | ---------- | -//! | **`escape simd`** | **345.06 µs** | **1.00×** | -//! | `escape v_jsonescape` | 576.25 µs | 1.67× | -//! | `escape generic` | 657.94 µs | 1.91× | -//! | `serde_json` | 766.72 µs | 2.22× | -//! | `json-escape` | 782.65 µs | 2.27× | +//! | **`escape simd`** | **341.18 µs** | **1.00×** | +//! | `escape v_jsonescape` | 555.47 µs | 1.63× | +//! | `escape generic` | 656.85 µs | 1.93× | +//! | `serde_json` | 744.75 µs | 2.18× | +//! | `json-escape` | 777.15 µs | 2.28× | //! //! **Fixtures payload (~300 iterations)** //! //! | Implementation | Median time | vs fastest | //! | --------------------- | ------------ | ---------- | -//! | **`escape simd`** | **12.84 ms** | **1.00×** | -//! | `escape v_jsonescape` | 19.66 ms | 1.53× | -//! | `escape generic` | 22.53 ms | 1.75× | -//! | `serde_json` | 24.65 ms | 1.92× | -//! | `json-escape` | 26.64 ms | 2.07× | +//! | **`escape simd`** | **12.67 ms** | **1.00×** | +//! | `escape v_jsonescape` | 20.58 ms | 1.62× | +//! | `escape generic` | 22.57 ms | 1.78× | +//! | `serde_json` | 24.52 ms | 1.94× | +//! | `json-escape` | 26.97 ms | 2.13× | //! //! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`) //! @@ -121,16 +121,25 @@ pub use generic::escape_generic; pub fn escape>(input: S) -> String { #[cfg(target_arch = "x86_64")] { + use generic::escape_inner; + + let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2); + result.push(b'"'); + let s = input.as_ref(); + let bytes = s.as_bytes(); // Runtime CPU feature detection for x86_64 if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") { - unsafe { return x86::escape_avx512(input) } + unsafe { x86::escape_avx512(bytes, &mut result) } } else if is_x86_feature_detected!("avx2") { - unsafe { return x86::escape_avx2(input) } + unsafe { x86::escape_avx2(bytes, &mut result) } } else if is_x86_feature_detected!("sse2") { - unsafe { return x86::escape_sse2(input) } + unsafe { x86::escape_sse2(bytes, &mut result) } } else { - return escape_generic(input); + escape_inner(bytes, &mut result); } + result.push(b'"'); + // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences) + unsafe { String::from_utf8_unchecked(result) } } #[cfg(target_arch = "aarch64")] diff --git a/src/x86.rs b/src/x86.rs index 1e5d56a..7b59926 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -31,17 +31,9 @@ fn sub(a: *const u8, b: *const u8) -> usize { #[target_feature(enable = "avx512f", enable = "avx512bw")] #[inline] -pub unsafe fn escape_avx512>(input: S) -> String { - let s = input.as_ref(); - let bytes = s.as_bytes(); +pub unsafe fn escape_avx512(bytes: &[u8], result: &mut Vec) { let len = bytes.len(); - // Pre-allocate with estimated capacity - let estimated_capacity = len + len / 2 + 2; - let mut result = Vec::with_capacity(estimated_capacity); - - result.push(b'"'); - let start_ptr = bytes.as_ptr(); let end_ptr = bytes[len..].as_ptr(); let mut ptr = start_ptr; @@ -80,7 +72,7 @@ pub unsafe fn escape_avx512>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; mask &= mask - 1; } @@ -143,11 +135,11 @@ pub unsafe fn escape_avx512>(input: S) -> String { start = sub(ptr, start_ptr) + LOOP_SIZE_AVX512; } else { // Process each 64-byte chunk that has escapes - process_mask_avx512(ptr, start_ptr, &mut result, &mut start, bytes, mask_a, 0); + process_mask_avx512(ptr, start_ptr, result, &mut start, bytes, mask_a, 0); process_mask_avx512( ptr, start_ptr, - &mut result, + result, &mut start, bytes, mask_b, @@ -156,7 +148,7 @@ pub unsafe fn escape_avx512>(input: S) -> String { process_mask_avx512( ptr, start_ptr, - &mut result, + result, &mut start, bytes, mask_c, @@ -165,7 +157,7 @@ pub unsafe fn escape_avx512>(input: S) -> String { process_mask_avx512( ptr, start_ptr, - &mut result, + result, &mut start, bytes, mask_d, @@ -199,7 +191,7 @@ pub unsafe fn escape_avx512>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; mask &= mask - 1; } @@ -229,7 +221,7 @@ pub unsafe fn escape_avx512>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; mask &= mask - 1; } @@ -237,31 +229,20 @@ pub unsafe fn escape_avx512>(input: S) -> String { } } else { // Fall back to AVX2 for small strings - return escape_avx2(input); + return escape_avx2(bytes, result); } // Copy any remaining bytes if start < len { result.extend_from_slice(&bytes[start..]); } - - result.push(b'"'); - unsafe { String::from_utf8_unchecked(result) } } #[target_feature(enable = "avx2")] #[inline] -pub unsafe fn escape_avx2>(input: S) -> String { - let s = input.as_ref(); - let bytes = s.as_bytes(); +pub unsafe fn escape_avx2(bytes: &[u8], result: &mut Vec) { let len = bytes.len(); - // Pre-allocate with estimated capacity - let estimated_capacity = len + len / 2 + 2; - let mut result = Vec::with_capacity(estimated_capacity); - - result.push(b'"'); - let start_ptr = bytes.as_ptr(); let end_ptr = bytes[len..].as_ptr(); let mut ptr = start_ptr; @@ -297,7 +278,7 @@ pub unsafe fn escape_avx2>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; } mask ^= 1 << cur; @@ -372,11 +353,11 @@ pub unsafe fn escape_avx2>(input: S) -> String { let mask_d = _mm256_movemask_epi8(cmp_d); // Process each 32-byte chunk that has escapes - process_mask_avx(ptr, start_ptr, &mut result, &mut start, bytes, mask_a, 0); + process_mask_avx(ptr, start_ptr, result, &mut start, bytes, mask_a, 0); process_mask_avx( ptr, start_ptr, - &mut result, + result, &mut start, bytes, mask_b, @@ -385,7 +366,7 @@ pub unsafe fn escape_avx2>(input: S) -> String { process_mask_avx( ptr, start_ptr, - &mut result, + result, &mut start, bytes, mask_c, @@ -394,7 +375,7 @@ pub unsafe fn escape_avx2>(input: S) -> String { process_mask_avx( ptr, start_ptr, - &mut result, + result, &mut start, bytes, mask_d, @@ -428,7 +409,7 @@ pub unsafe fn escape_avx2>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; } mask ^= 1 << cur; @@ -464,7 +445,7 @@ pub unsafe fn escape_avx2>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; } mask ^= 1 << cur; @@ -477,30 +458,20 @@ pub unsafe fn escape_avx2>(input: S) -> String { } } else { // Fall back to SSE2 for small strings - return escape_sse2(input); + return escape_sse2(bytes, result); } // Copy any remaining bytes if start < len { result.extend_from_slice(&bytes[start..]); } - - result.push(b'"'); - unsafe { String::from_utf8_unchecked(result) } } #[target_feature(enable = "sse2")] #[inline] -pub unsafe fn escape_sse2>(input: S) -> String { - let s = input.as_ref(); - let bytes = s.as_bytes(); +pub unsafe fn escape_sse2(bytes: &[u8], result: &mut Vec) { let len = bytes.len(); - let estimated_capacity = len + len / 2 + 2; - let mut result = Vec::with_capacity(estimated_capacity); - - result.push(b'"'); - let start_ptr = bytes.as_ptr(); let end_ptr = bytes[len..].as_ptr(); let mut ptr = start_ptr; @@ -518,7 +489,7 @@ pub unsafe fn escape_sse2>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; } ptr = ptr.offset(1); @@ -552,7 +523,7 @@ pub unsafe fn escape_sse2>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; } mask ^= 1 << cur; @@ -587,7 +558,7 @@ pub unsafe fn escape_sse2>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; } mask ^= 1 << cur; @@ -623,7 +594,7 @@ pub unsafe fn escape_sse2>(input: S) -> String { if start < i { result.extend_from_slice(&bytes[start..i]); } - write_escape(&mut result, escape_byte, c); + write_escape(result, escape_byte, c); start = i + 1; } mask ^= 1 << cur; @@ -640,9 +611,6 @@ pub unsafe fn escape_sse2>(input: S) -> String { if start < len { result.extend_from_slice(&bytes[start..]); } - - result.push(b'"'); - unsafe { String::from_utf8_unchecked(result) } } #[inline(always)]