Skip to content

Commit adf3c3f

Browse files
authored
perf: reduce allocation on x86 (#13)
* perf: reduce allocation on x86
* docs: update benchmark result
1 parent f830bb2 commit adf3c3f

File tree

4 files changed

+67
-90
lines changed

4 files changed

+67
-90
lines changed

README.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,21 +27,21 @@ Numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion
2727

2828
| Implementation | Median time | vs fastest |
2929
| --------------------- | ------------- | ---------- |
30-
| **`escape simd`** | **345.06 µs** | **1.00×** |
31-
| `escape v_jsonescape` | 576.25 µs | 1.67× |
32-
| `escape generic` | 657.94 µs | 1.91× |
33-
| `serde_json` | 766.72 µs | 2.22× |
34-
| `json-escape` | 782.65 µs | 2.27× |
30+
| **`escape simd`** | **341.18 µs** | **1.00×** |
31+
| `escape v_jsonescape` | 555.47 µs | 1.63× |
32+
| `escape generic` | 656.85 µs | 1.93× |
33+
| `serde_json` | 744.75 µs | 2.18× |
34+
| `json-escape` | 777.15 µs | 2.28× |
3535

3636
**Fixtures payload (~300 iterations)**
3737

3838
| Implementation | Median time | vs fastest |
3939
| --------------------- | ------------ | ---------- |
40-
| **`escape simd`** | **12.84 ms** | **1.00×** |
41-
| `escape v_jsonescape` | 19.66 ms | 1.53× |
42-
| `escape generic` | 22.53 ms | 1.75× |
43-
| `serde_json` | 24.65 ms | 1.92× |
44-
| `json-escape` | 26.64 ms | 2.07× |
40+
| **`escape simd`** | **12.67 ms** | **1.00×** |
41+
| `escape v_jsonescape` | 20.58 ms | 1.62× |
42+
| `escape generic` | 22.57 ms | 1.78× |
43+
| `serde_json` | 24.52 ms | 1.94× |
44+
| `json-escape` | 26.97 ms | 2.13× |
4545

4646
### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
4747

src/generic.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
11
#[inline]
2-
// Slightly modified version of
3-
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
4-
// Borrowed from:
5-
// <https://github.com/oxc-project/oxc-sourcemap/blob/e533e6ca4d08c538d8d4df74eacd29437851591f/src/encode.rs#L331>
62
pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
73
let s = s.as_ref();
84
let bytes = s.as_bytes();
9-
105
// Estimate capacity - most strings don't need much escaping
116
// Add some padding for potential escapes
127
let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
138
let mut result = Vec::with_capacity(estimated_capacity);
14-
159
result.push(b'"');
10+
escape_inner(bytes, &mut result);
11+
result.push(b'"');
12+
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
13+
unsafe { String::from_utf8_unchecked(result) }
14+
}
1615

16+
#[inline]
17+
// Slightly modified version of
18+
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
19+
// Borrowed from:
20+
// <https://github.com/oxc-project/oxc-sourcemap/blob/e533e6ca4d08c538d8d4df74eacd29437851591f/src/encode.rs#L331>
21+
pub(crate) fn escape_inner(bytes: &[u8], result: &mut Vec<u8>) {
1722
let mut start = 0;
1823
let mut i = 0;
1924

@@ -55,11 +60,6 @@ pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
5560
if start < bytes.len() {
5661
result.extend_from_slice(&bytes[start..]);
5762
}
58-
59-
result.push(b'"');
60-
61-
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
62-
unsafe { String::from_utf8_unchecked(result) }
6363
}
6464

6565
const BB: u8 = b'b'; // \x08

src/lib.rs

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,21 @@
2222
//!
2323
//! | Implementation | Median time | vs fastest |
2424
//! | --------------------- | ------------- | ---------- |
25-
//! | **`escape simd`** | **345.06 µs** | **1.00×** |
26-
//! | `escape v_jsonescape` | 576.25 µs | 1.67× |
27-
//! | `escape generic` | 657.94 µs | 1.91× |
28-
//! | `serde_json` | 766.72 µs | 2.22× |
29-
//! | `json-escape` | 782.65 µs | 2.27× |
25+
//! | **`escape simd`** | **341.18 µs** | **1.00×** |
26+
//! | `escape v_jsonescape` | 555.47 µs | 1.63× |
27+
//! | `escape generic` | 656.85 µs | 1.93× |
28+
//! | `serde_json` | 744.75 µs | 2.18× |
29+
//! | `json-escape` | 777.15 µs | 2.28× |
3030
//!
3131
//! **Fixtures payload (~300 iterations)**
3232
//!
3333
//! | Implementation | Median time | vs fastest |
3434
//! | --------------------- | ------------ | ---------- |
35-
//! | **`escape simd`** | **12.84 ms** | **1.00×** |
36-
//! | `escape v_jsonescape` | 19.66 ms | 1.53× |
37-
//! | `escape generic` | 22.53 ms | 1.75× |
38-
//! | `serde_json` | 24.65 ms | 1.92× |
39-
//! | `json-escape` | 26.64 ms | 2.07× |
35+
//! | **`escape simd`** | **12.67 ms** | **1.00×** |
36+
//! | `escape v_jsonescape` | 20.58 ms | 1.62× |
37+
//! | `escape generic` | 22.57 ms | 1.78× |
38+
//! | `serde_json` | 24.52 ms | 1.94× |
39+
//! | `json-escape` | 26.97 ms | 2.13× |
4040
//!
4141
//! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
4242
//!
@@ -121,16 +121,25 @@ pub use generic::escape_generic;
121121
pub fn escape<S: AsRef<str>>(input: S) -> String {
122122
#[cfg(target_arch = "x86_64")]
123123
{
124+
use generic::escape_inner;
125+
126+
let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
127+
result.push(b'"');
128+
let s = input.as_ref();
129+
let bytes = s.as_bytes();
124130
// Runtime CPU feature detection for x86_64
125131
if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
126-
unsafe { return x86::escape_avx512(input) }
132+
unsafe { x86::escape_avx512(bytes, &mut result) }
127133
} else if is_x86_feature_detected!("avx2") {
128-
unsafe { return x86::escape_avx2(input) }
134+
unsafe { x86::escape_avx2(bytes, &mut result) }
129135
} else if is_x86_feature_detected!("sse2") {
130-
unsafe { return x86::escape_sse2(input) }
136+
unsafe { x86::escape_sse2(bytes, &mut result) }
131137
} else {
132-
return escape_generic(input);
138+
escape_inner(bytes, &mut result);
133139
}
140+
result.push(b'"');
141+
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
142+
unsafe { String::from_utf8_unchecked(result) }
134143
}
135144

136145
#[cfg(target_arch = "aarch64")]

src/x86.rs

Lines changed: 23 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,9 @@ fn sub(a: *const u8, b: *const u8) -> usize {
3131

3232
#[target_feature(enable = "avx512f", enable = "avx512bw")]
3333
#[inline]
34-
pub unsafe fn escape_avx512<S: AsRef<str>>(input: S) -> String {
35-
let s = input.as_ref();
36-
let bytes = s.as_bytes();
34+
pub unsafe fn escape_avx512(bytes: &[u8], result: &mut Vec<u8>) {
3735
let len = bytes.len();
3836

39-
// Pre-allocate with estimated capacity
40-
let estimated_capacity = len + len / 2 + 2;
41-
let mut result = Vec::with_capacity(estimated_capacity);
42-
43-
result.push(b'"');
44-
4537
let start_ptr = bytes.as_ptr();
4638
let end_ptr = bytes[len..].as_ptr();
4739
let mut ptr = start_ptr;
@@ -80,7 +72,7 @@ pub unsafe fn escape_avx512<S: AsRef<str>>(input: S) -> String {
8072
if start < i {
8173
result.extend_from_slice(&bytes[start..i]);
8274
}
83-
write_escape(&mut result, escape_byte, c);
75+
write_escape(result, escape_byte, c);
8476
start = i + 1;
8577
mask &= mask - 1;
8678
}
@@ -143,11 +135,11 @@ pub unsafe fn escape_avx512<S: AsRef<str>>(input: S) -> String {
143135
start = sub(ptr, start_ptr) + LOOP_SIZE_AVX512;
144136
} else {
145137
// Process each 64-byte chunk that has escapes
146-
process_mask_avx512(ptr, start_ptr, &mut result, &mut start, bytes, mask_a, 0);
138+
process_mask_avx512(ptr, start_ptr, result, &mut start, bytes, mask_a, 0);
147139
process_mask_avx512(
148140
ptr,
149141
start_ptr,
150-
&mut result,
142+
result,
151143
&mut start,
152144
bytes,
153145
mask_b,
@@ -156,7 +148,7 @@ pub unsafe fn escape_avx512<S: AsRef<str>>(input: S) -> String {
156148
process_mask_avx512(
157149
ptr,
158150
start_ptr,
159-
&mut result,
151+
result,
160152
&mut start,
161153
bytes,
162154
mask_c,
@@ -165,7 +157,7 @@ pub unsafe fn escape_avx512<S: AsRef<str>>(input: S) -> String {
165157
process_mask_avx512(
166158
ptr,
167159
start_ptr,
168-
&mut result,
160+
result,
169161
&mut start,
170162
bytes,
171163
mask_d,
@@ -199,7 +191,7 @@ pub unsafe fn escape_avx512<S: AsRef<str>>(input: S) -> String {
199191
if start < i {
200192
result.extend_from_slice(&bytes[start..i]);
201193
}
202-
write_escape(&mut result, escape_byte, c);
194+
write_escape(result, escape_byte, c);
203195
start = i + 1;
204196
mask &= mask - 1;
205197
}
@@ -229,39 +221,28 @@ pub unsafe fn escape_avx512<S: AsRef<str>>(input: S) -> String {
229221
if start < i {
230222
result.extend_from_slice(&bytes[start..i]);
231223
}
232-
write_escape(&mut result, escape_byte, c);
224+
write_escape(result, escape_byte, c);
233225
start = i + 1;
234226
mask &= mask - 1;
235227
}
236228
}
237229
}
238230
} else {
239231
// Fall back to AVX2 for small strings
240-
return escape_avx2(input);
232+
return escape_avx2(bytes, result);
241233
}
242234

243235
// Copy any remaining bytes
244236
if start < len {
245237
result.extend_from_slice(&bytes[start..]);
246238
}
247-
248-
result.push(b'"');
249-
unsafe { String::from_utf8_unchecked(result) }
250239
}
251240

252241
#[target_feature(enable = "avx2")]
253242
#[inline]
254-
pub unsafe fn escape_avx2<S: AsRef<str>>(input: S) -> String {
255-
let s = input.as_ref();
256-
let bytes = s.as_bytes();
243+
pub unsafe fn escape_avx2(bytes: &[u8], result: &mut Vec<u8>) {
257244
let len = bytes.len();
258245

259-
// Pre-allocate with estimated capacity
260-
let estimated_capacity = len + len / 2 + 2;
261-
let mut result = Vec::with_capacity(estimated_capacity);
262-
263-
result.push(b'"');
264-
265246
let start_ptr = bytes.as_ptr();
266247
let end_ptr = bytes[len..].as_ptr();
267248
let mut ptr = start_ptr;
@@ -297,7 +278,7 @@ pub unsafe fn escape_avx2<S: AsRef<str>>(input: S) -> String {
297278
if start < i {
298279
result.extend_from_slice(&bytes[start..i]);
299280
}
300-
write_escape(&mut result, escape_byte, c);
281+
write_escape(result, escape_byte, c);
301282
start = i + 1;
302283
}
303284
mask ^= 1 << cur;
@@ -372,11 +353,11 @@ pub unsafe fn escape_avx2<S: AsRef<str>>(input: S) -> String {
372353
let mask_d = _mm256_movemask_epi8(cmp_d);
373354

374355
// Process each 32-byte chunk that has escapes
375-
process_mask_avx(ptr, start_ptr, &mut result, &mut start, bytes, mask_a, 0);
356+
process_mask_avx(ptr, start_ptr, result, &mut start, bytes, mask_a, 0);
376357
process_mask_avx(
377358
ptr,
378359
start_ptr,
379-
&mut result,
360+
result,
380361
&mut start,
381362
bytes,
382363
mask_b,
@@ -385,7 +366,7 @@ pub unsafe fn escape_avx2<S: AsRef<str>>(input: S) -> String {
385366
process_mask_avx(
386367
ptr,
387368
start_ptr,
388-
&mut result,
369+
result,
389370
&mut start,
390371
bytes,
391372
mask_c,
@@ -394,7 +375,7 @@ pub unsafe fn escape_avx2<S: AsRef<str>>(input: S) -> String {
394375
process_mask_avx(
395376
ptr,
396377
start_ptr,
397-
&mut result,
378+
result,
398379
&mut start,
399380
bytes,
400381
mask_d,
@@ -428,7 +409,7 @@ pub unsafe fn escape_avx2<S: AsRef<str>>(input: S) -> String {
428409
if start < i {
429410
result.extend_from_slice(&bytes[start..i]);
430411
}
431-
write_escape(&mut result, escape_byte, c);
412+
write_escape(result, escape_byte, c);
432413
start = i + 1;
433414
}
434415
mask ^= 1 << cur;
@@ -464,7 +445,7 @@ pub unsafe fn escape_avx2<S: AsRef<str>>(input: S) -> String {
464445
if start < i {
465446
result.extend_from_slice(&bytes[start..i]);
466447
}
467-
write_escape(&mut result, escape_byte, c);
448+
write_escape(result, escape_byte, c);
468449
start = i + 1;
469450
}
470451
mask ^= 1 << cur;
@@ -477,30 +458,20 @@ pub unsafe fn escape_avx2<S: AsRef<str>>(input: S) -> String {
477458
}
478459
} else {
479460
// Fall back to SSE2 for small strings
480-
return escape_sse2(input);
461+
return escape_sse2(bytes, result);
481462
}
482463

483464
// Copy any remaining bytes
484465
if start < len {
485466
result.extend_from_slice(&bytes[start..]);
486467
}
487-
488-
result.push(b'"');
489-
unsafe { String::from_utf8_unchecked(result) }
490468
}
491469

492470
#[target_feature(enable = "sse2")]
493471
#[inline]
494-
pub unsafe fn escape_sse2<S: AsRef<str>>(input: S) -> String {
495-
let s = input.as_ref();
496-
let bytes = s.as_bytes();
472+
pub unsafe fn escape_sse2(bytes: &[u8], result: &mut Vec<u8>) {
497473
let len = bytes.len();
498474

499-
let estimated_capacity = len + len / 2 + 2;
500-
let mut result = Vec::with_capacity(estimated_capacity);
501-
502-
result.push(b'"');
503-
504475
let start_ptr = bytes.as_ptr();
505476
let end_ptr = bytes[len..].as_ptr();
506477
let mut ptr = start_ptr;
@@ -518,7 +489,7 @@ pub unsafe fn escape_sse2<S: AsRef<str>>(input: S) -> String {
518489
if start < i {
519490
result.extend_from_slice(&bytes[start..i]);
520491
}
521-
write_escape(&mut result, escape_byte, c);
492+
write_escape(result, escape_byte, c);
522493
start = i + 1;
523494
}
524495
ptr = ptr.offset(1);
@@ -552,7 +523,7 @@ pub unsafe fn escape_sse2<S: AsRef<str>>(input: S) -> String {
552523
if start < i {
553524
result.extend_from_slice(&bytes[start..i]);
554525
}
555-
write_escape(&mut result, escape_byte, c);
526+
write_escape(result, escape_byte, c);
556527
start = i + 1;
557528
}
558529
mask ^= 1 << cur;
@@ -587,7 +558,7 @@ pub unsafe fn escape_sse2<S: AsRef<str>>(input: S) -> String {
587558
if start < i {
588559
result.extend_from_slice(&bytes[start..i]);
589560
}
590-
write_escape(&mut result, escape_byte, c);
561+
write_escape(result, escape_byte, c);
591562
start = i + 1;
592563
}
593564
mask ^= 1 << cur;
@@ -623,7 +594,7 @@ pub unsafe fn escape_sse2<S: AsRef<str>>(input: S) -> String {
623594
if start < i {
624595
result.extend_from_slice(&bytes[start..i]);
625596
}
626-
write_escape(&mut result, escape_byte, c);
597+
write_escape(result, escape_byte, c);
627598
start = i + 1;
628599
}
629600
mask ^= 1 << cur;
@@ -640,9 +611,6 @@ pub unsafe fn escape_sse2<S: AsRef<str>>(input: S) -> String {
640611
if start < len {
641612
result.extend_from_slice(&bytes[start..]);
642613
}
643-
644-
result.push(b'"');
645-
unsafe { String::from_utf8_unchecked(result) }
646614
}
647615

648616
#[inline(always)]

0 commit comments

Comments
 (0)