Skip to content

Commit c60e1a6

Browse files
committed
chore: use oxc_sourcemap version of escape_generic
1 parent 94e8297 commit c60e1a6

File tree

2 files changed

+135
-63
lines changed

2 files changed

+135
-63
lines changed

src/generic.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#[inline]
2+
// Slightly modified version of
3+
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
4+
pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
5+
let s = s.as_ref();
6+
let bytes = s.as_bytes();
7+
8+
// Estimate capacity - most strings don't need much escaping
9+
// Add some padding for potential escapes
10+
let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
11+
let mut result = Vec::with_capacity(estimated_capacity);
12+
13+
result.push(b'"');
14+
15+
let mut start = 0;
16+
let mut i = 0;
17+
18+
while i < bytes.len() {
19+
let b = bytes[i];
20+
21+
// Use lookup table to check if escaping is needed
22+
let escape_byte = ESCAPE[b as usize];
23+
24+
if escape_byte == 0 {
25+
// No escape needed, continue scanning
26+
i += 1;
27+
continue;
28+
}
29+
30+
// Copy any unescaped bytes before this position
31+
if start < i {
32+
result.extend_from_slice(&bytes[start..i]);
33+
}
34+
35+
// Handle the escape
36+
result.push(b'\\');
37+
if escape_byte == b'u' {
38+
// Unicode escape for control characters
39+
result.extend_from_slice(b"u00");
40+
let hex_digits = &HEX_BYTES[b as usize];
41+
result.push(hex_digits.0);
42+
result.push(hex_digits.1);
43+
} else {
44+
// Simple escape
45+
result.push(escape_byte);
46+
}
47+
48+
i += 1;
49+
start = i;
50+
}
51+
52+
// Copy any remaining unescaped bytes
53+
if start < bytes.len() {
54+
result.extend_from_slice(&bytes[start..]);
55+
}
56+
57+
result.push(b'"');
58+
59+
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
60+
unsafe { String::from_utf8_unchecked(result) }
61+
}
62+
63+
// Escape codes stored in `ESCAPE`. Each value is the character that follows
// the backslash in the escaped output; the trailing comment names the input
// byte(s) that map to it.
const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
pub(crate) const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0; // byte is not escaped

// Lookup table of escape sequences, indexed by input byte. A value of b'x' at
// index i means that byte i is escaped as "\x" in JSON. A value of 0 means
// that byte i is not escaped.
pub(crate) static ESCAPE: [u8; 256] = {
    // Start with "not escaped" everywhere, then mark the exceptions.
    let mut table = [__; 256];

    // Every control character defaults to the generic `\u00XX` form...
    let mut ctrl = 0x00;
    while ctrl < 0x20 {
        table[ctrl] = UU;
        ctrl += 1;
    }

    // ...except the ones that have a dedicated short escape.
    table[0x08] = BB;
    table[0x09] = TT;
    table[0x0A] = NN;
    table[0x0C] = FF;
    table[0x0D] = RR;

    // The two printable characters that must be escaped inside a string.
    table[0x22] = QU;
    table[0x5C] = BS;

    table
};
94+
95+
// Pair of ASCII hex digits (high nibble first, then low nibble) for one byte.
//
// The fields are `pub(crate)` so that every module able to see `HEX_BYTES` can
// also read the digits; with private fields the table could only be used from
// inside this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct HexPair(pub(crate) u8, pub(crate) u8);

// Pre-computed hex digit pairs for control characters.
//
// Entry `i` holds the two lowercase hex digits of `i` for the 32 control
// bytes 0x00..=0x1F. The table is generated at compile time instead of being
// written out by hand, so the entries cannot drift from their indices.
pub(crate) static HEX_BYTES: [HexPair; 32] = {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut table = [HexPair(b'0', b'0'); 32];
    let mut byte = 0;
    while byte < table.len() {
        table[byte] = HexPair(DIGITS[byte >> 4], DIGITS[byte & 0x0F]);
        byte += 1;
    }
    table
};

src/lib.rs

Lines changed: 4 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,13 @@
108108
//! | `escape v_jsonescape` | 21.09 ms | 1.18× |
109109
//! | `json-escape` | 22.43 ms | 1.25× |
110110
111+
#[cfg(target_arch = "aarch64")]
112+
mod aarch64;
113+
mod generic;
111114
#[cfg(target_arch = "x86_64")]
112115
mod x86;
113116

114-
#[cfg(target_arch = "aarch64")]
115-
mod aarch64;
117+
pub use generic::escape_generic;
116118

117119
const BB: u8 = b'b'; // \x08
118120
const TT: u8 = b't'; // \x09
@@ -170,67 +172,6 @@ pub(crate) const HEX_BYTES: [(u8, u8); 256] = {
170172
bytes
171173
};
172174

173-
#[inline]
174-
/// Cross platform generic implementation without any platform specific instructions
175-
pub fn escape_generic<S: AsRef<str>>(input: S) -> String {
176-
let s = input.as_ref();
177-
let bytes = s.as_bytes();
178-
179-
// Estimate capacity - most strings don't need much escaping
180-
// Add some padding for potential escapes
181-
let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
182-
let mut result = Vec::with_capacity(estimated_capacity);
183-
184-
result.push(b'"');
185-
186-
let mut start = 0;
187-
let mut i = 0;
188-
189-
while i < bytes.len() {
190-
let b = bytes[i];
191-
192-
// Use lookup table to check if escaping is needed
193-
let escape_byte = ESCAPE[b as usize];
194-
195-
if escape_byte == 0 {
196-
// No escape needed, continue scanning
197-
i += 1;
198-
continue;
199-
}
200-
201-
// Copy any unescaped bytes before this position
202-
if start < i {
203-
result.extend_from_slice(&bytes[start..i]);
204-
}
205-
206-
// Handle the escape
207-
result.push(b'\\');
208-
if escape_byte == UU {
209-
// Unicode escape for control characters
210-
result.extend_from_slice(b"u00");
211-
let hex_digits = &HEX_BYTES[b as usize];
212-
result.push(hex_digits.0);
213-
result.push(hex_digits.1);
214-
} else {
215-
// Simple escape
216-
result.push(escape_byte);
217-
}
218-
219-
i += 1;
220-
start = i;
221-
}
222-
223-
// Copy any remaining unescaped bytes
224-
if start < bytes.len() {
225-
result.extend_from_slice(&bytes[start..]);
226-
}
227-
228-
result.push(b'"');
229-
230-
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
231-
unsafe { String::from_utf8_unchecked(result) }
232-
}
233-
234175
/// Main entry point for JSON string escaping with SIMD acceleration
235176
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
236177
pub fn escape<S: AsRef<str>>(input: S) -> String {

0 commit comments

Comments
 (0)