Skip to content

Commit ff34516

Browse files
authored
chore: use oxc_sourcemap version of escape_generic (#9)
1 parent 94e8297 commit ff34516

File tree

2 files changed

+137
-63
lines changed

2 files changed

+137
-63
lines changed

src/generic.rs

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/// Escapes `s` as a JSON string literal, including the surrounding double quotes.
///
/// Cross-platform generic implementation without any platform-specific
/// instructions. Bytes are classified through the [`ESCAPE`] lookup table:
///
/// * `"` and `\` are emitted as `\"` and `\\`;
/// * `\x08`, `\x09`, `\x0A`, `\x0C` and `\x0D` use the short forms
///   `\b`, `\t`, `\n`, `\f` and `\r`;
/// * every other byte below `0x20` is emitted as `\u00XX` (lowercase hex);
/// * all remaining bytes, including multi-byte UTF-8 sequences, are copied
///   through unchanged.
///
/// Returns a freshly allocated `String` holding the quoted, escaped text.
//
// Slightly modified version of
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
// Borrowed from:
// <https://github.com/oxc-project/oxc-sourcemap/blob/e533e6ca4d08c538d8d4df74eacd29437851591f/src/encode.rs#L331>
#[inline]
pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
    let bytes = s.as_ref().as_bytes();

    // Estimate capacity - most strings don't need much escaping, so reserve
    // the input length plus 50% padding plus the two surrounding quotes.
    let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
    let mut result = Vec::with_capacity(estimated_capacity);

    result.push(b'"');

    // Index of the first byte of the pending run that needs no escaping.
    let mut start = 0;

    for (i, &b) in bytes.iter().enumerate() {
        // Use the lookup table to check whether escaping is needed.
        let escape_byte = ESCAPE[usize::from(b)];
        if escape_byte == __ {
            // No escape needed; keep extending the pending run.
            continue;
        }

        // Flush the unescaped bytes that precede this position in one copy
        // (an empty slice when two escapes are adjacent).
        result.extend_from_slice(&bytes[start..i]);

        result.push(b'\\');
        if escape_byte == UU {
            // Control character without a short form: `\u00XX`.
            // `ESCAPE` yields `UU` only for bytes below 0x20, so the index is
            // always within the 32-entry `HEX_BYTES` table.
            result.extend_from_slice(b"u00");
            let HexPair(hi, lo) = HEX_BYTES[usize::from(b)];
            result.push(hi);
            result.push(lo);
        } else {
            // Simple two-character escape such as `\n` or `\"`.
            result.push(escape_byte);
        }

        // The next pending run starts right after the escaped byte.
        start = i + 1;
    }

    // Copy any remaining unescaped bytes.
    result.extend_from_slice(&bytes[start..]);

    result.push(b'"');

    // SAFETY: `result` contains only bytes copied from `s` (valid UTF-8) and
    // ASCII escape sequences. Copied runs are split only at escaped bytes,
    // which are all ASCII, so no multi-byte sequence is ever cut in half.
    unsafe { String::from_utf8_unchecked(result) }
}

const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
pub(crate) const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0;

// Lookup table of escape sequences. A value of b'x' at index i means that byte
// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
// Rows are the high nibble of the byte, columns the low nibble.
pub(crate) static ESCAPE: [u8; 256] = [
    //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
    UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1
    __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
    __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
];

// Pre-computed hex digit pairs for control characters: field 0 is the high
// nibble and field 1 the low nibble, both as lowercase ASCII hex digits.
#[derive(Debug, Clone, Copy)]
pub(crate) struct HexPair(u8, u8);

// Entry `i` holds the two hex digits of byte `i`, for `i` in 0x00..=0x1F.
pub(crate) static HEX_BYTES: [HexPair; 32] = [
    HexPair(b'0', b'0'),
    HexPair(b'0', b'1'),
    HexPair(b'0', b'2'),
    HexPair(b'0', b'3'),
    HexPair(b'0', b'4'),
    HexPair(b'0', b'5'),
    HexPair(b'0', b'6'),
    HexPair(b'0', b'7'),
    HexPair(b'0', b'8'),
    HexPair(b'0', b'9'),
    HexPair(b'0', b'a'),
    HexPair(b'0', b'b'),
    HexPair(b'0', b'c'),
    HexPair(b'0', b'd'),
    HexPair(b'0', b'e'),
    HexPair(b'0', b'f'),
    HexPair(b'1', b'0'),
    HexPair(b'1', b'1'),
    HexPair(b'1', b'2'),
    HexPair(b'1', b'3'),
    HexPair(b'1', b'4'),
    HexPair(b'1', b'5'),
    HexPair(b'1', b'6'),
    HexPair(b'1', b'7'),
    HexPair(b'1', b'8'),
    HexPair(b'1', b'9'),
    HexPair(b'1', b'a'),
    HexPair(b'1', b'b'),
    HexPair(b'1', b'c'),
    HexPair(b'1', b'd'),
    HexPair(b'1', b'e'),
    HexPair(b'1', b'f'),
];

src/lib.rs

Lines changed: 4 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,13 @@
108108
//! | `escape v_jsonescape` | 21.09 ms | 1.18× |
109109
//! | `json-escape` | 22.43 ms | 1.25× |
110110
111+
#[cfg(target_arch = "aarch64")]
112+
mod aarch64;
113+
mod generic;
111114
#[cfg(target_arch = "x86_64")]
112115
mod x86;
113116

114-
#[cfg(target_arch = "aarch64")]
115-
mod aarch64;
117+
pub use generic::escape_generic;
116118

117119
const BB: u8 = b'b'; // \x08
118120
const TT: u8 = b't'; // \x09
@@ -170,67 +172,6 @@ pub(crate) const HEX_BYTES: [(u8, u8); 256] = {
170172
bytes
171173
};
172174

173-
#[inline]
174-
/// Cross platform generic implementation without any platform specific instructions
175-
pub fn escape_generic<S: AsRef<str>>(input: S) -> String {
176-
let s = input.as_ref();
177-
let bytes = s.as_bytes();
178-
179-
// Estimate capacity - most strings don't need much escaping
180-
// Add some padding for potential escapes
181-
let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
182-
let mut result = Vec::with_capacity(estimated_capacity);
183-
184-
result.push(b'"');
185-
186-
let mut start = 0;
187-
let mut i = 0;
188-
189-
while i < bytes.len() {
190-
let b = bytes[i];
191-
192-
// Use lookup table to check if escaping is needed
193-
let escape_byte = ESCAPE[b as usize];
194-
195-
if escape_byte == 0 {
196-
// No escape needed, continue scanning
197-
i += 1;
198-
continue;
199-
}
200-
201-
// Copy any unescaped bytes before this position
202-
if start < i {
203-
result.extend_from_slice(&bytes[start..i]);
204-
}
205-
206-
// Handle the escape
207-
result.push(b'\\');
208-
if escape_byte == UU {
209-
// Unicode escape for control characters
210-
result.extend_from_slice(b"u00");
211-
let hex_digits = &HEX_BYTES[b as usize];
212-
result.push(hex_digits.0);
213-
result.push(hex_digits.1);
214-
} else {
215-
// Simple escape
216-
result.push(escape_byte);
217-
}
218-
219-
i += 1;
220-
start = i;
221-
}
222-
223-
// Copy any remaining unescaped bytes
224-
if start < bytes.len() {
225-
result.extend_from_slice(&bytes[start..]);
226-
}
227-
228-
result.push(b'"');
229-
230-
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
231-
unsafe { String::from_utf8_unchecked(result) }
232-
}
233-
234175
/// Main entry point for JSON string escaping with SIMD acceleration
235176
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
236177
pub fn escape<S: AsRef<str>>(input: S) -> String {

0 commit comments

Comments
 (0)