|
| 1 | +#[inline] |
| 2 | +// Slightly modified version of |
| 3 | +// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079> |
| 4 | +// Borrowed from: |
| 5 | +// <https://github.com/oxc-project/oxc-sourcemap/blob/e533e6ca4d08c538d8d4df74eacd29437851591f/src/encode.rs#L331> |
| 6 | +pub fn escape_generic<S: AsRef<str>>(s: S) -> String { |
| 7 | + let s = s.as_ref(); |
| 8 | + let bytes = s.as_bytes(); |
| 9 | + |
| 10 | + // Estimate capacity - most strings don't need much escaping |
| 11 | + // Add some padding for potential escapes |
| 12 | + let estimated_capacity = bytes.len() + bytes.len() / 2 + 2; |
| 13 | + let mut result = Vec::with_capacity(estimated_capacity); |
| 14 | + |
| 15 | + result.push(b'"'); |
| 16 | + |
| 17 | + let mut start = 0; |
| 18 | + let mut i = 0; |
| 19 | + |
| 20 | + while i < bytes.len() { |
| 21 | + let b = bytes[i]; |
| 22 | + |
| 23 | + // Use lookup table to check if escaping is needed |
| 24 | + let escape_byte = ESCAPE[b as usize]; |
| 25 | + |
| 26 | + if escape_byte == 0 { |
| 27 | + // No escape needed, continue scanning |
| 28 | + i += 1; |
| 29 | + continue; |
| 30 | + } |
| 31 | + |
| 32 | + // Copy any unescaped bytes before this position |
| 33 | + if start < i { |
| 34 | + result.extend_from_slice(&bytes[start..i]); |
| 35 | + } |
| 36 | + |
| 37 | + // Handle the escape |
| 38 | + result.push(b'\\'); |
| 39 | + if escape_byte == b'u' { |
| 40 | + // Unicode escape for control characters |
| 41 | + result.extend_from_slice(b"u00"); |
| 42 | + let hex_digits = &HEX_BYTES[b as usize]; |
| 43 | + result.push(hex_digits.0); |
| 44 | + result.push(hex_digits.1); |
| 45 | + } else { |
| 46 | + // Simple escape |
| 47 | + result.push(escape_byte); |
| 48 | + } |
| 49 | + |
| 50 | + i += 1; |
| 51 | + start = i; |
| 52 | + } |
| 53 | + |
| 54 | + // Copy any remaining unescaped bytes |
| 55 | + if start < bytes.len() { |
| 56 | + result.extend_from_slice(&bytes[start..]); |
| 57 | + } |
| 58 | + |
| 59 | + result.push(b'"'); |
| 60 | + |
| 61 | + // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences) |
| 62 | + unsafe { String::from_utf8_unchecked(result) } |
| 63 | +} |
| 64 | + |
// Escape kinds stored in `ESCAPE`. Each value is the character that follows the
// backslash in the escaped output; the trailing comment names the input byte(s)
// that use it.
const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
pub(crate) const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0;

// Lookup table of escape sequences. A value of b'x' at index i means that byte
// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
// Rows are labelled by the high nibble of the byte and columns by the low nibble.
pub(crate) static ESCAPE: [u8; 256] = [
    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
    UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1
    __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
    __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
];
| 96 | + |
// Pre-computed hex digit pairs for control characters

/// Two ASCII hex digits: field `0` is the high nibble, field `1` the low nibble.
pub(crate) struct HexPair(u8, u8);

/// Lowercase two-digit hex spelling of each value in `0x00..=0x1f`, indexed by that
/// value (entry `0x1f` is `('1', 'f')`).
pub(crate) static HEX_BYTES: [HexPair; 32] = [
    HexPair(b'0', b'0'),
    HexPair(b'0', b'1'),
    HexPair(b'0', b'2'),
    HexPair(b'0', b'3'),
    HexPair(b'0', b'4'),
    HexPair(b'0', b'5'),
    HexPair(b'0', b'6'),
    HexPair(b'0', b'7'),
    HexPair(b'0', b'8'),
    HexPair(b'0', b'9'),
    HexPair(b'0', b'a'),
    HexPair(b'0', b'b'),
    HexPair(b'0', b'c'),
    HexPair(b'0', b'd'),
    HexPair(b'0', b'e'),
    HexPair(b'0', b'f'),
    HexPair(b'1', b'0'),
    HexPair(b'1', b'1'),
    HexPair(b'1', b'2'),
    HexPair(b'1', b'3'),
    HexPair(b'1', b'4'),
    HexPair(b'1', b'5'),
    HexPair(b'1', b'6'),
    HexPair(b'1', b'7'),
    HexPair(b'1', b'8'),
    HexPair(b'1', b'9'),
    HexPair(b'1', b'a'),
    HexPair(b'1', b'b'),
    HexPair(b'1', b'c'),
    HexPair(b'1', b'd'),
    HexPair(b'1', b'e'),
    HexPair(b'1', b'f'),
];
0 commit comments