|
/**
 * Reports whether raw JSON text contains an unpaired UTF-16 surrogate escape.
 *
 * A high surrogate escape (`\ud800`–`\udbff`) must be immediately followed by
 * a low surrogate escape (`\udc00`–`\udfff`), and a low surrogate escape must
 * be immediately preceded by a high one. Anything else is "bad".
 *
 * Hex digits are matched case-insensitively, as JSON permits. A backslash
 * that is itself escaped (`\\ud800` in the JSON text) is literal text and is
 * not treated as a `\u` escape.
 *
 * @param jsonString - Serialized JSON text to scan (not a parsed value).
 * @returns `true` if at least one unpaired surrogate escape is present,
 *   otherwise `false`.
 */
export function detectBadJsonStrings(jsonString: string): boolean {
  const length = jsonString.length;
  // Fast path: indexOf returns -1 when there is no "\u" at all.
  let idx = jsonString.indexOf("\\u");

  // A full escape needs 6 characters; once idx is too close to the end,
  // every later match is too, so the loop can stop.
  while (idx !== -1 && idx + 6 <= length) {
    if (isBackslashEscaped(jsonString, idx)) {
      // "\\u…" — the backslash is a literal character, not an escape start.
      idx = jsonString.indexOf("\\u", idx + 2);
      continue;
    }

    const kind = classifySurrogateEscape(jsonString, idx);

    // Paired low surrogates are consumed together with their high surrogate
    // below, so a low surrogate reached here has no partner.
    if (kind === "low") return true;

    if (kind === "high") {
      if (classifySurrogateEscape(jsonString, idx + 6) !== "low") {
        return true; // High surrogate without a following low surrogate
      }
      // Skip the whole 12-character pair.
      idx = jsonString.indexOf("\\u", idx + 12);
      continue;
    }

    idx = jsonString.indexOf("\\u", idx + 2);
  }

  return false;
}

/**
 * Classifies the six characters starting at `pos` as a high surrogate escape,
 * a low surrogate escape, or neither. Out-of-range positions yield "none".
 */
function classifySurrogateEscape(
  text: string,
  pos: number,
): "high" | "low" | "none" {
  // 0x5c is "\" and 0x75 is "u" (JSON only allows a lowercase "u" here).
  if (text.charCodeAt(pos) !== 0x5c || text.charCodeAt(pos + 1) !== 0x75) {
    return "none";
  }
  // Every surrogate code unit is 0xD???, so the first hex digit must be D.
  if (hexDigitValue(text.charCodeAt(pos + 2)) !== 0xd) return "none";

  const second = hexDigitValue(text.charCodeAt(pos + 3));
  if (second < 0x8) return "none"; // also covers -1 (not a hex digit)

  if (
    hexDigitValue(text.charCodeAt(pos + 4)) < 0 ||
    hexDigitValue(text.charCodeAt(pos + 5)) < 0
  ) {
    return "none";
  }

  // D800–DBFF are high surrogates, DC00–DFFF are low surrogates.
  return second <= 0xb ? "high" : "low";
}

/**
 * Returns the numeric value (0–15) of a hex digit char code, or -1 if the
 * code is not a hex digit. NaN (from an out-of-range charCodeAt) yields -1.
 */
function hexDigitValue(code: number): number {
  if (code >= 0x30 && code <= 0x39) return code - 0x30; // 0-9
  if (code >= 0x61 && code <= 0x66) return code - 0x61 + 10; // a-f
  if (code >= 0x41 && code <= 0x46) return code - 0x41 + 10; // A-F
  return -1;
}

/**
 * Returns true when the backslash at `pos` is preceded by an odd number of
 * consecutive backslashes, i.e. it is the second half of a `\\` escape.
 */
function isBackslashEscaped(text: string, pos: number): boolean {
  let preceding = 0;
  for (let i = pos - 1; i >= 0 && text.charCodeAt(i) === 0x5c; i--) {
    preceding++;
  }
  return preceding % 2 === 1;
}
0 commit comments