Skip to content

Commit b7617d9

Browse files
committed
Massively improved the performance
1 parent c2483e4 commit b7617d9

File tree

2 files changed

+90
-43
lines changed

2 files changed

+90
-43
lines changed
Lines changed: 43 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,49 @@
11
export function detectBadJsonStrings(jsonString: string): boolean {
2-
// Single regex with global flag to find all matches with their positions
3-
const regex = /\\ud[89ab][0-9a-f]{2}|\\ud[cd][0-9a-f]{2}/g;
4-
const matches: Array<{ index: number; isHigh: boolean }> = [];
2+
// Fast path: skip everything if no \u
3+
let idx = jsonString.indexOf("\\u");
4+
if (idx === -1) return false;
55

6-
let match;
7-
while ((match = regex.exec(jsonString)) !== null) {
8-
const isHigh =
9-
match[0].startsWith("\\ud8") ||
10-
match[0].startsWith("\\ud9") ||
11-
match[0].startsWith("\\uda") ||
12-
match[0].startsWith("\\udb");
13-
matches.push({ index: match.index, isHigh });
14-
}
15-
16-
if (matches.length === 0) {
17-
return false; // No Unicode escapes found
18-
}
19-
20-
// Check for incomplete pairs
21-
const highSurrogates = new Set<number>();
22-
const lowSurrogates = new Set<number>();
23-
24-
for (const { index, isHigh } of matches) {
25-
if (isHigh) {
26-
highSurrogates.add(index);
27-
} else {
28-
lowSurrogates.add(index);
29-
}
30-
}
31-
32-
// Check for unmatched surrogates
33-
for (const highIndex of highSurrogates) {
34-
const expectedLowIndex = highIndex + 6; // Length of high surrogate
35-
if (!lowSurrogates.has(expectedLowIndex)) {
36-
return true; // Incomplete high surrogate
37-
}
38-
}
39-
40-
for (const lowIndex of lowSurrogates) {
41-
const expectedHighIndex = lowIndex - 6; // Length of low surrogate
42-
if (!highSurrogates.has(expectedHighIndex)) {
43-
return true; // Incomplete low surrogate
6+
// Only check the area around each \u
7+
while (idx !== -1 && idx < jsonString.length - 5) {
8+
if (jsonString[idx + 1] === "u" && jsonString[idx + 2] === "d") {
9+
const third = jsonString[idx + 3];
10+
// High surrogate
11+
if (
12+
/[89ab]/.test(third) &&
13+
/[0-9a-f]/.test(jsonString[idx + 4]) &&
14+
/[0-9a-f]/.test(jsonString[idx + 5])
15+
) {
16+
// Check for low surrogate after
17+
if (
18+
jsonString.substr(idx + 6, 2) !== "\\u" ||
19+
jsonString[idx + 8] !== "d" ||
20+
!/[cd]/.test(jsonString[idx + 9]) ||
21+
!/[0-9a-f]/.test(jsonString[idx + 10]) ||
22+
!/[0-9a-f]/.test(jsonString[idx + 11])
23+
) {
24+
return true; // Incomplete high surrogate
25+
}
26+
}
27+
// Low surrogate
28+
if (
29+
(third === "c" || third === "d") &&
30+
/[0-9a-f]/.test(jsonString[idx + 4]) &&
31+
/[0-9a-f]/.test(jsonString[idx + 5])
32+
) {
33+
// Check for high surrogate before
34+
if (
35+
idx < 6 ||
36+
jsonString.substr(idx - 6, 2) !== "\\u" ||
37+
jsonString[idx - 4] !== "d" ||
38+
!/[89ab]/.test(jsonString[idx - 3]) ||
39+
!/[0-9a-f]/.test(jsonString[idx - 2]) ||
40+
!/[0-9a-f]/.test(jsonString[idx - 1])
41+
) {
42+
return true; // Incomplete low surrogate
43+
}
44+
}
4445
}
46+
idx = jsonString.indexOf("\\u", idx + 1);
4547
}
46-
4748
return false;
4849
}

apps/webapp/test/detectbadJsonStrings.test.ts

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ describe("detectBadJsonStrings", () => {
9797
expect(goodOverhead / iterations).toBeLessThan(0.01); // Less than 10 microseconds per call
9898

9999
// Bad JSON can be slower due to regex matching, but still reasonable
100-
expect(badOverhead / iterations).toBeLessThan(0.02); // Less than 20 microseconds per call
100+
expect(badOverhead / iterations).toBeLessThan(0.01); // Less than 10 microseconds per call
101101

102102
// Total overhead for 100k calls should be reasonable
103103
expect(goodOverhead).toBeLessThan(1000); // Less than 1 second for 100k calls
@@ -127,6 +127,52 @@ describe("detectBadJsonStrings", () => {
127127
expect(time / iterations).toBeLessThan(size / 1000); // Roughly linear scaling
128128
}
129129
});
130+
131+
it("should show significant performance improvement with quick rejection", () => {
132+
const longText = `hello world `.repeat(1_000);
133+
const goodJson = `{"title": "hello", "text": "${longText}"}`;
134+
const badJson = `{"title": "hello\\ud835", "text": "${longText}"}`;
135+
const noUnicodeJson = `{"title": "hello", "text": "${longText}"}`;
136+
137+
const iterations = 100_000;
138+
139+
// Warm up
140+
for (let i = 0; i < 1000; i++) {
141+
detectBadJsonStrings(goodJson);
142+
detectBadJsonStrings(badJson);
143+
detectBadJsonStrings(noUnicodeJson);
144+
}
145+
146+
// Test strings with no Unicode escapes (99.9% case)
147+
const noUnicodeStart = performance.now();
148+
for (let i = 0; i < iterations; i++) {
149+
detectBadJsonStrings(noUnicodeJson);
150+
}
151+
const noUnicodeTime = performance.now() - noUnicodeStart;
152+
153+
// Test strings with Unicode escapes (0.1% case)
154+
const withUnicodeStart = performance.now();
155+
for (let i = 0; i < iterations; i++) {
156+
detectBadJsonStrings(badJson);
157+
}
158+
const withUnicodeTime = performance.now() - withUnicodeStart;
159+
160+
console.log(
161+
`No Unicode escapes (${iterations} iterations): ${noUnicodeTime.toFixed(2)}ms (${(
162+
noUnicodeTime / iterations
163+
).toFixed(4)}ms per call)`
164+
);
165+
console.log(
166+
`With Unicode escapes (${iterations} iterations): ${withUnicodeTime.toFixed(2)}ms (${(
167+
withUnicodeTime / iterations
168+
).toFixed(4)}ms per call)`
169+
);
170+
console.log(
171+
`Performance ratio: ${(withUnicodeTime / noUnicodeTime).toFixed(
172+
2
173+
)}x slower for Unicode strings`
174+
);
175+
});
130176
});
131177

132178
function processPacket(data: string): { data?: string; dataType?: string } {

0 commit comments

Comments
 (0)