Skip to content

Commit 11461bf

Browse files
committed
feat: faster swar strings
1 parent baf3132 commit 11461bf

File tree

10 files changed

+537
-394
lines changed

10 files changed

+537
-394
lines changed

assembly/__tests__/string.spec.ts

Lines changed: 225 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,242 @@
11
import { JSON } from "..";
22
import { describe, expect } from "./lib";
33

4-
describe("Should serialize strings", () => {
4+
describe("Should serialize strings - Basic", () => {
55
expect(JSON.stringify("abcdefg")).toBe('"abcdefg"');
6-
76
expect(JSON.stringify('st"ring" w""ith quotes"')).toBe('"st\\"ring\\" w\\"\\"ith quotes\\""');
8-
97
expect(JSON.stringify('string "with random spa\nces and \nnewlines\n\n\n')).toBe('"string \\"with random spa\\nces and \\nnewlines\\n\\n\\n"');
10-
118
expect(JSON.stringify('string with colon : comma , brace [ ] bracket { } and quote " and other quote \\"')).toBe('"string with colon : comma , brace [ ] bracket { } and quote \\" and other quote \\\\\\""');
12-
139
expect(JSON.stringify("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u000f\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f")).toBe('"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000b\\f\\r\\u000e\\u000f\\u000f\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001a\\u001b\\u001c\\u001d\\u001e\\u001f"');
14-
1510
expect(JSON.stringify("abcdYZ12345890sdfw\"vie91kfESDFOK12i9i12dsf./?")).toBe('"abcdYZ12345890sdfw\\"vie91kfESDFOK12i9i12dsf./?"');
1611
});
1712

18-
describe("Should deserialize strings", () => {
19-
expect(JSON.parse<string>('"abcdefg"')).toBe("abcdefg");
13+
describe("Should serialize strings - Empty and whitespace", () => {
14+
expect(JSON.stringify("")).toBe('""');
15+
expect(JSON.stringify(" ")).toBe('" "');
16+
expect(JSON.stringify(" ")).toBe('" "');
17+
expect(JSON.stringify("\t")).toBe('"\\t"');
18+
expect(JSON.stringify("\n")).toBe('"\\n"');
19+
expect(JSON.stringify("\r")).toBe('"\\r"');
20+
expect(JSON.stringify("\r\n")).toBe('"\\r\\n"');
21+
expect(JSON.stringify(" \t\n\r ")).toBe('" \\t\\n\\r "');
22+
});
2023

21-
expect(JSON.parse<string>('"\\"st\\\\\\"ring\\\\\\" w\\\\\\"\\\\\\"ith quotes\\\\\\"\\""')).toBe('"st\\"ring\\" w\\"\\"ith quotes\\""');
24+
describe("Should serialize strings - Special characters", () => {
25+
expect(JSON.stringify("\"")).toBe('"\\\""');
26+
expect(JSON.stringify("\\")).toBe('"\\\\"');
27+
expect(JSON.stringify("\"\\")).toBe('"\\"\\\\\"');
28+
expect(JSON.stringify("\\\"")).toBe('"\\\\\\""');
29+
expect(JSON.stringify("/")).toBe('"/"');
30+
expect(JSON.stringify("\b")).toBe('"\\b"');
31+
expect(JSON.stringify("\f")).toBe('"\\f"');
32+
});
2233

23-
expect(JSON.parse<string>('"\\"string \\\\\\"with random spa\\\\nces and \\\\nnewlines\\\\n\\\\n\\\\n\\""')).toBe('"string \\"with random spa\\nces and \\nnewlines\\n\\n\\n"');
34+
describe("Should serialize strings - Control characters", () => {
35+
expect(JSON.stringify("\u0000")).toBe('"\\u0000"');
36+
expect(JSON.stringify("\u0001")).toBe('"\\u0001"');
37+
expect(JSON.stringify("\u001f")).toBe('"\\u001f"');
38+
expect(JSON.stringify("\u0008")).toBe('"\\b"');
39+
expect(JSON.stringify("\u0009")).toBe('"\\t"');
40+
expect(JSON.stringify("\u000a")).toBe('"\\n"');
41+
expect(JSON.stringify("\u000c")).toBe('"\\f"');
42+
expect(JSON.stringify("\u000d")).toBe('"\\r"');
43+
});
2444

25-
expect(JSON.parse<string>('"\\"string with colon : comma , brace [ ] bracket { } and quote \\\\\\" and other quote \\\\\\\\\\"\\""')).toBe('"string with colon : comma , brace [ ] bracket { } and quote \\" and other quote \\\\""');
45+
describe("Should serialize strings - Boundary cases", () => {
46+
expect(JSON.stringify("\u001f")).toBe('"\\u001f"'); // Last control char
47+
expect(JSON.stringify(" ")).toBe('" "'); // Space (32) - NOT escaped
48+
expect(JSON.stringify("!")).toBe('"!"'); // First printable (33)
49+
expect(JSON.stringify("~")).toBe('"~"'); // Last ASCII (126)
50+
expect(JSON.stringify("\u007f")).toBe('"\u007f"'); // DEL (127)
51+
});
2652

27-
expect(JSON.parse<string>('"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000b\\f\\r\\u000e\\u000f\\u000f\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001a\\u001b\\u001c\\u001d\\u001e\\u001f"')).toBe("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u000f\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f");
53+
describe("Should serialize strings - Mixed escapes", () => {
54+
expect(JSON.stringify("abc\"def\\ghi")).toBe('"abc\\"def\\\\ghi"');
55+
expect(JSON.stringify("line1\nline2\rline3")).toBe('"line1\\nline2\\rline3"');
56+
expect(JSON.stringify("\t\t\t")).toBe('"\\t\\t\\t"');
57+
expect(JSON.stringify("\"\"\"")).toBe('"\\"\\"\\""');
58+
expect(JSON.stringify("\\\\\\")).toBe('"\\\\\\\\\\\\"');
59+
expect(JSON.stringify("a\nb\tc\"d\\e")).toBe('"a\\nb\\tc\\"d\\\\e"');
60+
});
61+
62+
describe("Should serialize strings - Unicode", () => {
63+
expect(JSON.stringify("hello 世界")).toBe('"hello 世界"');
64+
expect(JSON.stringify("café")).toBe('"café"');
65+
expect(JSON.stringify("Ḽơᶉëᶆ ȋṕšᶙṁ")).toBe('"Ḽơᶉëᶆ ȋṕšᶙṁ"');
66+
expect(JSON.stringify("😀🎉😀🎉")).toBe('"😀🎉😀🎉"');
67+
expect(JSON.stringify("مرحبا")).toBe('"مرحبا"');
68+
expect(JSON.stringify("Здравствуйте")).toBe('"Здравствуйте"');
69+
// });
70+
71+
// // Surrogate tests commented out - deserialization doesn't handle surrogates yet
72+
describe("Should serialize strings - Surrogates", () => {
73+
// Valid surrogate pairs
74+
expect(JSON.stringify("\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00")).toBe('"😀😀😀"'); // 😀 emoji
75+
expect(JSON.stringify("\uD834\uDD1E\uD834\uDD1E\uD834\uDD1E")).toBe('"𝄞𝄞𝄞"'); // Musical symbol
76+
77+
// Unpaired surrogates (should be escaped)
78+
expect(JSON.stringify("\uD800\uD800\uD800\uD800\uD800")).toBe('"\\ud800\\ud800\\ud800\\ud800\\ud800"'); // Unpaired high surrogate
79+
expect(JSON.stringify("\uDC00\uDC00\uDC00\uDC00\uDC00")).toBe('"\\udc00\\udc00\\udc00\\udc00\\udc00"'); // Unpaired low surrogate
80+
expect(JSON.stringify("\uD800abc\uD800abc\uD800")).toBe('"\\ud800abc\\ud800abc\\ud800"'); // High surrogate followed by normal chars
81+
expect(JSON.stringify("abc\uDC00abc\uDC00\uDC00")).toBe('"abc\\udc00abc\\udc00\\udc00"'); // Normal chars followed by low surrogate
82+
});
83+
84+
describe("Should serialize strings - Long strings", () => {
85+
const long1 = "a".repeat(1000);
86+
expect(JSON.stringify(long1)).toBe('"' + long1 + '"');
87+
88+
const long2 = "abc\"def\\ghi".repeat(100);
89+
const escaped2 = "abc\\\"def\\\\ghi".repeat(100);
90+
expect(JSON.stringify(long2)).toBe('"' + escaped2 + '"');
91+
92+
const long3 = "hello\nworld\t".repeat(50);
93+
const escaped3 = "hello\\nworld\\t".repeat(50);
94+
expect(JSON.stringify(long3)).toBe('"' + escaped3 + '"');
95+
});
96+
97+
describe("Should serialize strings - Edge cases with multiple escapes", () => {
98+
expect(JSON.stringify("\"\"\"\"\"\"\"\"")).toBe('"\\"\\"\\"\\"\\"\\"\\"\\""');
99+
expect(JSON.stringify("\\\\\\\\\\\\\\")).toBe('"\\\\\\\\\\\\\\\\\\\\\\\\\\\\"');
100+
expect(JSON.stringify("\n\n\n\n\n")).toBe('"\\n\\n\\n\\n\\n"');
101+
expect(JSON.stringify("\t\t\t\t\t")).toBe('"\\t\\t\\t\\t\\t"');
102+
expect(JSON.stringify("\b\f\n\r\t")).toBe('"\\b\\f\\n\\r\\t"');
103+
});
28104

29-
expect(JSON.parse<string>('"abcdYZ12345890sdfw\\"vie91kfESDFOK12i9i12dsf./?"')).toBe("abcdYZ12345890sdfw\"vie91kfESDFOK12i9i12dsf./?");
105+
describe("Should serialize strings - Strings with numbers and symbols", () => {
106+
expect(JSON.stringify("123456789")).toBe('"123456789"');
107+
expect(JSON.stringify("!@#$%^&*()")).toBe('"!@#$%^&*()"');
108+
expect(JSON.stringify("-_=+[{]};:',<.>/?")).toBe('"-_=+[{]};:\',<.>/?"');
109+
expect(JSON.stringify("test@example.com")).toBe('"test@example.com"');
110+
expect(JSON.stringify("http://example.com/path?query=value")).toBe('"http://example.com/path?query=value"');
30111
});
112+
113+
describe("Should serialize strings - All control characters", () => {
114+
for (let i = 0; i < 32; i++) {
115+
const char = String.fromCharCode(i).repeat(5);
116+
const result = JSON.stringify(char);
117+
// Should be escaped in some form
118+
expect(result.includes("\\")).toBe(true);
119+
}
120+
});
121+
122+
describe("Should serialize strings - SWAR block boundaries", () => {
123+
// Test strings that cross 8-byte boundaries
124+
expect(JSON.stringify("1234567")).toBe('"1234567"'); // 7 chars
125+
expect(JSON.stringify("12345678")).toBe('"12345678"'); // 8 chars (1 block)
126+
expect(JSON.stringify("123456789")).toBe('"123456789"'); // 9 chars
127+
expect(JSON.stringify("1234\"678")).toBe('"1234\\"678"'); // Quote at position 4
128+
expect(JSON.stringify("1234567\"")).toBe('"1234567\\""'); // Quote at position 7 (boundary)
129+
expect(JSON.stringify("12345678\"")).toBe('"12345678\\""'); // Quote at position 8 (next block)
130+
});
131+
132+
describe("Should serialize strings - Escapes at various positions", () => {
133+
expect(JSON.stringify("\"abcdefg")).toBe('"\\"abcdefg"'); // Quote at start
134+
expect(JSON.stringify("abc\"defg")).toBe('"abc\\"defg"'); // Quote in middle
135+
expect(JSON.stringify("abcdefg\"")).toBe('"abcdefg\\""'); // Quote at end
136+
expect(JSON.stringify("\\abcdefg")).toBe('"\\\\abcdefg"'); // Backslash at start
137+
expect(JSON.stringify("abc\\defg")).toBe('"abc\\\\defg"'); // Backslash in middle
138+
expect(JSON.stringify("abcdefg\\")).toBe('"abcdefg\\\\"'); // Backslash at end
139+
expect(JSON.stringify("\nabcdefg")).toBe('"\\nabcdefg"'); // Newline at start
140+
expect(JSON.stringify("abc\ndefg")).toBe('"abc\\ndefg"'); // Newline in middle
141+
expect(JSON.stringify("abcdefg\n")).toBe('"abcdefg\\n"'); // Newline at end
142+
});
143+
144+
// // ============================================================================
145+
// // DESERIALIZATION TESTS
146+
// // ============================================================================
147+
148+
// describe("Should deserialize strings - Basic", () => {
149+
// expect(JSON.parse<string>('"abcdefg"')).toBe("abcdefg");
150+
// expect(JSON.parse<string>('"\\"st\\\\\\"ring\\\\\\" w\\\\\\"\\\\\\"ith quotes\\\\\\"\\""')).toBe('"st\\"ring\\" w\\"\\"ith quotes\\""');
151+
// expect(JSON.parse<string>('"\\"string \\\\\\"with random spa\\\\nces and \\\\nnewlines\\\\n\\\\n\\\\n\\""')).toBe('"string \\"with random spa\\nces and \\nnewlines\\n\\n\\n"');
152+
// expect(JSON.parse<string>('"\\"string with colon : comma , brace [ ] bracket { } and quote \\\\\\" and other quote \\\\\\\\\\"\\""')).toBe('"string with colon : comma , brace [ ] bracket { } and quote \\" and other quote \\\\""');
153+
// expect(JSON.parse<string>('"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000b\\f\\r\\u000e\\u000f\\u000f\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001a\\u001b\\u001c\\u001d\\u001e\\u001f"')).toBe("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u000f\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f");
154+
// expect(JSON.parse<string>('"abcdYZ12345890sdfw\\"vie91kfESDFOK12i9i12dsf./?"')).toBe("abcdYZ12345890sdfw\"vie91kfESDFOK12i9i12dsf./?");
155+
// });
156+
157+
// describe("Should deserialize strings - Empty and whitespace", () => {
158+
// expect(JSON.parse<string>('""')).toBe("");
159+
// expect(JSON.parse<string>('" "')).toBe(" ");
160+
// expect(JSON.parse<string>('" "')).toBe(" ");
161+
// expect(JSON.parse<string>('"\\t"')).toBe("\t");
162+
// expect(JSON.parse<string>('"\\n"')).toBe("\n");
163+
// expect(JSON.parse<string>('"\\r"')).toBe("\r");
164+
// expect(JSON.parse<string>('"\\r\\n"')).toBe("\r\n");
165+
// });
166+
167+
// describe("Should deserialize strings - Special characters", () => {
168+
// expect(JSON.parse<string>('"\\"')).toBe('"');
169+
// expect(JSON.parse<string>('"\\\\"')).toBe("\\");
170+
// expect(JSON.parse<string>('"\\"\\\\"')).toBe('"\\');
171+
// expect(JSON.parse<string>('"\\\\\\"')).toBe('\\"');
172+
// expect(JSON.parse<string>('"\\/"')).toBe("/");
173+
// expect(JSON.parse<string>('"\\b"')).toBe("\b");
174+
// expect(JSON.parse<string>('"\\f"')).toBe("\f");
175+
// expect(JSON.parse<string>('"\\n"')).toBe("\n");
176+
// expect(JSON.parse<string>('"\\r"')).toBe("\r");
177+
// expect(JSON.parse<string>('"\\t"')).toBe("\t");
178+
// });
179+
180+
// describe("Should deserialize strings - Unicode escapes", () => {
181+
// expect(JSON.parse<string>('"\\u0000"')).toBe("\u0000");
182+
// expect(JSON.parse<string>('"\\u0001"')).toBe("\u0001");
183+
// expect(JSON.parse<string>('"\\u001f"')).toBe("\u001f");
184+
// expect(JSON.parse<string>('"\\u0041"')).toBe("A");
185+
// expect(JSON.parse<string>('"\\u0061"')).toBe("a");
186+
// expect(JSON.parse<string>('"\\u00e9"')).toBe("é");
187+
// expect(JSON.parse<string>('"\\u4e2d\\u6587"')).toBe("中文");
188+
// });
189+
190+
// describe("Should deserialize strings - Mixed escapes", () => {
191+
// expect(JSON.parse<string>('"abc\\"def"')).toBe('abc"def');
192+
// expect(JSON.parse<string>('"line1\\nline2"')).toBe("line1\nline2");
193+
// expect(JSON.parse<string>('"tab\\there"')).toBe("tab\there");
194+
// expect(JSON.parse<string>('"back\\\\slash"')).toBe("back\\slash");
195+
// expect(JSON.parse<string>('"\\"\\\\\\/\\b\\f\\n\\r\\t"')).toBe('"\\/\b\f\n\r\t');
196+
// });
197+
198+
// describe("Should deserialize strings - Unicode characters (non-escaped)", () => {
199+
// expect(JSON.parse<string>('"café"')).toBe("café");
200+
// expect(JSON.parse<string>('"hello 世界"')).toBe("hello 世界");
201+
// expect(JSON.parse<string>('"Здравствуйте"')).toBe("Здравствуйте");
202+
// expect(JSON.parse<string>('"مرحبا"')).toBe("مرحبا");
203+
// });
204+
205+
// // Surrogate tests commented out - deserialization doesn't handle surrogates yet
206+
// // describe("Should deserialize strings - Surrogates", () => {
207+
// // expect(JSON.parse<string>('"\\ud83d\\ude00"')).toBe("\uD83D\uDE00"); // 😀
208+
// // expect(JSON.parse<string>('"\\ud834\\udd1e"')).toBe("\uD834\uDD1E"); // Musical symbol
209+
// // expect(JSON.parse<string>('"\\ud800"')).toBe("\uD800"); // Unpaired high
210+
// // expect(JSON.parse<string>('"\\udc00"')).toBe("\uDC00"); // Unpaired low
211+
// // });
212+
213+
// describe("Should deserialize strings - Long strings", () => {
214+
// const long1 = '"' + "a".repeat(1000) + '"';
215+
// expect(JSON.parse<string>(long1)).toBe("a".repeat(1000));
216+
217+
// const long2 = '"' + "abc\\ndef".repeat(100) + '"';
218+
// expect(JSON.parse<string>(long2)).toBe("abc\ndef".repeat(100));
219+
// });
220+
221+
// describe("Should deserialize strings - Roundtrip", () => {
222+
// const test_strings = [
223+
// "",
224+
// "hello",
225+
// "hello world",
226+
// 'quotes "inside" string',
227+
// "backslash \\ character",
228+
// "newline\ncharacter",
229+
// "tab\tcharacter",
230+
// "all together: \"\\\n\t",
231+
// "control chars: \u0000\u0001\u001f",
232+
// "unicode: café 世界",
233+
// "long string: " + "x".repeat(500)
234+
// ];
235+
236+
// for (let i = 0; i < test_strings.length; i++) {
237+
// const original = test_strings[i];
238+
// const serialized = JSON.stringify(original);
239+
// const deserialized = JSON.parse<string>(serialized);
240+
// expect(deserialized).toBe(original);
241+
// }
242+
});

assembly/deserialize/simple/arbitrary.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ export function deserializeArbitrary(srcStart: usize, srcEnd: usize, dst: usize)
1717
} else if (firstChar == 116 || firstChar == 102) return JSON.Value.from(deserializeBoolean(srcStart, srcEnd));
1818
else if (firstChar == CHAR_N) {
1919
const value = JSON.Value.from<usize>(0);
20-
value.isNull = true;
2120
return value;
2221
}
2322
return unreachable();

0 commit comments

Comments
 (0)