From a68651731b2361ff5c78f494dd6c247d564cba09 Mon Sep 17 00:00:00 2001 From: streamich Date: Sun, 9 Mar 2025 17:45:17 +0100 Subject: [PATCH 1/6] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20implement=20partial?= =?UTF-8?q?=20JSON=20array=20parsing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/json/JsonDecoder.ts | 8 +-- src/json/JsonDecoderPartial.ts | 49 ++++++++++++++++++ src/json/__tests__/JsonDecoderPartial.spec.ts | 51 +++++++++++++++++++ 3 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 src/json/JsonDecoderPartial.ts create mode 100644 src/json/__tests__/JsonDecoderPartial.spec.ts diff --git a/src/json/JsonDecoder.ts b/src/json/JsonDecoder.ts index 1b990b4a..87dc26d5 100644 --- a/src/json/JsonDecoder.ts +++ b/src/json/JsonDecoder.ts @@ -642,17 +642,17 @@ export class JsonDecoder implements BinaryJsonDecoder { public readArr(): unknown[] { const reader = this.reader; - if (reader.u8() !== 0x5b) throw new Error('Invalid JSON'); + if (reader.u8() !== 0x5b /* [ */) throw new Error('Invalid JSON'); const arr: unknown[] = []; const uint8 = reader.uint8; while (true) { this.skipWhitespace(); const char = uint8[reader.x]; - if (char === 0x5d) return reader.x++, arr; // ] - if (char === 0x2c) { + if (char === 0x5d /* ] */) return reader.x++, arr; + if (char === 0x2c /* , */) { reader.x++; continue; - } // , + } arr.push(this.readAny()); } } diff --git a/src/json/JsonDecoderPartial.ts b/src/json/JsonDecoderPartial.ts new file mode 100644 index 00000000..75693843 --- /dev/null +++ b/src/json/JsonDecoderPartial.ts @@ -0,0 +1,49 @@ +import {JsonDecoder} from './JsonDecoder'; + +export class JsonDecoderPartial extends JsonDecoder { + public readArr(): unknown[] { + const reader = this.reader; + reader.u8(); /* [ */ + const arr: unknown[] = []; + const uint8 = reader.uint8; + while (true) { + this.skipWhitespace(); + const char = uint8[reader.x]; + if (char === 0x5d /* ] */) return reader.x++, arr; + if (char === 0x2c /* , */) { + reader.x++; + continue; + } + try { + arr.push(this.readAny()); + } catch (error) { + if (error instanceof Error && error.message === 'Invalid JSON') return arr; + throw error; + } + } + } + + // public readObj(): PackValue | Record | unknown { + // const reader = this.reader; + // if (reader.u8() !== 0x7b) throw new Error('Invalid JSON'); + // const obj: Record = {}; + // const uint8 = reader.uint8; + // while (true) { + // this.skipWhitespace(); + // let char = uint8[reader.x]; + // if (char === 0x7d) return reader.x++, obj; // } + // if (char === 0x2c) { + // reader.x++; + // continue; + // } // , + // char = uint8[reader.x++]; + // if (char !== 0x22) throw new Error('Invalid JSON'); + // const key = readShortUtf8StrAndUnescape(reader); + // if (key === '__proto__') throw new Error('Invalid JSON'); + // this.skipWhitespace(); + // if (reader.u8() !== 0x3a) throw new Error('Invalid JSON'); + // this.skipWhitespace(); + // obj[key] = this.readAny(); + // } + // } +} diff --git a/src/json/__tests__/JsonDecoderPartial.spec.ts b/src/json/__tests__/JsonDecoderPartial.spec.ts new file mode 100644 index 00000000..304d5148 --- /dev/null +++ b/src/json/__tests__/JsonDecoderPartial.spec.ts @@ -0,0 +1,51 @@ +import {JsonDecoderPartial} from '../JsonDecoderPartial'; + +const decoder = new JsonDecoderPartial(); +const parse = (text: string) => { + const data = Buffer.from(text, 'utf-8'); + decoder.reader.reset(data); + const value = decoder.readAny(); + return value; +}; + +describe('array', () => { + test('can parse valid array', () => { + const value = parse('[1, 2, 3]'); + expect(value).toEqual([1, 2, 3]); + }); + + test('can parse array with missing closing brace', () => { + const value = parse('[1, 2, 3 '); + expect(value).toEqual([1, 2, 3]); + }); + + test('can parse array with missing closing brace - 2', () => { + const value = parse('[1, 2, 3'); + expect(value).toEqual([1, 2, 3]); + }); + + test('can parse array with trailing comma', () => { + const value = parse('[1, 2, '); + expect(value).toEqual([1, 2]); + }); + + test('can parse array with trailing comma - 2', () => { + const value = parse('[1, 2,'); + expect(value).toEqual([1, 2]); + }); + + test('can parse array with two trailing commas', () => { + const value = parse('[true, "asdf",,'); + expect(value).toEqual([true, 'asdf']); + }); + + test('can parse array with double commas', () => { + const value = parse('[true, "asdf",, 4]'); + expect(value).toEqual([true, 'asdf', 4]); + }); + + test('can parse array with triple commas', () => { + const value = parse('[true, "asdf",, , 4]'); + expect(value).toEqual([true, 'asdf', 4]); + }); +}); From 5474418da5c5e63dfe776b374640f51375dfda32 Mon Sep 17 00:00:00 2001 From: streamich Date: Sun, 9 Mar 2025 18:07:54 +0100 Subject: [PATCH 2/6] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20implement=20partial?= =?UTF-8?q?=20object=20parsing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/json/JsonDecoder.ts | 62 +++++++-------- src/json/JsonDecoderPartial.ts | 73 +++++++++++------ src/json/__tests__/JsonDecoderPartial.spec.ts | 78 +++++++++++++++++++ 3 files changed, 153 insertions(+), 60 deletions(-) diff --git a/src/json/JsonDecoder.ts b/src/json/JsonDecoder.ts index 87dc26d5..ab7dd7bd 100644 --- a/src/json/JsonDecoder.ts +++ b/src/json/JsonDecoder.ts @@ -107,7 +107,7 @@ const isUndefined = (u8: Uint8Array, x: number) => const fromCharCode = String.fromCharCode; -const readShortUtf8StrAndUnescape = (reader: Reader): string => { +export const readKey = (reader: Reader): string => { const buf = reader.uint8; const len = buf.length; const points: number[] = []; @@ -202,10 +202,8 @@ export class JsonDecoder implements BinaryJsonDecoder { const uint8 = reader.uint8; const char = uint8[x]; switch (char) { - case 34: { - // " - if (uint8[x + 1] === 0x64) { - // d + case 34 /* " */ : { + if (uint8[x + 1] === 0x64 /* d */) { const bin = this.tryReadBin(); if (bin) return bin; if (isUndefined(uint8, x + 2)) { @@ -215,18 +213,13 @@ export class JsonDecoder implements BinaryJsonDecoder { } return this.readStr(); } - case 91: // [ - return this.readArr(); - case 102: // f - return this.readFalse(); - case 110: // n - return this.readNull(); - case 116: // t - return this.readTrue(); - case 123: // { - return this.readObj(); + case 91 /* [ */ : return this.readArr(); + case 102 /* f */ : return this.readFalse(); + case 110 /* n */ : return this.readNull(); + case 116 /* t */ : return this.readTrue(); + case 123 /* { */ : return this.readObj(); default: - if ((char >= 48 && char <= 57) || char === 45) return this.readNum(); + if ((char >= 48 /* 0 */ && char <= 57 /* 9 */) || char === 45 /* - */) return this.readNum(); throw new Error('Invalid JSON'); } } @@ -239,10 +232,10 @@ export class JsonDecoder implements BinaryJsonDecoder { while (true) { char = uint8[x]; switch (char) { - case 32: // space - case 9: // tab - case 10: // line feed - case 13: // carriage return + case 32 /* */ : + case 9 /* */ : + case 10 /* */ : + case 13 /* */ : x++; continue; default: @@ -253,30 +246,27 @@ export class JsonDecoder implements BinaryJsonDecoder { } public readNull(): null { - if (this.reader.u32() !== 0x6e756c6c) throw new Error('Invalid JSON'); + if (this.reader.u32() !== 0x6e756c6c /* null */) throw new Error('Invalid JSON'); return null; } public readTrue(): true { - if (this.reader.u32() !== 0x74727565) throw new Error('Invalid JSON'); + if (this.reader.u32() !== 0x74727565 /* true */) throw new Error('Invalid JSON'); return true; } public readFalse(): false { const reader = this.reader; - if (reader.u8() !== 0x66 || reader.u32() !== 0x616c7365) throw new Error('Invalid JSON'); + if (reader.u8() !== 0x66 /* f */ || reader.u32() !== 0x616c7365 /* alse */) throw new Error('Invalid JSON'); return false; } public readBool(): unknown { const reader = this.reader; switch (reader.uint8[reader.x]) { - case 102: // f - return this.readFalse(); - case 116: // t - return this.readTrue(); - default: - throw new Error('Invalid JSON'); + case 102 /* f */ : return this.readFalse(); + case 116 /* t */ : return this.readTrue(); + default: throw new Error('Invalid JSON'); } } @@ -659,23 +649,23 @@ export class JsonDecoder implements BinaryJsonDecoder { public readObj(): PackValue | Record | unknown { const reader = this.reader; - if (reader.u8() !== 0x7b) throw new Error('Invalid JSON'); + if (reader.u8() !== 0x7b /* { */) throw new Error('Invalid JSON'); const obj: Record = {}; const uint8 = reader.uint8; while (true) { this.skipWhitespace(); let char = uint8[reader.x]; - if (char === 0x7d) return reader.x++, obj; // } - if (char === 0x2c) { + if (char === 0x7d /* } */) return reader.x++, obj; + if (char === 0x2c /* , */) { reader.x++; continue; - } // , + } char = uint8[reader.x++]; - if (char !== 0x22) throw new Error('Invalid JSON'); - const key = readShortUtf8StrAndUnescape(reader); + if (char !== 0x22 /* " */) throw new Error('Invalid JSON'); + const key = readKey(reader); if (key === '__proto__') throw new Error('Invalid JSON'); this.skipWhitespace(); - if (reader.u8() !== 0x3a) throw new Error('Invalid JSON'); + if (reader.u8() !== 0x3a /* : */) throw new Error('Invalid JSON'); this.skipWhitespace(); obj[key] = this.readAny(); } diff --git a/src/json/JsonDecoderPartial.ts b/src/json/JsonDecoderPartial.ts index 75693843..e74282dd 100644 --- a/src/json/JsonDecoderPartial.ts +++ b/src/json/JsonDecoderPartial.ts @@ -1,5 +1,25 @@ -import {JsonDecoder} from './JsonDecoder'; +import {JsonDecoder, readKey} from './JsonDecoder'; +import type {PackValue} from '../types'; +/** + * This class parses JSON which is correct but not necessarily complete. + * It can be used to parse JSON that is being streamed in chunks. If the end + * of the JSON is missing, this parser will return the initial, correct, parsed + * part of the JSON, until the point where the JSON is no longer valid. + * + * Examples: + * + * ```js + * // Missing closing brace + * decoder.readAny('[1, 2, 3'); // [1, 2, 3] + * + * // Trailing comma and missing closing brace + * decoder.readAny('[1, 2, '); // [1, 2] + * + * // Corrupt second element and missing closing brace + * decoder.readAny('{"foo": 1, "bar":'); // {"foo": 1} + * ``` + */ export class JsonDecoderPartial extends JsonDecoder { public readArr(): unknown[] { const reader = this.reader; @@ -23,27 +43,32 @@ export class JsonDecoderPartial extends JsonDecoder { } } - // public readObj(): PackValue | Record | unknown { - // const reader = this.reader; - // if (reader.u8() !== 0x7b) throw new Error('Invalid JSON'); - // const obj: Record = {}; - // const uint8 = reader.uint8; - // while (true) { - // this.skipWhitespace(); - // let char = uint8[reader.x]; - // if (char === 0x7d) return reader.x++, obj; // } - // if (char === 0x2c) { - // reader.x++; - // continue; - // } // , - // char = uint8[reader.x++]; - // if (char !== 0x22) throw new Error('Invalid JSON'); - // const key = readShortUtf8StrAndUnescape(reader); - // if (key === '__proto__') throw new Error('Invalid JSON'); - // this.skipWhitespace(); - // if (reader.u8() !== 0x3a) throw new Error('Invalid JSON'); - // this.skipWhitespace(); - // obj[key] = this.readAny(); - // } - // } + public readObj(): PackValue | Record | unknown { + const reader = this.reader; + if (reader.u8() !== 0x7b /* { */) throw new Error('Invalid JSON'); + const obj: Record = {}; + const uint8 = reader.uint8; + while (true) { + this.skipWhitespace(); + let char = uint8[reader.x]; + if (char === 0x7d /* } */) return reader.x++, obj; + if (char === 0x2c /* , */) { + reader.x++; + continue; + } + try { + char = uint8[reader.x++]; + if (char !== 0x22 /* " */) throw new Error('Invalid JSON'); + const key = readKey(reader); + if (key === '__proto__') throw new Error('Invalid JSON'); + this.skipWhitespace(); + if (reader.u8() !== 0x3a /* : */) throw new Error('Invalid JSON'); + this.skipWhitespace(); + obj[key] = this.readAny(); + } catch (error) { + if (error instanceof Error && error.message === 'Invalid JSON') return obj; + throw error; + } + } + } } diff --git a/src/json/__tests__/JsonDecoderPartial.spec.ts b/src/json/__tests__/JsonDecoderPartial.spec.ts index 304d5148..579f5af7 100644 --- a/src/json/__tests__/JsonDecoderPartial.spec.ts +++ b/src/json/__tests__/JsonDecoderPartial.spec.ts @@ -48,4 +48,82 @@ describe('array', () => { const value = parse('[true, "asdf",, , 4]'); expect(value).toEqual([true, 'asdf', 4]); }); + + test('can parse nested arrays', () => { + const value = parse('[[true, false, null]]'); + expect(value).toEqual([[true, false, null]]); + }); + + test('can parse nested arrays with missing brace', () => { + const value = parse('[[true, false, null]'); + expect(value).toEqual([[true, false, null]]); + }); + + test('can parse nested arrays with two missing braces', () => { + const value = parse('[[true, false, null'); + expect(value).toEqual([[true, false, null]]); + }); + + test('can parse nested arrays with two missing element', () => { + const value = parse('[[true, false,'); + expect(value).toEqual([[true, false]]); + }); +}); + +describe('object', () => { + test('can parse valid object', () => { + const value = parse('{"foo": 1, "bar": 2}'); + expect(value).toEqual({foo: 1, bar: 2}); + }); + + test('can parse object with missing brace (trailing space)', () => { + const value = parse('{"foo": 1, "bar": 2 '); + expect(value).toEqual({foo: 1, bar: 2}); + }); + + test('can parse object with missing brace', () => { + const value = parse('{"foo": 1, "bar": 2'); + expect(value).toEqual({foo: 1, bar: 2}); + }); + + test('can parse object with missing field value', () => { + const value1 = parse('{"foo": 1, "bar": '); + const value2 = parse('{"foo": 1, "bar":'); + const value3 = parse('{"foo": 1, "bar"'); + const value4 = parse('{"foo": 1, "bar'); + const value5 = parse('{"foo": 1, "b'); + const value6 = parse('{"foo": 1, "'); + const value7 = parse('{"foo": 1, '); + const value8 = parse('{"foo": 1,'); + const value9 = parse('{"foo": 1'); + expect(value1).toEqual({foo: 1}); + expect(value2).toEqual({foo: 1}); + expect(value3).toEqual({foo: 1}); + expect(value4).toEqual({foo: 1}); + expect(value5).toEqual({foo: 1}); + expect(value6).toEqual({foo: 1}); + expect(value7).toEqual({foo: 1}); + expect(value8).toEqual({foo: 1}); + expect(value9).toEqual({foo: 1}); + }); + + test('can parse nested object', () => { + const value1 = parse('{"a": {"foo": 1, "bar": 2}}'); + const value2 = parse('{"a": {"foo": 1, "bar": 2} }'); + const value3 = parse('{"a": {"foo": 1, "bar": 2} '); + const value4 = parse('{"a": {"foo": 1, "bar": 2}'); + const value5 = parse('{"a": {"foo": 1, "bar": 2 '); + const value6 = parse('{"a": {"foo": 1, "bar": 2'); + expect(value1).toEqual({a: {foo: 1, bar: 2}}); + expect(value2).toEqual({a: {foo: 1, bar: 2}}); + expect(value3).toEqual({a: {foo: 1, bar: 2}}); + expect(value4).toEqual({a: {foo: 1, bar: 2}}); + expect(value5).toEqual({a: {foo: 1, bar: 2}}); + expect(value6).toEqual({a: {foo: 1, bar: 2}}); + }); +}); + +test('simple nested object', () => { + const value = parse('{ "name": { "first": "ind", "last": "go'); + expect(value).toEqual({name: {first: 'ind'}}); }); From f726682697a8157871e2029ec4dcec3dd368c433 Mon Sep 17 00:00:00 2001 From: streamich Date: Sun, 9 Mar 2025 19:15:20 +0100 Subject: [PATCH 3/6] =?UTF-8?q?fix:=20=F0=9F=90=9B=20prohibit=20missing=20?= =?UTF-8?q?comma=20in=20main=20JSON=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/json/JsonDecoder.ts | 18 ++++++++++-------- src/json/__tests__/JsonDecoder.spec.ts | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/json/JsonDecoder.ts b/src/json/JsonDecoder.ts index ab7dd7bd..e9e30c8e 100644 --- a/src/json/JsonDecoder.ts +++ b/src/json/JsonDecoder.ts @@ -635,15 +635,16 @@ export class JsonDecoder implements BinaryJsonDecoder { if (reader.u8() !== 0x5b /* [ */) throw new Error('Invalid JSON'); const arr: unknown[] = []; const uint8 = reader.uint8; + let first = true; while (true) { this.skipWhitespace(); const char = uint8[reader.x]; if (char === 0x5d /* ] */) return reader.x++, arr; - if (char === 0x2c /* , */) { - reader.x++; - continue; - } + if (char === 0x2c /* , */) reader.x++; + else if (!first) throw new Error('Invalid JSON'); + this.skipWhitespace(); arr.push(this.readAny()); + first = false; } } @@ -652,14 +653,14 @@ export class JsonDecoder implements BinaryJsonDecoder { if (reader.u8() !== 0x7b /* { */) throw new Error('Invalid JSON'); const obj: Record = {}; const uint8 = reader.uint8; + let first = true; while (true) { this.skipWhitespace(); let char = uint8[reader.x]; if (char === 0x7d /* } */) return reader.x++, obj; - if (char === 0x2c /* , */) { - reader.x++; - continue; - } + if (char === 0x2c /* , */) reader.x++; + else if (!first) throw new Error('Invalid JSON'); + this.skipWhitespace(); char = uint8[reader.x++]; if (char !== 0x22 /* " */) throw new Error('Invalid JSON'); const key = readKey(reader); @@ -668,6 +669,7 @@ export class JsonDecoder implements BinaryJsonDecoder { if (reader.u8() !== 0x3a /* : */) throw new Error('Invalid JSON'); this.skipWhitespace(); obj[key] = this.readAny(); + first = false; } } } diff --git a/src/json/__tests__/JsonDecoder.spec.ts b/src/json/__tests__/JsonDecoder.spec.ts index 29307eb2..d53a429d 100644 --- a/src/json/__tests__/JsonDecoder.spec.ts +++ b/src/json/__tests__/JsonDecoder.spec.ts @@ -322,6 +322,19 @@ describe('array', () => { expect(value).toEqual([1, 2.2, -3.3]); }); + test('simple array', () => { + const data = Buffer.from('[1, 2, 3]', 'utf-8'); + decoder.reader.reset(data); + const value = decoder.readAny(); + expect(value).toEqual([1, 2, 3]); + }); + + test('missing comma', () => { + const data = Buffer.from('[1, 2 3]', 'utf-8'); + decoder.reader.reset(data); + expect(() => decoder.readAny()).toThrow(new Error('Invalid JSON')); + }); + test('nested arrays', () => { const data = Buffer.from(' \n \r \t [[],\n[ 4,\t5] , [null]] \n \r \t ', 'utf-8'); decoder.reader.reset(data); @@ -366,6 +379,19 @@ describe('object', () => { expect(value).toEqual({foo: 'bar'}); }); + test('simple object', () => { + const data = Buffer.from('{"foo": 1, "bar": 2}', 'utf-8'); + decoder.reader.reset(data); + const value = decoder.readAny(); + expect(value).toEqual({foo: 1, bar: 2}); + }); + + test('missing comma', () => { + const data = Buffer.from('{"foo": 1 "bar": 2}', 'utf-8'); + decoder.reader.reset(data); + expect(() => decoder.readAny()).toThrow(new Error('Invalid JSON')); + }); + test('nested object', () => { const data = Buffer.from('{"":{}}', 'utf-8'); decoder.reader.reset(data); From 12c9ad970cbd74b41e53100c5f74c0fca4fa9fc9 Mon Sep 17 00:00:00 2001 From: streamich Date: Sun, 9 Mar 2025 19:31:53 +0100 Subject: [PATCH 4/6] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20finalize=20partial?= =?UTF-8?q?=20JSON=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/json/JsonDecoderPartial.ts | 53 ++++++++++++++----- src/json/__tests__/JsonDecoderPartial.spec.ts | 20 ++++++- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/src/json/JsonDecoderPartial.ts b/src/json/JsonDecoderPartial.ts index e74282dd..c605ed34 100644 --- a/src/json/JsonDecoderPartial.ts +++ b/src/json/JsonDecoderPartial.ts @@ -1,11 +1,20 @@ import {JsonDecoder, readKey} from './JsonDecoder'; import type {PackValue} from '../types'; +export class DecodeFinishError extends Error { + constructor(public readonly value: unknown) { + super('DECODE_FINISH'); + } +} + /** - * This class parses JSON which is correct but not necessarily complete. - * It can be used to parse JSON that is being streamed in chunks. If the end - * of the JSON is missing, this parser will return the initial, correct, parsed - * part of the JSON, until the point where the JSON is no longer valid. + * This class parses JSON which is mostly correct but not necessarily complete + * or with missing parts. It can be used to parse JSON that is being streamed + * in chunks or JSON output of an LLM model. + * + * If the end of a nested JSON value (array, object) is missing, this parser + * will return the initial correct part for that value, which it was able to + * parse, until the point where the JSON is no longer valid. * * Examples: * @@ -21,25 +30,36 @@ import type {PackValue} from '../types'; * ``` */ export class JsonDecoderPartial extends JsonDecoder { + public readAny(): unknown { + try { + return super.readAny(); + } catch (error) { + if (error instanceof DecodeFinishError) return error.value; + throw error; + } + } + public readArr(): unknown[] { const reader = this.reader; - reader.u8(); /* [ */ + if (reader.u8() !== 0x5b /* [ */) throw new Error('Invalid JSON'); const arr: unknown[] = []; const uint8 = reader.uint8; + let first = true; while (true) { this.skipWhitespace(); const char = uint8[reader.x]; if (char === 0x5d /* ] */) return reader.x++, arr; - if (char === 0x2c /* , */) { - reader.x++; - continue; - } + if (char === 0x2c /* , */) reader.x++; + else if (!first) return arr; + this.skipWhitespace(); try { arr.push(this.readAny()); } catch (error) { - if (error instanceof Error && error.message === 'Invalid JSON') return arr; + if (error instanceof DecodeFinishError) return arr.push(error.value), arr; + if (error instanceof Error && error.message === 'Invalid JSON') throw new DecodeFinishError(arr); throw error; } + first = false; } } @@ -64,9 +84,18 @@ export class JsonDecoderPartial extends JsonDecoder { this.skipWhitespace(); if (reader.u8() !== 0x3a /* : */) throw new Error('Invalid JSON'); this.skipWhitespace(); - obj[key] = this.readAny(); + try { + obj[key] = this.readAny(); + } catch (error) { + if (error instanceof DecodeFinishError) { + obj[key] = error.value; + return obj; + } + throw error; + } } catch (error) { - if (error instanceof Error && error.message === 'Invalid JSON') return obj; + if (error instanceof DecodeFinishError) return obj; + if (error instanceof Error && error.message === 'Invalid JSON') throw new DecodeFinishError(obj); throw error; } } diff --git a/src/json/__tests__/JsonDecoderPartial.spec.ts b/src/json/__tests__/JsonDecoderPartial.spec.ts index 579f5af7..82d74845 100644 --- a/src/json/__tests__/JsonDecoderPartial.spec.ts +++ b/src/json/__tests__/JsonDecoderPartial.spec.ts @@ -39,12 +39,12 @@ describe('array', () => { expect(value).toEqual([true, 'asdf']); }); - test('can parse array with double commas', () => { + test.skip('can parse array with double commas', () => { const value = parse('[true, "asdf",, 4]'); expect(value).toEqual([true, 'asdf', 4]); }); - test('can parse array with triple commas', () => { + test.skip('can parse array with triple commas', () => { const value = parse('[true, "asdf",, , 4]'); expect(value).toEqual([true, 'asdf', 4]); }); @@ -127,3 +127,19 @@ test('simple nested object', () => { const value = parse('{ "name": { "first": "ind", "last": "go'); expect(value).toEqual({name: {first: 'ind'}}); }); + +test('example output from LLM', () => { + const value = parse(` +{ + "name": "Alice", + "age": 25, + "hobbies": ["eat", "drink" + "is_student": false +Some extra text after the JSON with missing closing brace.`); + expect(value).toEqual({ + name: 'Alice', + age: 25, + hobbies: ['eat', 'drink'], + is_student: false, + }); +}); From 0685c51b6c40c92915309d43ed9f462ad9b021c1 Mon Sep 17 00:00:00 2001 From: streamich Date: Sun, 9 Mar 2025 19:32:33 +0100 Subject: [PATCH 5/6] =?UTF-8?q?style:=20=F0=9F=92=84=20run=20Prettier?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/json/JsonDecoder.ts | 36 +++++++++++++++++++++------------- src/json/JsonDecoderPartial.ts | 10 +++++----- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/json/JsonDecoder.ts b/src/json/JsonDecoder.ts index e9e30c8e..a75f77bc 100644 --- a/src/json/JsonDecoder.ts +++ b/src/json/JsonDecoder.ts @@ -202,7 +202,7 @@ export class JsonDecoder implements BinaryJsonDecoder { const uint8 = reader.uint8; const char = uint8[x]; switch (char) { - case 34 /* " */ : { + case 34 /* " */: { if (uint8[x + 1] === 0x64 /* d */) { const bin = this.tryReadBin(); if (bin) return bin; @@ -213,13 +213,18 @@ export class JsonDecoder implements BinaryJsonDecoder { } return this.readStr(); } - case 91 /* [ */ : return this.readArr(); - case 102 /* f */ : return this.readFalse(); - case 110 /* n */ : return this.readNull(); - case 116 /* t */ : return this.readTrue(); - case 123 /* { */ : return this.readObj(); + case 91 /* [ */: + return this.readArr(); + case 102 /* f */: + return this.readFalse(); + case 110 /* n */: + return this.readNull(); + case 116 /* t */: + return this.readTrue(); + case 123 /* { */: + return this.readObj(); default: - if ((char >= 48 /* 0 */ && char <= 57 /* 9 */) || char === 45 /* - */) return this.readNum(); + if ((char >= 48 /* 0 */ && char <= 57) /* 9 */ || char === 45 /* - */) return this.readNum(); throw new Error('Invalid JSON'); } } @@ -232,10 +237,10 @@ export class JsonDecoder implements BinaryJsonDecoder { while (true) { char = uint8[x]; switch (char) { - case 32 /* */ : - case 9 /* */ : - case 10 /* */ : - case 13 /* */ : + case 32 /* */: + case 9 /* */: + case 10 /* */: + case 13 /* */: x++; continue; default: @@ -264,9 +269,12 @@ export class JsonDecoder implements BinaryJsonDecoder { public readBool(): unknown { const reader = this.reader; switch (reader.uint8[reader.x]) { - case 102 /* f */ : return this.readFalse(); - case 116 /* t */ : return this.readTrue(); - default: throw new Error('Invalid JSON'); + case 102 /* f */: + return this.readFalse(); + case 116 /* t */: + return this.readTrue(); + default: + throw new Error('Invalid JSON'); } } diff --git a/src/json/JsonDecoderPartial.ts b/src/json/JsonDecoderPartial.ts index c605ed34..f736bf68 100644 --- a/src/json/JsonDecoderPartial.ts +++ b/src/json/JsonDecoderPartial.ts @@ -11,20 +11,20 @@ export class DecodeFinishError extends Error { * This class parses JSON which is mostly correct but not necessarily complete * or with missing parts. It can be used to parse JSON that is being streamed * in chunks or JSON output of an LLM model. - * + * * If the end of a nested JSON value (array, object) is missing, this parser * will return the initial correct part for that value, which it was able to * parse, until the point where the JSON is no longer valid. - * + * * Examples: - * + * * ```js * // Missing closing brace * decoder.readAny('[1, 2, 3'); // [1, 2, 3] - * + * * // Trailing comma and missing closing brace * decoder.readAny('[1, 2, '); // [1, 2] - * + * * // Corrupt second element and missing closing brace * decoder.readAny('{"foo": 1, "bar":'); // {"foo": 1} * ``` From 44ed728973f025d2da0afd76ed774e016697cd74 Mon Sep 17 00:00:00 2001 From: streamich Date: Sun, 9 Mar 2025 19:35:27 +0100 Subject: [PATCH 6/6] =?UTF-8?q?test:=20=F0=9F=92=8D=20add=20automated=20te?= =?UTF-8?q?sts=20for=20JsonDecoderPartial?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../JsonDecoderPartial.automated.spec.ts | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 src/json/__tests__/JsonDecoderPartial.automated.spec.ts diff --git a/src/json/__tests__/JsonDecoderPartial.automated.spec.ts b/src/json/__tests__/JsonDecoderPartial.automated.spec.ts new file mode 100644 index 00000000..17479561 --- /dev/null +++ b/src/json/__tests__/JsonDecoderPartial.automated.spec.ts @@ -0,0 +1,39 @@ +import {Writer} from '@jsonjoy.com/util/lib/buffers/Writer'; +import {JsonValue} from '../../types'; +import {JsonEncoder} from '../JsonEncoder'; +import {JsonEncoderStable} from '../JsonEncoderStable'; +import {JsonDecoderPartial} from '../JsonDecoderPartial'; +import {documents} from '../../__tests__/json-documents'; +import {binaryDocuments} from '../../__tests__/binary-documents'; + +const writer = new Writer(8); +const encoder = new JsonEncoder(writer); +const encoderStable = new JsonEncoderStable(writer); +const decoder = new JsonDecoderPartial(); + +const assertEncoder = (value: JsonValue) => { + const encoded = encoder.encode(value); + const encoded2 = encoderStable.encode(value); + // const json = Buffer.from(encoded).toString('utf-8'); + // console.log('json', json); + const decoded = decoder.decode(encoded); + const decoded2 = decoder.decode(encoded2); + expect(decoded).toEqual(value); + expect(decoded2).toEqual(value); +}; + +describe('Sample JSON documents', () => { + for (const t of documents) { + (t.only ? test.only : test)(t.name, () => { + assertEncoder(t.json as any); + }); + } +}); + +describe('Sample binary documents', () => { + for (const t of binaryDocuments) { + (t.only ? test.only : test)(t.name, () => { + assertEncoder(t.json as any); + }); + } +});