Add magic forgiving JSON formatting for normal & record JSON

pimterry · pimterry · commit 0ae61fe0f725 · 2025-08-06T21:06:17.000+02:00
diff --git a/src/model/events/body-formatting.ts b/src/model/events/body-formatting.ts
@@ -9,7 +9,7 @@ import type { WorkerFormatterKey } from '../../services/ui-worker-formatters';
 import { formatBufferAsync } from '../../services/ui-worker-api';
 import { ReadOnlyParams } from '../../components/common/editable-params';
 import { ImageViewer } from '../../components/editor/image-viewer';
-import { Buffer } from 'buffer';
+import { formatJson } from '../../util/json';
 
 export interface EditorFormatter {
     language: string;
@@ -107,20 +107,7 @@ export const Formatters: { [key in ViewableContentType]: Formatter } = {
         render: (input: Buffer, headers?: Headers) => {
             if (input.byteLength < 10_000) {
                 const inputAsString = bufferToString(input);
-
-                try {
-                    // For short-ish inputs, we return synchronously - conveniently this avoids
-                    // showing the loading spinner that churns the layout in short content cases.
-                    return JSON.stringify(
-                        JSON.parse(inputAsString),
-                        null,
-                        2
-                    );
-                    // ^ Same logic as in UI-worker-formatter
-                } catch (e) {
-                    // Fallback to showing the raw un-formatted JSON:
-                    return inputAsString;
-                }
+                return formatJson(inputAsString, { formatRecords: false });
             } else {
                 return observablePromise(
                     formatBufferAsync(input, 'json', headers)
@@ -134,24 +121,8 @@ export const Formatters: { [key in ViewableContentType]: Formatter } = {
         isEditApplicable: false,
         render: (input: Buffer, headers?: Headers) => {
             if (input.byteLength < 10_000) {
-                try {
-                    let records = new Array<string>();
-                    const separator = input[input.length - 1];
-                    const separatorString = Buffer.of(separator).toString('utf8');
-
-                    splitBuffer(input, separator).forEach((recordBuffer: Buffer) => {
-                        if (recordBuffer.length > 0) {
-                            const record = recordBuffer.toString('utf-8');
-                            records.push(record + separatorString);
-                        }
-                    });
-
-                    return records.join('\n');
-                    // ^ Same logic as in UI-worker-formatter
-                } catch (e) {
-                    // Fallback to showing the raw un-formatted:
-                    return bufferToString(input);
-                }
+                const inputAsString = bufferToString(input);
+                return formatJson(inputAsString, { formatRecords: true });
             } else {
                 return observablePromise(
                     formatBufferAsync(input, 'json-records', headers)
diff --git a/src/services/ui-worker-formatters.ts b/src/services/ui-worker-formatters.ts
@@ -6,8 +6,9 @@ import {
 import * as beautifyXml from 'xml-beautifier';
 
 import { Headers } from '../types';
-import { bufferToHex, bufferToString, getReadableSize, splitBuffer } from '../util/buffer';
+import { bufferToHex, bufferToString, getReadableSize } from '../util/buffer';
 import { parseRawProtobuf, extractProtobufFromGrpc } from '../util/protobuf';
+import { formatJson } from '../util/json';
 
 const truncationMarker = (size: string) => `\n[-- Truncated to ${size} --]`;
 const FIVE_MB = 1024 * 1024 * 5;
@@ -74,29 +75,11 @@ const WorkerFormatters = {
     },
     json: (content: Buffer) => {
         const asString = content.toString('utf8');
-        try {
-            return JSON.stringify(JSON.parse(asString), null, 2);
-        } catch (e) {
-            return asString;
-        }
+        return formatJson(asString, { formatRecords: false });
     },
     'json-records': (content: Buffer) => {
-        try {
-            let records = new Array();
-            const separator = content[content.length - 1];
-            const separatorString = Buffer.of(separator).toString('utf8');
-
-            splitBuffer(content, separator).forEach((recordBuffer: Buffer) => {
-                if (recordBuffer.length > 0) {
-                    const record = recordBuffer.toString('utf-8');
-                    records.push(record + separatorString);
-                }
-            });
-
-            return records.join('\n');
-        } catch (e) {
-            return content.toString('utf8');
-        }
+        const asString = content.toString('utf8');
+        return formatJson(asString, { formatRecords: true });
     },
     javascript: (content: Buffer) => {
         return beautifyJs(content.toString('utf8'), {
diff --git a/src/util/json.ts b/src/util/json.ts
@@ -1,3 +1,8 @@
+import {
+    createScanner as createJsonScanner,
+    SyntaxKind as JsonSyntaxKind
+} from 'jsonc-parser';
+
 const JSON_START_REGEX = /^\s*[\[\{tfn"\d-]/; // Optional whitespace, then start array/object/true/false/null/string/number
 
 const JSON_TEXT_SEQ_START_REGEX = /^\u001E\s*[\[\{]/; // Record separate, optional whitespace, then array/object
@@ -56,3 +61,211 @@ export const RECORD_SEPARATOR_CHARS = [
     '\n',
     '\r'
 ];
+
+/**
+ * A *very* forgiving & flexible JSON formatter. This will correctly format even things that
+ * will fail validation later, such as trailing commas, unquoted keys, comments, etc.
+ *
+ * The input is never parsed into a value. It is tokenized with the jsonc-parser scanner and
+ * the output is rebuilt token by token, using two spaces per nesting level, so the original
+ * token text (including anything the scanner does not recognise) is preserved as-is and
+ * only the whitespace between tokens is rewritten.
+ *
+ * @param text - The raw (possibly invalid) JSON text to format.
+ * @param options - `formatRecords`: when true, tokens seen at nesting depth 0 are treated as
+ *   sitting between records: '\u001E' record-separator characters there are dropped, and a
+ *   newline is inserted before a non-value token if the output does not already end with one.
+ * @returns The re-indented text.
+ */
+export function formatJson(text: string, options: { formatRecords: boolean } = { formatRecords: false }): string {
+    const scanner = createJsonScanner(text);
+
+    let result = "";
+    let indent = 0;
+    let token: JsonSyntaxKind;
+
+    const indentString = '  ';
+    // Set whenever a newline has been emitted and the next real token must be indented:
+    let needsIndent = false;
+    // The last non-whitespace token that was emitted (whitespace & skipped separators don't count):
+    let previousToken: JsonSyntaxKind | null = null;
+
+    let betweenRecords = false;
+
+    while ((token = scanner.scan()) !== JsonSyntaxKind.EOF) {
+        // Read the token's text up front: the look-ahead helper used below moves the scanner.
+        const tokenOffset = scanner.getTokenOffset();
+        const tokenLength = scanner.getTokenLength();
+        const tokenText = text.slice(tokenOffset, tokenOffset + tokenLength);
+
+        if (options.formatRecords && indent === 0) {
+            betweenRecords = true;
+        }
+
+        // Skip over explicit 'record separator' characters, which can cause parsing problems
+        // when parsing JSON records. We resume one character past the separator (not past the
+        // whole token) so anything else scanned as part of the same token is scanned again.
+        if (betweenRecords && tokenText[0] === '\u001E') {
+            scanner.setPosition(tokenOffset + 1);
+            continue;
+        }
+
+        // Ignore irrelevant whitespace (internally or between records) - we'll handle that ourselves
+        if (token === JsonSyntaxKind.Trivia || token === JsonSyntaxKind.LineBreakTrivia) {
+            continue;
+        }
+
+        if (betweenRecords) {
+            // We've finished one record and we have another coming that won't get a newline
+            // automatically: add an extra newline to properly separate records if required.
+            // (Sequential value tokens get their newline in the default case below.)
+            betweenRecords = false;
+            if (result && result[result.length - 1] !== '\n' && !isValueToken(token)) {
+                result += '\n';
+            }
+        }
+
+        // Remember whether this token begins a fresh output line. The comment cases below
+        // need this: once the indentation has been written, the end of `result` is whitespace
+        // and can no longer be told apart from other trailing whitespace.
+        const startsNewLine = needsIndent;
+        if (needsIndent) {
+            result += indentString.repeat(indent);
+            needsIndent = false;
+        }
+
+        switch (token) {
+            case JsonSyntaxKind.OpenBraceToken:
+            case JsonSyntaxKind.OpenBracketToken:
+                result += tokenText;
+                indent++;
+
+                // Break the line after an opener, unless it's immediately closed (empty
+                // object/array), it's the end of the input, or a line comment follows (the
+                // line comment case adds its own newline in that situation).
+                const afterOpener = scanAhead(scanner);
+                const isClosing = afterOpener === JsonSyntaxKind.CloseBraceToken ||
+                                 afterOpener === JsonSyntaxKind.CloseBracketToken;
+                if (
+                    !isClosing &&
+                    afterOpener !== JsonSyntaxKind.EOF &&
+                    afterOpener !== JsonSyntaxKind.LineCommentTrivia
+                ) {
+                    result += '\n';
+                    needsIndent = true;
+                }
+                break;
+
+            case JsonSyntaxKind.CloseBraceToken:
+            case JsonSyntaxKind.CloseBracketToken:
+                const wasEmpty = previousToken === JsonSyntaxKind.OpenBraceToken ||
+                                previousToken === JsonSyntaxKind.OpenBracketToken;
+
+                // A closer at depth 0 has no matching opener. Clamp the depth at zero, and
+                // put a newline after the stray closer (below).
+                const indentUnderflow = indent === 0;
+                indent = Math.max(0, indent - 1);
+
+                // Non-empty containers get their closer on its own line at the outer depth;
+                // empty ones stay compact ({} or []).
+                if (!wasEmpty) {
+                    if (!result.endsWith('\n')) {
+                        result += '\n';
+                    }
+                    result += indentString.repeat(indent);
+                }
+
+                result += tokenText;
+                if (indentUnderflow) result += '\n';
+
+                break;
+
+            case JsonSyntaxKind.CommaToken:
+                result += tokenText;
+
+                // Start a new line after a comma, unless the next token is a comment (which
+                // stays on this line), a closer or EOF (trailing comma), or another comma.
+                const afterComma = scanAhead(scanner);
+                if (
+                    afterComma !== JsonSyntaxKind.LineCommentTrivia &&
+                    afterComma !== JsonSyntaxKind.BlockCommentTrivia &&
+                    afterComma !== JsonSyntaxKind.CloseBraceToken &&
+                    afterComma !== JsonSyntaxKind.CloseBracketToken &&
+                    afterComma !== JsonSyntaxKind.EOF &&
+                    afterComma !== JsonSyntaxKind.CommaToken
+                ) {
+                    result += '\n';
+                    needsIndent = true;
+                }
+                break;
+
+            case JsonSyntaxKind.ColonToken:
+                result += tokenText;
+                result += ' ';
+                break;
+
+            case JsonSyntaxKind.LineCommentTrivia:
+                const needsNewlineBefore = (
+                    previousToken === JsonSyntaxKind.OpenBraceToken ||
+                    previousToken === JsonSyntaxKind.OpenBracketToken
+                ) && !result.endsWith('\n');
+
+                if (needsNewlineBefore) {
+                    // Directly after an opener: the opener case deliberately emitted no
+                    // newline, so move the comment onto its own indented line here.
+                    result += '\n' + indentString.repeat(indent) + tokenText;
+                } else if (startsNewLine) {
+                    // The newline & indentation for this comment were already written above.
+                    // Keep them: trimming here would glue this comment onto the end of the
+                    // previous line (e.g. turning two line comments into '// a// b').
+                    result += tokenText;
+                } else {
+                    // Comment trailing other content on the same line: replace any trailing
+                    // whitespace with the comment, or separate it with a single space.
+                    const trimmedResult = result.trimEnd();
+                    if (result.length > trimmedResult.length) {
+                        result = trimmedResult + tokenText;
+                    } else {
+                        result += ' ' + tokenText;
+                    }
+                }
+
+                // A line comment runs to the end of the line, so whatever follows must start
+                // a new one. Closers add their own newline, and nothing is needed at EOF.
+                const afterComment = scanAhead(scanner);
+                if (
+                    afterComment !== JsonSyntaxKind.CloseBraceToken &&
+                    afterComment !== JsonSyntaxKind.CloseBracketToken &&
+                    afterComment !== JsonSyntaxKind.EOF
+                ) {
+                    result += '\n';
+                    needsIndent = true;
+                }
+                break;
+
+            case JsonSyntaxKind.BlockCommentTrivia:
+                // At the start of a line (indentation already written, or output ends with a
+                // newline) the comment is appended as-is so it lines up with its siblings;
+                // otherwise it's separated from the preceding content by one space.
+                if (startsNewLine || result.endsWith('\n')) {
+                    result += tokenText;
+                } else {
+                    result += ' ' + tokenText;
+                }
+
+                // Break the line after the comment unless a comma, closer or EOF follows.
+                const afterBlock = scanAhead(scanner);
+                if (
+                    afterBlock !== JsonSyntaxKind.CommaToken &&
+                    afterBlock !== JsonSyntaxKind.CloseBraceToken &&
+                    afterBlock !== JsonSyntaxKind.CloseBracketToken &&
+                    afterBlock !== JsonSyntaxKind.EOF
+                ) {
+                    result += '\n';
+                    needsIndent = true;
+                }
+                break;
+
+            default:
+                // Literals, keywords, and anything the scanner doesn't recognise:
+                const followsValue = isValueToken(previousToken);
+                if (followsValue && isValueToken(token) && !result.endsWith('\n')) {
+                    // Missing comma detected between sequential values,
+                    // so add a newline for readability
+                    result += '\n';
+                    result += indentString.repeat(indent);
+                }
+
+                result += tokenText;
+                break;
+        }
+
+        previousToken = token;
+    }
+
+    return result;
+}
+
+// Reports whether a token can end a complete JSON value: a string, number, true/false/null
+// literal, or a closing brace/bracket. A null token (nothing scanned yet) is never a value.
+function isValueToken(token: JsonSyntaxKind | null): boolean {
+    switch (token) {
+        case JsonSyntaxKind.StringLiteral:
+        case JsonSyntaxKind.NumericLiteral:
+        case JsonSyntaxKind.TrueKeyword:
+        case JsonSyntaxKind.FalseKeyword:
+        case JsonSyntaxKind.NullKeyword:
+        case JsonSyntaxKind.CloseBraceToken:
+        case JsonSyntaxKind.CloseBracketToken:
+            return true;
+        default:
+            return false;
+    }
+}
+
+// Peeks at the next significant token without consuming it: scans forward past any
+// whitespace & line-break trivia, then rewinds the scanner to where it started.
+//
+// Comments are *not* skipped, so callers can tell when a comment comes next. Only the scan
+// position is restored - read the current token's offset/length before calling this (as
+// formatJson does), rather than relying on the scanner still reporting them afterwards.
+//
+// The scanner type is derived from createJsonScanner (instead of `any`) so that the
+// scan/getPosition/setPosition calls and the returned token kind are all type-checked.
+function scanAhead(scanner: ReturnType<typeof createJsonScanner>): JsonSyntaxKind {
+    const savedPosition = scanner.getPosition();
+
+    let nextToken = scanner.scan();
+    while (nextToken === JsonSyntaxKind.Trivia ||
+           nextToken === JsonSyntaxKind.LineBreakTrivia) {
+        nextToken = scanner.scan();
+    }
+
+    scanner.setPosition(savedPosition);
+    return nextToken;
+}
diff --git a/test/unit/util/json.spec.ts b/test/unit/util/json.spec.ts