Skip to content

Commit 0ae61fe

Browse files
committed
Add magic forgiving JSON formatting for normal & record JSON
1 parent 1f84472 commit 0ae61fe

File tree

4 files changed

+561
-55
lines changed

4 files changed

+561
-55
lines changed

src/model/events/body-formatting.ts

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import type { WorkerFormatterKey } from '../../services/ui-worker-formatters';
99
import { formatBufferAsync } from '../../services/ui-worker-api';
1010
import { ReadOnlyParams } from '../../components/common/editable-params';
1111
import { ImageViewer } from '../../components/editor/image-viewer';
12-
import { Buffer } from 'buffer';
12+
import { formatJson } from '../../util/json';
1313

1414
export interface EditorFormatter {
1515
language: string;
@@ -107,20 +107,7 @@ export const Formatters: { [key in ViewableContentType]: Formatter } = {
107107
render: (input: Buffer, headers?: Headers) => {
108108
if (input.byteLength < 10_000) {
109109
const inputAsString = bufferToString(input);
110-
111-
try {
112-
// For short-ish inputs, we return synchronously - conveniently this avoids
113-
// showing the loading spinner that churns the layout in short content cases.
114-
return JSON.stringify(
115-
JSON.parse(inputAsString),
116-
null,
117-
2
118-
);
119-
// ^ Same logic as in UI-worker-formatter
120-
} catch (e) {
121-
// Fallback to showing the raw un-formatted JSON:
122-
return inputAsString;
123-
}
110+
return formatJson(inputAsString, { formatRecords: false });
124111
} else {
125112
return observablePromise(
126113
formatBufferAsync(input, 'json', headers)
@@ -134,24 +121,8 @@ export const Formatters: { [key in ViewableContentType]: Formatter } = {
134121
isEditApplicable: false,
135122
render: (input: Buffer, headers?: Headers) => {
136123
if (input.byteLength < 10_000) {
137-
try {
138-
let records = new Array<string>();
139-
const separator = input[input.length - 1];
140-
const separatorString = Buffer.of(separator).toString('utf8');
141-
142-
splitBuffer(input, separator).forEach((recordBuffer: Buffer) => {
143-
if (recordBuffer.length > 0) {
144-
const record = recordBuffer.toString('utf-8');
145-
records.push(record + separatorString);
146-
}
147-
});
148-
149-
return records.join('\n');
150-
// ^ Same logic as in UI-worker-formatter
151-
} catch (e) {
152-
// Fallback to showing the raw un-formatted:
153-
return bufferToString(input);
154-
}
124+
const inputAsString = bufferToString(input);
125+
return formatJson(inputAsString, { formatRecords: true });
155126
} else {
156127
return observablePromise(
157128
formatBufferAsync(input, 'json-records', headers)

src/services/ui-worker-formatters.ts

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ import {
66
import * as beautifyXml from 'xml-beautifier';
77

88
import { Headers } from '../types';
9-
import { bufferToHex, bufferToString, getReadableSize, splitBuffer } from '../util/buffer';
9+
import { bufferToHex, bufferToString, getReadableSize } from '../util/buffer';
1010
import { parseRawProtobuf, extractProtobufFromGrpc } from '../util/protobuf';
11+
import { formatJson } from '../util/json';
1112

1213
const truncationMarker = (size: string) => `\n[-- Truncated to ${size} --]`;
1314
const FIVE_MB = 1024 * 1024 * 5;
@@ -74,29 +75,11 @@ const WorkerFormatters = {
7475
},
7576
json: (content: Buffer) => {
7677
const asString = content.toString('utf8');
77-
try {
78-
return JSON.stringify(JSON.parse(asString), null, 2);
79-
} catch (e) {
80-
return asString;
81-
}
78+
return formatJson(asString, { formatRecords: false });
8279
},
8380
'json-records': (content: Buffer) => {
84-
try {
85-
let records = new Array();
86-
const separator = content[content.length - 1];
87-
const separatorString = Buffer.of(separator).toString('utf8');
88-
89-
splitBuffer(content, separator).forEach((recordBuffer: Buffer) => {
90-
if (recordBuffer.length > 0) {
91-
const record = recordBuffer.toString('utf-8');
92-
records.push(record + separatorString);
93-
}
94-
});
95-
96-
return records.join('\n');
97-
} catch (e) {
98-
return content.toString('utf8');
99-
}
81+
const asString = content.toString('utf8');
82+
return formatJson(asString, { formatRecords: true });
10083
},
10184
javascript: (content: Buffer) => {
10285
return beautifyJs(content.toString('utf8'), {

src/util/json.ts

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
import {
2+
createScanner as createJsonScanner,
3+
SyntaxKind as JsonSyntaxKind
4+
} from 'jsonc-parser';
5+
16
const JSON_START_REGEX = /^\s*[\[\{tfn"\d-]/; // Optional whitespace, then start array/object/true/false/null/string/number
27

38
const JSON_TEXT_SEQ_START_REGEX = /^\u001E\s*[\[\{]/; // Record separator, optional whitespace, then array/object
@@ -56,3 +61,211 @@ export const RECORD_SEPARATOR_CHARS = [
5661
'\n',
5762
'\r'
5863
];
64+
65+
/**
 * A *very* forgiving & flexible JSON formatter. This will format even things that
 * will fail validation later, such as trailing commas, unquoted keys, comments, etc.
 *
 * The text is never parsed into values: it is tokenized, the input's own whitespace
 * is discarded, and every other token is copied through verbatim with fresh
 * newlines and indentation inserted around it.
 *
 * @param text The raw (possibly invalid) JSON text to format.
 * @param options.formatRecords When true, the input is treated as a sequence of
 *   records: while at nesting depth 0, '\u001E' record separator characters are
 *   skipped, and a newline is inserted before a non-value token that would
 *   otherwise continue the previous record's line.
 * @returns The re-formatted text.
 */
export function formatJson(text: string, options: { formatRecords: boolean } = { formatRecords: false }): string {
    const scanner = createJsonScanner(text);

    let result = "";
    let indent = 0;
    let token: JsonSyntaxKind;

    const indentString = ' ';
    let needsIndent = false;
    let previousToken: JsonSyntaxKind | null = null;

    let betweenRecords = false;

    while ((token = scanner.scan()) !== JsonSyntaxKind.EOF) {
        const tokenOffset = scanner.getTokenOffset();
        const tokenLength = scanner.getTokenLength();
        const tokenText = text.slice(tokenOffset, tokenOffset + tokenLength);

        if (options.formatRecords && indent === 0) {
            betweenRecords = true;
        }

        // Skip over explicit 'record separator' characters, which can cause parsing problems
        // when parsing JSON records. Only the separator itself is dropped: scanning resumes
        // from the very next character, in case the scanner grouped more text into this token.
        if (betweenRecords && tokenText[0] === '\u001E') {
            scanner.setPosition(tokenOffset + 1);
            continue;
        }

        // Ignore irrelevant whitespace (internally or between records) - we'll handle that ourselves
        if (token === JsonSyntaxKind.Trivia || token === JsonSyntaxKind.LineBreakTrivia) {
            continue;
        }

        if (betweenRecords) {
            // We've finished one record and we have another coming that won't get a newline
            // automatically: add an extra newline to properly separate records if required.
            betweenRecords = false;
            if (result && result[result.length - 1] !== '\n' && !isValueToken(token)) {
                result += '\n';
            }
        }

        // True when this token is the first thing on its output line: either nothing has
        // been written yet, or a newline was just emitted and indentation is pending.
        // Comments use this to avoid adding a separating space (or undoing the newline).
        const startsLine = needsIndent || result.length === 0;

        if (needsIndent) {
            result += indentString.repeat(indent);
            needsIndent = false;
        }

        switch (token) {
            case JsonSyntaxKind.OpenBraceToken:
            case JsonSyntaxKind.OpenBracketToken: {
                result += tokenText;
                indent++;

                // Keep empty containers compact ("{}" / "[]"), and let a following line
                // comment decide its own placement; otherwise start the contents on a new line.
                const afterOpener = scanAhead(scanner);
                const isClosing = afterOpener === JsonSyntaxKind.CloseBraceToken ||
                    afterOpener === JsonSyntaxKind.CloseBracketToken;
                if (
                    !isClosing &&
                    afterOpener !== JsonSyntaxKind.EOF &&
                    afterOpener !== JsonSyntaxKind.LineCommentTrivia
                ) {
                    result += '\n';
                    needsIndent = true;
                }
                break;
            }

            case JsonSyntaxKind.CloseBraceToken:
            case JsonSyntaxKind.CloseBracketToken: {
                const wasEmpty = previousToken === JsonSyntaxKind.OpenBraceToken ||
                    previousToken === JsonSyntaxKind.OpenBracketToken;

                // A closer seen at depth 0 has no matching opener: clamp the depth at zero
                // and put the stray closer on a line of its own.
                const indentUnderflow = indent === 0;
                indent = Math.max(0, indent - 1);

                if (!wasEmpty) {
                    if (!result.endsWith('\n')) {
                        result += '\n';
                    }
                    result += indentString.repeat(indent);
                }

                result += tokenText;
                if (indentUnderflow) result += '\n';

                break;
            }

            case JsonSyntaxKind.CommaToken: {
                result += tokenText;

                // Break the line after a comma, unless what follows is a comment (which is
                // placed by its own case), a closer, another comma, or the end of input.
                const afterComma = scanAhead(scanner);
                if (
                    afterComma !== JsonSyntaxKind.LineCommentTrivia &&
                    afterComma !== JsonSyntaxKind.BlockCommentTrivia &&
                    afterComma !== JsonSyntaxKind.CloseBraceToken &&
                    afterComma !== JsonSyntaxKind.CloseBracketToken &&
                    afterComma !== JsonSyntaxKind.EOF &&
                    afterComma !== JsonSyntaxKind.CommaToken
                ) {
                    result += '\n';
                    needsIndent = true;
                }
                break;
            }

            case JsonSyntaxKind.ColonToken:
                result += tokenText;
                result += ' ';
                break;

            case JsonSyntaxKind.LineCommentTrivia: {
                if (startsLine) {
                    // Already at the start of a (possibly indented) line, e.g. directly after
                    // another comment or at the very start of the output: write the comment
                    // as-is, without a leading space and without pulling it onto the line above.
                    result += tokenText;
                } else {
                    const needsNewlineBefore = (
                        previousToken === JsonSyntaxKind.OpenBraceToken ||
                        previousToken === JsonSyntaxKind.OpenBracketToken
                    ) && !result.endsWith('\n');

                    if (needsNewlineBefore) {
                        // First thing inside a container: give the comment its own indented line.
                        result += '\n';
                        result += indentString.repeat(indent);
                        result += tokenText;
                    } else {
                        // Otherwise the comment trails the previous token on the same line. Any
                        // trailing whitespace is dropped and the comment attached directly; with
                        // no trailing whitespace a single separating space is added.
                        const trimmedResult = result.trimEnd();
                        if (result.length > trimmedResult.length) {
                            result = trimmedResult + tokenText;
                        } else {
                            result += ' ' + tokenText;
                        }
                    }
                }

                // A line comment runs to the end of its line, so anything after it other
                // than a closer (which adds its own newline) must start on a new line.
                const afterComment = scanAhead(scanner);
                if (
                    afterComment !== JsonSyntaxKind.CloseBraceToken &&
                    afterComment !== JsonSyntaxKind.CloseBracketToken &&
                    afterComment !== JsonSyntaxKind.EOF
                ) {
                    result += '\n';
                    needsIndent = true;
                }
                break;
            }

            case JsonSyntaxKind.BlockCommentTrivia: {
                // Only separate the comment from preceding content with a space when it
                // actually shares a line with that content.
                if (startsLine || result.endsWith('\n')) {
                    result += tokenText;
                } else {
                    result += ' ' + tokenText;
                }

                const afterBlock = scanAhead(scanner);
                if (
                    afterBlock !== JsonSyntaxKind.CommaToken &&
                    afterBlock !== JsonSyntaxKind.CloseBraceToken &&
                    afterBlock !== JsonSyntaxKind.CloseBracketToken &&
                    afterBlock !== JsonSyntaxKind.EOF
                ) {
                    result += '\n';
                    needsIndent = true;
                }
                break;
            }

            default: {
                const followsValue = isValueToken(previousToken);
                if (followsValue && isValueToken(token) && !result.endsWith('\n')) {
                    // Missing comma detected between sequential values,
                    // so add a newline for readability
                    result += '\n';
                    result += indentString.repeat(indent);
                }

                result += tokenText;
                break;
            }
        }

        previousToken = token;
    }

    return result;
}
249+
250+
/**
 * Reports whether the given token kind is one that can end a value: a string,
 * number, `true`, `false` or `null` literal, or a closing brace/bracket.
 *
 * @param token The token kind to classify, or null when there is no token.
 * @returns true for the kinds listed above, false for everything else (including null).
 */
function isValueToken(token: JsonSyntaxKind | null): boolean {
    switch (token) {
        case JsonSyntaxKind.StringLiteral:
        case JsonSyntaxKind.NumericLiteral:
        case JsonSyntaxKind.TrueKeyword:
        case JsonSyntaxKind.FalseKeyword:
        case JsonSyntaxKind.NullKeyword:
        case JsonSyntaxKind.CloseBraceToken:
        case JsonSyntaxKind.CloseBracketToken:
            return true;
        default:
            return false;
    }
}
259+
260+
/**
 * Peeks at the next significant token without consuming it.
 *
 * Scans forward from the scanner's current position, skipping whitespace and
 * line-break trivia, then moves the scanner back to the position it started at.
 * Comment tokens are not skipped: they are returned like any other token.
 *
 * @param scanner The scanner to peek with (as returned by createJsonScanner).
 * @returns The kind of the next token that is not whitespace or a line break.
 */
function scanAhead(scanner: ReturnType<typeof createJsonScanner>): JsonSyntaxKind {
    const savedPosition = scanner.getPosition();

    let nextToken = scanner.scan();
    while (
        nextToken === JsonSyntaxKind.Trivia ||
        nextToken === JsonSyntaxKind.LineBreakTrivia
    ) {
        nextToken = scanner.scan();
    }

    // Rewind, so that the caller's own scanning continues from where it left off.
    scanner.setPosition(savedPosition);
    return nextToken;
}

0 commit comments

Comments
 (0)