Skip to content

Commit 53a6f00

Browse files
committed
Use the new parser
1 parent 26b9a84 commit 53a6f00

File tree

2 files changed

+3
-76
lines changed

2 files changed

+3
-76
lines changed

extensions/ql-vscode/src/common/jsonl-reader.ts

Lines changed: 2 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { stat } from "fs/promises";
22
import { createReadStream } from "fs-extra";
3-
import { createInterface } from "readline";
3+
4+
const doubleLineBreakRegexp = /\n\r?\n/;
45

56
/**
67
* Read a file consisting of multiple JSON objects. Each object is separated from the previous one
@@ -13,64 +14,6 @@ export async function readJsonlFile<T>(
1314
path: string,
1415
handler: (value: T) => Promise<void>,
1516
logger?: { log: (message: string) => void },
16-
): Promise<void> {
17-
function parseJsonFromCurrentLines() {
18-
try {
19-
return JSON.parse(currentLineSequence.join("\n")) as T;
20-
} catch (e) {
21-
void logger?.log(
22-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
23-
`Error: Failed to parse at line ${lineCount} of ${path} as JSON: ${(e as any)?.message ?? "UNKNOWN REASON"}. Problematic line below:\n${JSON.stringify(currentLineSequence, null, 2)}`,
24-
);
25-
throw e;
26-
}
27-
}
28-
29-
function logProgress() {
30-
void logger?.log(
31-
`Processed ${lineCount} lines with ${parseCounts} parses...`,
32-
);
33-
}
34-
35-
void logger?.log(
36-
`Parsing ${path} (${(await stat(path)).size / 1024 / 1024} MB)...`,
37-
);
38-
const fileStream = createReadStream(path, "utf8");
39-
const rl = createInterface({
40-
input: fileStream,
41-
crlfDelay: Infinity,
42-
});
43-
44-
let lineCount = 0;
45-
let parseCounts = 0;
46-
let currentLineSequence: string[] = [];
47-
for await (const line of rl) {
48-
if (line === "") {
49-
// as mentioned above: a double newline sequence indicates the end of the current JSON object, so we parse it and pass it to the handler
50-
await handler(parseJsonFromCurrentLines());
51-
parseCounts++;
52-
currentLineSequence = [];
53-
} else {
54-
currentLineSequence.push(line);
55-
}
56-
lineCount++;
57-
if (lineCount % 1000000 === 0) {
58-
logProgress();
59-
}
60-
}
61-
// in case the file is not newline-terminated, we need to handle the last JSON object
62-
if (currentLineSequence.length > 0) {
63-
await handler(parseJsonFromCurrentLines());
64-
}
65-
logProgress();
66-
}
67-
68-
const doubleLineBreakRegexp = /\n\r?\n/;
69-
70-
export async function readJsonlFile2<T>(
71-
path: string,
72-
handler: (value: T) => Promise<void>,
73-
logger?: { log: (message: string) => void },
7417
): Promise<void> {
7518
void logger?.log(
7619
`Parsing ${path} (${(await stat(path)).size / 1024 / 1024} MB)...`,

extensions/ql-vscode/test/benchmarks/jsonl-reader.bench.ts

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import { readFile } from "fs-extra";
2-
import { readJsonlFile, readJsonlFile2 } from "../../src/common/jsonl-reader";
2+
import { readJsonlFile } from "../../src/common/jsonl-reader";
33
import { performance } from "perf_hooks";
44
import { join } from "path";
5-
import { createReadStream } from "fs";
6-
import { createInterface } from "readline";
75

86
/** An "obviously correct" implementation to test against. */
97
async function readJsonlReferenceImpl<T>(
@@ -21,18 +19,6 @@ async function readJsonlReferenceImpl<T>(
2119
}
2220
}
2321

24-
async function justReadline(
25-
path: string,
26-
handler: (value: unknown) => Promise<void>,
27-
) {
28-
const stream = createReadStream(path, "utf8");
29-
const rl = createInterface(stream);
30-
31-
for await (const line of rl) {
32-
await handler(line);
33-
}
34-
}
35-
3622
type ParserFn = (
3723
text: string,
3824
callback: (v: unknown) => Promise<void>,
@@ -41,8 +27,6 @@ type ParserFn = (
4127
const parsers: Record<string, ParserFn> = {
4228
readJsonlReferenceImpl,
4329
readJsonlFile,
44-
readJsonlFile2,
45-
justReadline,
4630
};
4731

4832
async function main() {

0 commit comments

Comments
 (0)