Skip to content

Commit 61c73a3

Browse files
committed
brotli to the rescue!
1 parent 871e0fc commit 61c73a3

File tree

3 files changed

+151
-45
lines changed

3 files changed

+151
-45
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@
143143
"@types/dompurify": "3.0.5"
144144
},
145145
"volta": {
146-
"node": "20.11.0",
146+
"node": "22.16.0",
147147
"yarn": "1.22.22"
148148
}
149149
}

scripts/generate-md-exports.mjs

Lines changed: 90 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,35 @@
22
/* eslint-disable no-console */
33
import {selectAll} from 'hast-util-select';
44
import {createHash} from 'node:crypto';
5-
import {constants as fsConstants, existsSync} from 'node:fs';
6-
import {copyFile, mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises';
5+
import {createReadStream, createWriteStream, existsSync} from 'node:fs';
6+
import {mkdir, opendir, readFile, rm} from 'node:fs/promises';
77
import {cpus} from 'node:os';
88
import * as path from 'node:path';
9+
import {Readable} from 'node:stream';
910
import {fileURLToPath} from 'node:url';
1011
import {isMainThread, parentPort, Worker, workerData} from 'node:worker_threads';
12+
import {
13+
constants as zlibConstants,
14+
createBrotliCompress,
15+
createBrotliDecompress,
16+
} from 'node:zlib';
1117
import rehypeParse from 'rehype-parse';
1218
import rehypeRemark from 'rehype-remark';
1319
import remarkGfm from 'remark-gfm';
1420
import remarkStringify from 'remark-stringify';
1521
import {unified} from 'unified';
1622
import {remove} from 'unist-util-remove';
1723

24+
const CACHE_COMPRESS_LEVEL = 4;
25+
1826
function taskFinishHandler(data) {
1927
if (data.failedTasks.length === 0) {
2028
console.log(`✅ Worker[${data.id}]: ${data.success} files successfully.`);
21-
} else {
22-
hasErrors = true;
23-
console.error(`❌ Worker[${data.id}]: ${data.failedTasks.length} files failed:`);
24-
console.error(data.failedTasks);
29+
return false;
2530
}
31+
console.error(`❌ Worker[${data.id}]: ${data.failedTasks.length} files failed:`);
32+
console.error(data.failedTasks);
33+
return true;
2634
}
2735

2836
async function createWork() {
@@ -37,7 +45,7 @@ async function createWork() {
3745
const INPUT_DIR = path.join(root, '.next', 'server', 'app');
3846
const OUTPUT_DIR = path.join(root, 'public', 'md-exports');
3947

40-
const CACHE_VERSION = 1;
48+
const CACHE_VERSION = 2;
4149
const CACHE_DIR = path.join(root, '.next', 'cache', 'md-exports', `v${CACHE_VERSION}`);
4250
const noCache = !existsSync(CACHE_DIR);
4351
if (noCache) {
@@ -86,7 +94,7 @@ async function createWork() {
8694
workerData: {id, noCache, cacheDir: CACHE_DIR, tasks: workerTasks[id]},
8795
});
8896
let hasErrors = false;
89-
worker.on('message', taskFinishHandler);
97+
worker.on('message', data => (hasErrors = taskFinishHandler(data)));
9098
worker.on('error', reject);
9199
worker.on('exit', code => {
92100
if (code !== 0) {
@@ -104,7 +112,11 @@ async function createWork() {
104112
cacheDir: CACHE_DIR,
105113
tasks: workerTasks[workerTasks.length - 1],
106114
id: workerTasks.length - 1,
107-
}).then(taskFinishHandler)
115+
}).then(data => {
116+
if (taskFinishHandler(data)) {
117+
throw new Error(`Worker[${data.id}] had some errors.`);
118+
}
119+
})
108120
);
109121

110122
await Promise.all(workerPromises);
@@ -121,45 +133,83 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
121133
const cacheFile = path.join(cacheDir, hash);
122134
if (!noCache) {
123135
try {
124-
await copyFile(cacheFile, target, fsConstants.COPYFILE_FICLONE);
136+
const {resolve, reject, promise} = Promise.withResolvers();
137+
const reader = createReadStream(cacheFile);
138+
reader.on('error', reject);
139+
reader.pause();
140+
141+
const writer = createWriteStream(target, {
142+
encoding: 'utf8',
143+
});
144+
writer.on('error', reject);
145+
146+
const decompressor = createBrotliDecompress();
147+
const stream = reader.pipe(decompressor).pipe(writer);
148+
stream.on('error', reject);
149+
stream.on('finish', resolve);
150+
151+
reader.resume();
152+
153+
await promise;
125154
return;
126155
} catch {
127156
// pass
128157
}
129158
}
130159

131-
await writeFile(
132-
target,
133-
String(
134-
await unified()
135-
.use(rehypeParse)
136-
// Need the `main div > hgroup` selector for the headers
137-
.use(() => tree => selectAll('main div > hgroup, div#main', tree))
138-
// If we don't do this wrapping, rehypeRemark just returns an empty string -- yeah WTF?
139-
.use(() => tree => ({
140-
type: 'element',
141-
tagName: 'div',
142-
properties: {},
143-
children: tree,
144-
}))
145-
.use(rehypeRemark, {
146-
document: false,
147-
handlers: {
148-
// Remove buttons as they usually get confusing in markdown, especially since we use them as tab headers
149-
button() {},
150-
},
151-
})
152-
// We end up with empty inline code blocks, probably from some tab logic in the HTML, remove them
153-
.use(() => tree => remove(tree, {type: 'inlineCode', value: ''}))
154-
.use(remarkGfm)
155-
.use(remarkStringify)
156-
.process(text)
157-
)
160+
const data = String(
161+
await unified()
162+
.use(rehypeParse)
163+
// Need the `main div > hgroup` selector for the headers
164+
.use(() => tree => selectAll('main div > hgroup, div#main', tree))
165+
// If we don't do this wrapping, rehypeRemark just returns an empty string -- yeah WTF?
166+
.use(() => tree => ({
167+
type: 'element',
168+
tagName: 'div',
169+
properties: {},
170+
children: tree,
171+
}))
172+
.use(rehypeRemark, {
173+
document: false,
174+
handlers: {
175+
// Remove buttons as they usually get confusing in markdown, especially since we use them as tab headers
176+
button() {},
177+
},
178+
})
179+
// We end up with empty inline code blocks, probably from some tab logic in the HTML, remove them
180+
.use(() => tree => remove(tree, {type: 'inlineCode', value: ''}))
181+
.use(remarkGfm)
182+
.use(remarkStringify)
183+
.process(text)
158184
);
159-
copyFile(target, cacheFile, fsConstants.COPYFILE_FICLONE).catch(error => {
160-
// eslint-disable-next-line no-console
161-
console.error(`Failed to cache file ${cacheFile}:`, error);
185+
const reader = Readable.from(data);
186+
reader.pause();
187+
188+
const {resolve, reject, promise} = Promise.withResolvers();
189+
const writer = createWriteStream(target, {
190+
encoding: 'utf8',
191+
});
192+
writer.on('error', reject);
193+
194+
const compressor = createBrotliCompress({
195+
chunkSize: 32 * 1024,
196+
params: {
197+
[zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT,
198+
[zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL,
199+
[zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length,
200+
},
162201
});
202+
const cacheWriter = createWriteStream(cacheFile);
203+
204+
const writeStream = reader.pipe(writer);
205+
writeStream.on('error', reject);
206+
writeStream.on('finish', resolve);
207+
208+
const cacheWriteStream = reader.pipe(compressor).pipe(cacheWriter);
209+
cacheWriteStream.on('error', err => console.warn('Error writing cache file:', err));
210+
reader.resume();
211+
212+
await promise;
163213
}
164214

165215
async function processTaskList({id, tasks, cacheDir, noCache}) {

src/mdx.ts

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,15 @@ import matter from 'gray-matter';
55
import {s} from 'hastscript';
66
import yaml from 'js-yaml';
77
import {bundleMDX} from 'mdx-bundler';
8-
import {mkdirSync} from 'node:fs';
9-
import {access, opendir, readFile, writeFile} from 'node:fs/promises';
8+
import {createReadStream, createWriteStream, mkdirSync} from 'node:fs';
9+
import {access, opendir, readFile} from 'node:fs/promises';
1010
import path from 'node:path';
11+
import {Readable} from 'node:stream';
12+
import {
13+
constants as zlibConstants,
14+
createBrotliCompress,
15+
createBrotliDecompress,
16+
} from 'node:zlib';
1117
import rehypeAutolinkHeadings from 'rehype-autolink-headings';
1218
import rehypePresetMinify from 'rehype-preset-minify';
1319
import rehypePrismDiff from 'rehype-prism-diff';
@@ -35,11 +41,61 @@ import {isNotNil} from './utils';
3541
import {isVersioned, VERSION_INDICATOR} from './versioning';
3642

3743
const root = process.cwd();
44+
const CACHE_COMPRESS_LEVEL = 4;
3845
const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler');
3946
mkdirSync(CACHE_DIR, {recursive: true});
4047

4148
const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex');
4249

50+
/**
 * Reads a Brotli-compressed cache entry and returns its decompressed
 * contents decoded as UTF-8.
 *
 * @param file - Path of the compressed cache file.
 * @returns The decompressed text.
 * @throws The underlying file-system error (its `code`, e.g. `ENOENT`, is
 *   preserved so callers can tell a cache miss from a real failure) or the
 *   zlib error raised for a corrupt entry.
 */
async function readCacheFile(file: string): Promise<string> {
  const reader = createReadStream(file);
  const decompressor = createBrotliDecompress();

  // `pipe()` does not forward errors downstream, so hand read failures to the
  // decompressor; the loop below then rejects with that very same error object.
  reader.on('error', err => decompressor.destroy(err));
  reader.pipe(decompressor);

  const chunks: Buffer[] = [];
  try {
    // Async iteration only completes once the readable side has ended, i.e.
    // after every decompressed chunk was delivered. Waiting for the transform's
    // 'finish' event instead can resolve while output is still buffered and
    // yield truncated data.
    for await (const chunk of decompressor) {
      chunks.push(chunk);
    }
  } finally {
    // Release the file descriptor on both the success and the failure path.
    reader.destroy();
  }

  // Decode once at the end so multi-byte characters that straddle chunk
  // boundaries are reassembled correctly.
  return Buffer.concat(chunks).toString('utf8');
}
72+
73+
/**
 * Compresses `data` with Brotli and writes the result to `file`.
 *
 * @param file - Destination path of the cache entry.
 * @param data - Text to compress and store.
 * @returns A promise that settles once the file stream has finished writing.
 * @throws Any error raised by the source, the compressor or the file stream.
 */
async function writeCacheFile(file: string, data: string): Promise<void> {
  const source = Readable.from(data);
  const compressor = createBrotliCompress({
    chunkSize: 32 * 1024,
    params: {
      [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT,
      [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL,
      // The hint is expressed in input bytes, which differs from the string
      // length as soon as the text contains non-ASCII characters.
      [zlibConstants.BROTLI_PARAM_SIZE_HINT]: Buffer.byteLength(data, 'utf8'),
    },
  });
  const writer = createWriteStream(file);

  try {
    await new Promise<void>((resolve, reject) => {
      // `pipe()` returns its destination and never forwards errors, so each
      // stage needs its own listener; otherwise a compressor failure would
      // surface as an uncaught 'error' event instead of a rejection.
      source.on('error', reject);
      compressor.on('error', reject);
      writer.on('error', reject);
      writer.on('finish', resolve);

      source.pipe(compressor).pipe(writer);
    });
  } catch (err) {
    // Tear the whole chain down so a failure in one stage does not leave the
    // other stages (and the file descriptor) dangling.
    source.destroy();
    compressor.destroy();
    writer.destroy();
    throw err;
  }
}
98+
4399
function formatSlug(slug: string) {
44100
return slug.replace(/\.(mdx|md)/, '');
45101
}
@@ -439,7 +495,7 @@ export async function getFileBySlug(slug: string) {
439495
const cacheFile = path.join(CACHE_DIR, cacheKey);
440496

441497
try {
442-
const cached = JSON.parse(await readFile(cacheFile, 'utf8'));
498+
const cached = JSON.parse(await readCacheFile(cacheFile));
443499
return cached;
444500
} catch (err) {
445501
if (err.code !== 'ENOENT') {
@@ -574,7 +630,7 @@ export async function getFileBySlug(slug: string) {
574630
},
575631
};
576632

577-
writeFile(cacheFile, JSON.stringify(resultObj), 'utf8').catch(e => {
633+
writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => {
578634
// eslint-disable-next-line no-console
579635
console.warn(`Failed to write MDX cache: ${cacheFile}`, e);
580636
});

0 commit comments

Comments
 (0)