From 8f1b039eef3a365c2b58e90aeb6caa15296a3c3d Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Fri, 27 Jun 2025 12:33:12 +0100 Subject: [PATCH 1/9] ci: Bring back MDX caching with public folder cache --- .gitignore | 5 ++-- src/mdx.ts | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 7c590f048bd13c..1080e5838761c0 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,6 @@ npm-debug.log* yarn-debug.log* yarn-error.log* -# Ignore generated export markdown files -/public/md-exports/ - # Runtime data pids *.pid @@ -96,6 +93,8 @@ public/page-data # tsbuildinfo file generated by CI tsconfig.tsbuildinfo +# Ignore generated files +/public/md-exports/ public/mdx-images/* # yalc diff --git a/src/mdx.ts b/src/mdx.ts index f9c363d713a774..9ca388a8a63d8a 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -3,8 +3,19 @@ import matter from 'gray-matter'; import {s} from 'hastscript'; import yaml from 'js-yaml'; import {bundleMDX} from 'mdx-bundler'; -import {access, opendir, readFile} from 'node:fs/promises'; +import {BinaryLike, createHash} from 'node:crypto'; +import {createReadStream, createWriteStream, mkdirSync} from 'node:fs'; +import {access, cp, opendir, readFile} from 'node:fs/promises'; import path from 'node:path'; +// @ts-expect-error ts(2305) -- For some reason "compose" is not recognized in the types +import {compose, Readable} from 'node:stream'; +import {json} from 'node:stream/consumers'; +import {pipeline} from 'node:stream/promises'; +import { + constants as zlibConstants, + createBrotliCompress, + createBrotliDecompress, +} from 'node:zlib'; import {limitFunction} from 'p-limit'; import rehypeAutolinkHeadings from 'rehype-autolink-headings'; import rehypePresetMinify from 'rehype-preset-minify'; @@ -48,6 +59,33 @@ const root = process.cwd(); // Functions which looks like AWS Lambda and we get `EMFILE` errors when trying to open // so many files at once. const FILE_CONCURRENCY_LIMIT = 200; +const CACHE_COMPRESS_LEVEL = 4; +const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler'); +mkdirSync(CACHE_DIR, {recursive: true}); + +const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex'); + +async function readCacheFile(file: string): Promise { + const reader = createReadStream(file); + const decompressor = createBrotliDecompress(); + + return (await json(compose(reader, decompressor))) as T; +} + +async function writeCacheFile(file: string, data: string) { + await pipeline( + Readable.from(data), + createBrotliCompress({ + chunkSize: 32 * 1024, + params: { + [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT, + [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL, + [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length, + }, + }), + createWriteStream(file) + ); +} function formatSlug(slug: string) { return slug.replace(/\.(mdx|md)/, ''); @@ -484,6 +522,20 @@ export async function getFileBySlug(slug: string): Promise { ); } + const cacheKey = md5(source); + const cacheFile = path.join(CACHE_DIR, `${cacheKey}.br`); + + try { + const cached = await readCacheFile(cacheFile); + return cached; + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR' && err.code !== 'Z_BUF_ERROR') { + // If cache is corrupted, ignore and proceed + // eslint-disable-next-line no-console + console.warn(`Failed to read MDX cache: ${cacheFile}`, err); + } + } + process.env.ESBUILD_BINARY_PATH = path.join( root, 'node_modules', @@ -496,6 +548,8 @@ export async function getFileBySlug(slug: string): Promise { // cwd is how mdx-bundler knows how to resolve relative paths const cwd = path.dirname(sourcePath); + const assetsCacheDir = path.join(CACHE_DIR, cacheKey); + const outdir = path.join(root, 'public', 'mdx-images'); const result = await bundleMDX({ source, @@ -577,9 +631,10 @@ export async function getFileBySlug(slug: string): Promise { // inline svgs '.svg': 'dataurl', }; + options.metafile = true; // Set the `outdir` to a public location for this bundle. // this where this images will be copied - options.outdir = path.join(root, 'public', 'mdx-images'); + options.outdir = assetsCacheDir; // Set write to true so that esbuild will output the files. options.write = true; @@ -609,6 +664,15 @@ export async function getFileBySlug(slug: string): Promise { }, }; + await cp(assetsCacheDir, outdir, { + recursive: true, + force: true, + }); + writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { + // eslint-disable-next-line no-console + console.warn(`Failed to write MDX cache: ${cacheFile}`, e); + }); + return resultObj; } From 73680b196d05d892eab7c20a80b06a22f074c09b Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Fri, 27 Jun 2025 14:55:07 +0100 Subject: [PATCH 2/9] some more stuff --- src/mdx.ts | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index 9ca388a8a63d8a..d6ada7d564cd73 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -1,4 +1,3 @@ -import {cache} from 'react'; import matter from 'gray-matter'; import {s} from 'hastscript'; import yaml from 'js-yaml'; @@ -666,6 +665,7 @@ export async function getFileBySlug(slug: string): Promise { await cp(assetsCacheDir, outdir, { recursive: true, + errorOnExist: false, force: true, }); writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { @@ -676,9 +676,19 @@ export async function getFileBySlug(slug: string): Promise { return resultObj; } +const fileBySlugCache = new Map>(); + /** * Cache the result of {@link getFileBySlug}. * * This is useful for performance when rendering the same file multiple times. */ -export const getFileBySlugWithCache = cache(getFileBySlug); +export function getFileBySlugWithCache(slug: string): Promise { + let cached = fileBySlugCache.get(slug); + if (!cached) { + cached = getFileBySlug(slug); + fileBySlugCache.set(slug, cached); + } + + return cached; +} From fe4a13c1132c6b0dc0f491bb21a31be316ae80a0 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Fri, 27 Jun 2025 19:05:44 +0100 Subject: [PATCH 3/9] fix issue when r2 bucket is empty --- scripts/generate-md-exports.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 06a022b38a5b06..337248f1773376 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -109,7 +109,7 @@ async function createWork() { }) ); continuationToken = response.NextContinuationToken; - for (const {Key, ETag} of response.Contents) { + for (const {Key, ETag} of response.Contents || []) { existingFilesOnR2.set(Key, ETag.slice(1, -1)); // Remove quotes from ETag } } while (continuationToken); From 5db6a47cea020db72f523788d89b2d6c1d667587 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Fri, 27 Jun 2025 19:08:42 +0100 Subject: [PATCH 4/9] fix mkdir error? --- src/mdx.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index d6ada7d564cd73..28acbd0fa17fef 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -4,7 +4,7 @@ import yaml from 'js-yaml'; import {bundleMDX} from 'mdx-bundler'; import {BinaryLike, createHash} from 'node:crypto'; import {createReadStream, createWriteStream, mkdirSync} from 'node:fs'; -import {access, cp, opendir, readFile} from 'node:fs/promises'; +import {access, cp, mkdir, opendir, readFile} from 'node:fs/promises'; import path from 'node:path'; // @ts-expect-error ts(2305) -- For some reason "compose" is not recognized in the types import {compose, Readable} from 'node:stream'; @@ -630,7 +630,6 @@ export async function getFileBySlug(slug: string): Promise { // inline svgs '.svg': 'dataurl', }; - options.metafile = true; // Set the `outdir` to a public location for this bundle. // this where this images will be copied options.outdir = assetsCacheDir; @@ -663,6 +662,7 @@ export async function getFileBySlug(slug: string): Promise { }, }; + await mkdir(outdir, {recursive: true}); await cp(assetsCacheDir, outdir, { recursive: true, errorOnExist: false, From f0218f43c5aa99821b2731361ae7b965da725bea Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Fri, 27 Jun 2025 20:39:15 +0100 Subject: [PATCH 5/9] fix cache restoration --- src/mdx.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index 28acbd0fa17fef..546c1e2f6a22a6 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -523,9 +523,15 @@ export async function getFileBySlug(slug: string): Promise { const cacheKey = md5(source); const cacheFile = path.join(CACHE_DIR, `${cacheKey}.br`); + const assetsCacheDir = path.join(CACHE_DIR, cacheKey); + const outdir = path.join(root, 'public', 'mdx-images'); + await mkdir(outdir, {recursive: true}); try { - const cached = await readCacheFile(cacheFile); + const [cached, _] = await Promise.all([ + readCacheFile(cacheFile), + cp(assetsCacheDir, outdir, {recursive: true}), + ]); return cached; } catch (err) { if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR' && err.code !== 'Z_BUF_ERROR') { @@ -547,8 +553,6 @@ export async function getFileBySlug(slug: string): Promise { // cwd is how mdx-bundler knows how to resolve relative paths const cwd = path.dirname(sourcePath); - const assetsCacheDir = path.join(CACHE_DIR, cacheKey); - const outdir = path.join(root, 'public', 'mdx-images'); const result = await bundleMDX({ source, @@ -662,12 +666,7 @@ export async function getFileBySlug(slug: string): Promise { }, }; - await mkdir(outdir, {recursive: true}); - await cp(assetsCacheDir, outdir, { - recursive: true, - errorOnExist: false, - force: true, - }); + await cp(assetsCacheDir, outdir, {recursive: true}); writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { // eslint-disable-next-line no-console console.warn(`Failed to write MDX cache: ${cacheFile}`, e); From 8e31ed2dbde605f2e5c60b51c80d31efa557f0db Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Fri, 27 Jun 2025 21:04:10 +0100 Subject: [PATCH 6/9] limit caching to CI again --- src/mdx.ts | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index 546c1e2f6a22a6..0e9ac7b487edf9 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -521,23 +521,33 @@ export async function getFileBySlug(slug: string): Promise { ); } - const cacheKey = md5(source); - const cacheFile = path.join(CACHE_DIR, `${cacheKey}.br`); - const assetsCacheDir = path.join(CACHE_DIR, cacheKey); + let cacheKey: string | null = null; + let cacheFile: string | null = null; + let assetsCacheDir: string | null = null; const outdir = path.join(root, 'public', 'mdx-images'); await mkdir(outdir, {recursive: true}); - try { - const [cached, _] = await Promise.all([ - readCacheFile(cacheFile), - cp(assetsCacheDir, outdir, {recursive: true}), - ]); - return cached; - } catch (err) { - if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR' && err.code !== 'Z_BUF_ERROR') { - // If cache is corrupted, ignore and proceed - // eslint-disable-next-line no-console - console.warn(`Failed to read MDX cache: ${cacheFile}`, err); + if (process.env.CI) { + cacheKey = md5(source); + cacheFile = path.join(CACHE_DIR, `${cacheKey}.br`); + assetsCacheDir = path.join(CACHE_DIR, cacheKey); + + try { + const [cached, _] = await Promise.all([ + readCacheFile(cacheFile), + cp(assetsCacheDir, outdir, {recursive: true}), + ]); + return cached; + } catch (err) { + if ( + err.code !== 'ENOENT' && + err.code !== 'ABORT_ERR' && + err.code !== 'Z_BUF_ERROR' + ) { + // If cache is corrupted, ignore and proceed + // eslint-disable-next-line no-console + console.warn(`Failed to read MDX cache: ${cacheFile}`, err); + } } } @@ -636,7 +646,7 @@ export async function getFileBySlug(slug: string): Promise { }; // Set the `outdir` to a public location for this bundle. // this where this images will be copied - options.outdir = assetsCacheDir; + options.outdir = assetsCacheDir || outdir; // Set write to true so that esbuild will output the files. options.write = true; @@ -666,11 +676,13 @@ export async function getFileBySlug(slug: string): Promise { }, }; - await cp(assetsCacheDir, outdir, {recursive: true}); - writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { - // eslint-disable-next-line no-console - console.warn(`Failed to write MDX cache: ${cacheFile}`, e); - }); + if (assetsCacheDir && cacheFile) { + await cp(assetsCacheDir, outdir, {recursive: true}); + writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { + // eslint-disable-next-line no-console + console.warn(`Failed to write MDX cache: ${cacheFile}`, e); + }); + } return resultObj; } From b29231e866f4d8943a47d41d1a3590c1b0d268d4 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Fri, 27 Jun 2025 23:08:10 +0100 Subject: [PATCH 7/9] much better `readable.from()` usage Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/mdx.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index 0e9ac7b487edf9..9a2cab90236a0b 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -72,14 +72,15 @@ async function readCacheFile(file: string): Promise { } async function writeCacheFile(file: string, data: string) { + const bufferData = Buffer.from(data); await pipeline( - Readable.from(data), + Readable.from(bufferData), createBrotliCompress({ chunkSize: 32 * 1024, params: { [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT, [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL, - [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length, + [zlibConstants.BROTLI_PARAM_SIZE_HINT]: bufferData.length, }, }), createWriteStream(file) From 1e70062191d475ef47455985915e52fa4b9c678a Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Fri, 27 Jun 2025 23:08:38 +0100 Subject: [PATCH 8/9] fix weird comment grammar Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/mdx.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdx.ts b/src/mdx.ts index 9a2cab90236a0b..4d59bf9ac59103 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -646,7 +646,7 @@ export async function getFileBySlug(slug: string): Promise { '.svg': 'dataurl', }; // Set the `outdir` to a public location for this bundle. - // this where this images will be copied + // this is where these images will be copied options.outdir = assetsCacheDir || outdir; // Set write to true so that esbuild will output the files. From 377a562435ee7b46777d0b0349058b866e6b205c Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Sat, 28 Jun 2025 07:53:58 +0100 Subject: [PATCH 9/9] Comment about switching build out directory --- src/mdx.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mdx.ts b/src/mdx.ts index 4d59bf9ac59103..846ebbbbcec45f 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -647,6 +647,10 @@ export async function getFileBySlug(slug: string): Promise { }; // Set the `outdir` to a public location for this bundle. // this is where these images will be copied + // the reason we use the cache folder when it's + // enabled is because mdx-images is a dumping ground + // for all images, so we cannot filter it out only + // for this specific slug easily options.outdir = assetsCacheDir || outdir; // Set write to true so that esbuild will output the files.