From 25baf2b2be32bf3504b5122dce8872e030401872 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Mon, 23 Jun 2025 14:15:53 +0100 Subject: [PATCH 01/23] ci(build): Try to cache mdx bundler --- scripts/generate-md-exports.mjs | 5 ++++- src/mdx.ts | 40 ++++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index e50e6ee87e23cf..eac192bd904652 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -156,7 +156,10 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) { .process(text) ) ); - await copyFile(target, cacheFile, fsConstants.COPYFILE_FICLONE); + copyFile(target, cacheFile, fsConstants.COPYFILE_FICLONE).catch(error => { + // eslint-disable-next-line no-console + console.error(`Failed to cache file ${cacheFile}:`, error); + }); } async function processTaskList({id, tasks, cacheDir, noCache}) { diff --git a/src/mdx.ts b/src/mdx.ts index d7dcc5ecda78d1..a5be0228388522 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -1,11 +1,13 @@ -import fs from 'fs'; -import path from 'path'; +import {createHash} from 'crypto'; import {cache} from 'react'; import matter from 'gray-matter'; import {s} from 'hastscript'; import yaml from 'js-yaml'; import {bundleMDX} from 'mdx-bundler'; +import fs from 'node:fs'; +import {readFile, writeFile} from 'node:fs/promises'; +import path from 'node:path'; import rehypeAutolinkHeadings from 'rehype-autolink-headings'; import rehypePresetMinify from 'rehype-preset-minify'; import rehypePrismDiff from 'rehype-prism-diff'; @@ -33,6 +35,10 @@ import {isNotNil} from './utils'; import {isVersioned, VERSION_INDICATOR} from './versioning'; const root = process.cwd(); +const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler'); +fs.mkdirSync(CACHE_DIR, {recursive: true}); + +const md5 = data => createHash('md5').update(data).digest('hex'); function formatSlug(slug: string) { return slug.replace(/\.(mdx|md)/, ''); @@ -304,8 +310,10 @@ export async function getFileBySlug(slug: string) { const configPath = path.join(root, slug.split(VERSION_INDICATOR)[0], 'config.yml'); let configFrontmatter: PlatformConfig | undefined; - if (fs.existsSync(configPath)) { - configFrontmatter = yaml.load(fs.readFileSync(configPath, 'utf8')) as PlatformConfig; + try { + configFrontmatter = yaml.load(await readFile(configPath, 'utf8')) as PlatformConfig; + } catch (e) { + // If the config file does not exist, we can ignore it. } let mdxPath = path.join(root, `${slug}.mdx`); @@ -352,7 +360,20 @@ export async function getFileBySlug(slug: string) { [mdxPath, mdxIndexPath, mdPath, versionedMdxIndexPath].find(fs.existsSync) ?? 
mdIndexPath;

-  const source = fs.readFileSync(sourcePath, 'utf8');
+  const source = await readFile(sourcePath, 'utf8');
+  const cacheKey = md5(source);
+  const cacheFile = path.join(CACHE_DIR, cacheKey);
+
+  try {
+    const cached = JSON.parse(await readFile(cacheFile, 'utf8'));
+    return cached;
+  } catch (e) {
+    if (e.code !== 'ENOENT') {
+      // If cache is corrupted, ignore and proceed
+      // eslint-disable-next-line no-console
+      console.warn(`Failed to read MDX cache: ${cacheFile}`, e);
+    }
+  }

   process.env.ESBUILD_BINARY_PATH = path.join(
     root,
@@ -469,7 +490,7 @@ export async function getFileBySlug(slug: string) {
     mergedFrontmatter = {...frontmatter, ...configFrontmatter};
   }

-  return {
+  const resultObj = {
     matter: result.matter,
     mdxSource: code,
     toc,
@@ -478,6 +499,13 @@
       slug,
     },
   };
+
+  writeFile(cacheFile, JSON.stringify(resultObj), 'utf8').catch(e => {
+    // eslint-disable-next-line no-console
+    console.warn(`Failed to write MDX cache: ${cacheFile}`, e);
+  });
+
+  return resultObj;
 }

 /**

From ac13fc3c09ce86eb003c03d11f7a77ce494a64f8 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Mon, 23 Jun 2025 19:36:16 +0100
Subject: [PATCH 02/23] asyncify for speed?

---
 app/sitemap.ts                  |   2 +-
 scripts/algolia.ts              |   6 +-
 scripts/generate-md-exports.mjs |   2 +-
 src/docTree.ts                  |   2 +-
 src/mdx.ts                      | 283 ++++++++++++++++++++------------
 5 files changed, 180 insertions(+), 115 deletions(-)

diff --git a/app/sitemap.ts b/app/sitemap.ts
index 89498d034b61c4..173989e0a6fae6 100644
--- a/app/sitemap.ts
+++ b/app/sitemap.ts
@@ -5,7 +5,7 @@ import {getDevDocsFrontMatter, getDocsFrontMatter} from 'sentry-docs/mdx';

 export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
   if (isDeveloperDocs) {
-    const docs = getDevDocsFrontMatter();
+    const docs = await getDevDocsFrontMatter();
     const baseUrl = 'https://develop.sentry.dev';
     return docsToSitemap(docs, baseUrl);
   }
diff --git a/scripts/algolia.ts b/scripts/algolia.ts
index 4dc0dcb4c4248d..423b07578eae4f 100644
--- a/scripts/algolia.ts
+++ b/scripts/algolia.ts
@@ -64,9 +64,9 @@ indexAndUpload();
 async function indexAndUpload() {
   // the page front matters are the source of truth for the static doc routes
   // as they are used directly by generateStaticParams() on [[..path]] page
-  const pageFrontMatters = isDeveloperDocs
-    ? getDevDocsFrontMatter()
-    : await getDocsFrontMatter();
+  const pageFrontMatters = await(
+    isDeveloperDocs ? getDevDocsFrontMatter() : getDocsFrontMatter()
+  );
   const records = await generateAlogliaRecords(pageFrontMatters);
   console.log('πŸ”₯ Generated %d new Algolia records.', records.length);
   const existingRecordIds = await fetchExistingRecordIds(index);
diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs
index eac192bd904652..9cf98b8304df28 100644
--- a/scripts/generate-md-exports.mjs
+++ b/scripts/generate-md-exports.mjs
@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-
+/* eslint-disable no-console */
 import {selectAll} from 'hast-util-select';
 import {createHash} from 'node:crypto';
 import {constants as fsConstants, existsSync} from 'node:fs';
diff --git a/src/docTree.ts b/src/docTree.ts
index 4240071e4ec45e..81e57194c6044e 100644
--- a/src/docTree.ts
+++ b/src/docTree.ts
@@ -40,7 +40,7 @@ export function getDocsRootNode(): Promise<DocNode> {

 async function getDocsRootNodeUncached(): Promise<DocNode> {
   return frontmatterToTree(
-    isDeveloperDocs ? getDevDocsFrontMatter() : await getDocsFrontMatter()
+    await(isDeveloperDocs ? getDevDocsFrontMatter() : getDocsFrontMatter())
   );
 }

diff --git a/src/mdx.ts b/src/mdx.ts
index a5be0228388522..5d77922258e932 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -1,12 +1,12 @@
-import {createHash} from 'crypto';
+import {BinaryLike, createHash} from 'crypto';

 import {cache} from 'react';
 import matter from 'gray-matter';
 import {s} from 'hastscript';
 import yaml from 'js-yaml';
 import {bundleMDX} from 'mdx-bundler';
-import fs from 'node:fs';
-import {readFile, writeFile} from 'node:fs/promises';
+import {mkdirSync} from 'node:fs';
+import {access, opendir, readFile, writeFile} from 'node:fs/promises';
 import path from 'node:path';
 import rehypeAutolinkHeadings from 'rehype-autolink-headings';
 import rehypePresetMinify from 'rehype-preset-minify';
@@ -36,9 +36,9 @@ import {isVersioned, VERSION_INDICATOR} from './versioning';

 const root = process.cwd();
 const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler');
-fs.mkdirSync(CACHE_DIR, {recursive: true});
+mkdirSync(CACHE_DIR, {recursive: true});

-const md5 = data => createHash('md5').update(data).digest('hex');
+const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex');

 function formatSlug(slug: string) {
   return slug.replace(/\.(mdx|md)/, '');
@@ -98,7 +98,7 @@ export const getVersionsFromDoc = (frontMatter: FrontMatter[], docPath: string)
 };

 async function getDocsFrontMatterUncached(): Promise<FrontMatter[]> {
-  const frontMatter = getAllFilesFrontMatter();
+  const frontMatter = await getAllFilesFrontMatter();

   const categories = await apiCategories();
   categories.forEach(category => {
@@ -133,128 +133,166 @@ async function getDocsFrontMatterUncached(): Promise<FrontMatter[]> {
   return frontMatter;
 }

-export function getDevDocsFrontMatter(): FrontMatter[] {
+export async function getDevDocsFrontMatter(): Promise<FrontMatter[]> {
   const folder = 'develop-docs';
   const docsPath = path.join(root, folder);
   const files = getAllFilesRecursively(docsPath);
-  const fmts = files
-    .map(file => {
-      const fileName = file.slice(docsPath.length + 1);
-      if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
-        return undefined;
-      }
+  const fmts = (
+    await Promise.all(
+      files.map(async file => {
+        const fileName = file.slice(docsPath.length + 1);
+        if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
+          return undefined;
+        }

-      const source = fs.readFileSync(file, 'utf8');
-      const {data: frontmatter} = matter(source);
-      return {
-        ...(frontmatter as FrontMatter),
-        slug: fileName.replace(/\/index.mdx?$/, '').replace(/\.mdx?$/, ''),
-        sourcePath: path.join(folder, fileName),
-      };
-    })
-    .filter(isNotNil);
+        const source = await readFile(file, 'utf8');
+        const {data: frontmatter} = matter(source);
+        return {
+          ...(frontmatter as FrontMatter),
+          slug: fileName.replace(/\/index.mdx?$/, '').replace(/\.mdx?$/, ''),
+          sourcePath: path.join(folder, fileName),
+        };
+      })
+    )
+  ).filter(isNotNil);
   return fmts;
 }

-function getAllFilesFrontMatter() {
+async function getAllFilesFrontMatter(): Promise<FrontMatter[]> {
   const docsPath = path.join(root, 'docs');
   const files = getAllFilesRecursively(docsPath);
   const allFrontMatter: FrontMatter[] = [];
-  files.forEach(file => {
-    const fileName = file.slice(docsPath.length + 1);
-    if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
-      return;
-    }
+  await Promise.all(
+    files.map(async file => {
+      const fileName = file.slice(docsPath.length + 1);
+      if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
+        return;
+      }

-    if (fileName.indexOf('/common/') !== -1)
{ - return; - } + if (fileName.indexOf('/common/') !== -1) { + return; + } - const source = fs.readFileSync(file, 'utf8'); - const {data: frontmatter} = matter(source); - allFrontMatter.push({ - ...(frontmatter as FrontMatter), - slug: formatSlug(fileName), - sourcePath: path.join('docs', fileName), - }); - }); + const source = await readFile(file, 'utf8'); + const {data: frontmatter} = matter(source); + allFrontMatter.push({ + ...(frontmatter as FrontMatter), + slug: formatSlug(fileName), + sourcePath: path.join('docs', fileName), + }); + }) + ); // Add all `common` files in the right place. const platformsPath = path.join(docsPath, 'platforms'); - const platformNames = fs - .readdirSync(platformsPath) - .filter(p => !fs.statSync(path.join(platformsPath, p)).isFile()); - platformNames.forEach(platformName => { + for await (const platform of await opendir(platformsPath)) { + if (platform.isFile()) { + continue; + } + const platformName = platform.name; + let platformFrontmatter: PlatformConfig = {}; const configPath = path.join(platformsPath, platformName, 'config.yml'); - if (fs.existsSync(configPath)) { + try { platformFrontmatter = yaml.load( - fs.readFileSync(configPath, 'utf8') + await readFile(configPath, 'utf8') ) as PlatformConfig; + } catch (err) { + // the file may not exist and that's fine, for anything else we throw + if (err.code !== 'ENOENT') { + throw err; + } } const commonPath = path.join(platformsPath, platformName, 'common'); - if (!fs.existsSync(commonPath)) { - return; + try { + await access(commonPath); + } catch (err) { + continue; } const commonFileNames: string[] = getAllFilesRecursively(commonPath).filter( p => path.extname(p) === '.mdx' ); - const commonFiles = commonFileNames.map(commonFileName => { - const source = fs.readFileSync(commonFileName, 'utf8'); - const {data: frontmatter} = matter(source); - return {commonFileName, frontmatter: frontmatter as FrontMatter}; - }); - commonFiles.forEach(f => { - if (!isSupported(f.frontmatter, platformName)) { - return; - } + const commonFiles = await Promise.all( + commonFileNames.map(async commonFileName => { + const source = await readFile(commonFileName, 'utf8'); + const {data: frontmatter} = matter(source); + return {commonFileName, frontmatter: frontmatter as FrontMatter}; + }) + ); - const subpath = f.commonFileName.slice(commonPath.length + 1); - const slug = f.commonFileName.slice(docsPath.length + 1).replace(/\/common\//, '/'); - if ( - !fs.existsSync(path.join(docsPath, slug)) && - !fs.existsSync(path.join(docsPath, slug.replace('/index.mdx', '.mdx'))) - ) { - let frontmatter = f.frontmatter; - if (subpath === 'index.mdx') { - frontmatter = {...frontmatter, ...platformFrontmatter}; + await Promise.all( + commonFiles.map(async f => { + if (!isSupported(f.frontmatter, platformName)) { + return; } - allFrontMatter.push({ - ...frontmatter, - slug: formatSlug(slug), - sourcePath: 'docs/' + f.commonFileName.slice(docsPath.length + 1), - }); - } - }); + + const subpath = f.commonFileName.slice(commonPath.length + 1); + const slug = f.commonFileName + .slice(docsPath.length + 1) + .replace(/\/common\//, '/'); + const noFrontMatter = ( + await Promise.allSettled([ + access(path.join(docsPath, slug)), + access(path.join(docsPath, slug.replace('/index.mdx', '.mdx'))), + ]) + ).every(r => r.status === 'rejected'); + if (noFrontMatter) { + let frontmatter = f.frontmatter; + if (subpath === 'index.mdx') { + frontmatter = {...frontmatter, ...platformFrontmatter}; + } + allFrontMatter.push({ + ...frontmatter, + slug: 
formatSlug(slug), + sourcePath: 'docs/' + f.commonFileName.slice(docsPath.length + 1), + }); + } + }) + ); const guidesPath = path.join(docsPath, 'platforms', platformName, 'guides'); - let guideNames: string[] = []; - if (!fs.existsSync(guidesPath)) { - return; + try { + await access(guidesPath); + } catch (err) { + continue; } - guideNames = fs - .readdirSync(guidesPath) - .filter(g => !fs.statSync(path.join(guidesPath, g)).isFile()); - guideNames.forEach(guideName => { + + for await (const guide of await opendir(guidesPath)) { + if (guide.isFile()) { + continue; + } + const guideName = guide.name; + let guideFrontmatter: FrontMatter | null = null; const guideConfigPath = path.join(guidesPath, guideName, 'config.yml'); - if (fs.existsSync(guideConfigPath)) { + try { guideFrontmatter = yaml.load( - fs.readFileSync(guideConfigPath, 'utf8') + await readFile(guideConfigPath, 'utf8') ) as FrontMatter; + } catch (err) { + if (err.code !== 'ENOENT') { + throw err; + } } - commonFiles.forEach(f => { - if (!isSupported(f.frontmatter, platformName, guideName)) { - return; - } + await Promise.all( + commonFiles.map(async f => { + if (!isSupported(f.frontmatter, platformName, guideName)) { + return; + } + + const subpath = f.commonFileName.slice(commonPath.length + 1); + const slug = path.join('platforms', platformName, 'guides', guideName, subpath); + try { + await access(path.join(docsPath, slug)); + return; + } catch { + // pass + } - const subpath = f.commonFileName.slice(commonPath.length + 1); - const slug = path.join('platforms', platformName, 'guides', guideName, subpath); - if (!fs.existsSync(path.join(docsPath, slug))) { let frontmatter = f.frontmatter; if (subpath === 'index.mdx') { frontmatter = {...frontmatter, ...guideFrontmatter}; @@ -264,11 +302,10 @@ function getAllFilesFrontMatter() { slug: formatSlug(slug), sourcePath: 'docs/' + f.commonFileName.slice(docsPath.length + 1), }); - } - }); - }); - }); - + }) + ); + } + } return allFrontMatter; } @@ -323,10 +360,14 @@ export async function getFileBySlug(slug: string) { let mdIndexPath = path.join(root, slug, 'index.md'); if ( - slug.indexOf('docs/platforms/') === 0 && - [mdxPath, mdxIndexPath, mdPath, mdIndexPath, versionedMdxIndexPath].filter(p => - fs.existsSync(p) - ).length === 0 + slug.startsWith('docs/platforms/') && + ( + await Promise.allSettled( + [mdxPath, mdxIndexPath, mdPath, mdIndexPath, versionedMdxIndexPath].map(p => + access(p) + ) + ) + ).every(r => r.status === 'rejected') ) { // Try the common folder. 
const slugParts = slug.split('/'); @@ -342,25 +383,49 @@ export async function getFileBySlug(slug: string) { commonFilePath = path.join(commonPath, slugParts.slice(3).join('/')); versionedMdxIndexPath = getVersionedIndexPath(root, commonFilePath, '.mdx'); } - if (commonFilePath && fs.existsSync(commonPath)) { - mdxPath = path.join(root, `${commonFilePath}.mdx`); - mdxIndexPath = path.join(root, commonFilePath, 'index.mdx'); - mdPath = path.join(root, `${commonFilePath}.md`); - mdIndexPath = path.join(root, commonFilePath, 'index.md'); - versionedMdxIndexPath = getVersionedIndexPath(root, commonFilePath, '.mdx'); + if (commonFilePath) { + try { + await access(commonPath); + mdxPath = path.join(root, `${commonFilePath}.mdx`); + mdxIndexPath = path.join(root, commonFilePath, 'index.mdx'); + mdPath = path.join(root, `${commonFilePath}.md`); + mdIndexPath = path.join(root, commonFilePath, 'index.md'); + versionedMdxIndexPath = getVersionedIndexPath(root, commonFilePath, '.mdx'); + } catch { + // If the common folder does not exist, we can ignore it. + } } } // check if a versioned index file exists - if (isVersioned(slug) && fs.existsSync(mdxIndexPath)) { - mdxIndexPath = addVersionToFilePath(mdxIndexPath, slug.split(VERSION_INDICATOR)[1]); + if (isVersioned(slug)) { + try { + await access(mdxIndexPath); + mdxIndexPath = addVersionToFilePath(mdxIndexPath, slug.split(VERSION_INDICATOR)[1]); + } catch { + // pass, the file does not exist + } } - const sourcePath = - [mdxPath, mdxIndexPath, mdPath, versionedMdxIndexPath].find(fs.existsSync) ?? - mdIndexPath; + let source: string | undefined = undefined; + let sourcePath: string | undefined = undefined; + const sourcePaths = [mdxPath, mdxIndexPath, mdPath, versionedMdxIndexPath, mdIndexPath]; + const errors: Error[] = []; + for (const p of sourcePaths) { + try { + source = await readFile(p, 'utf8'); + sourcePath = p; + break; + } catch (e) { + errors.push(e); + } + } + if (source === undefined || sourcePath === undefined) { + throw new Error( + `Failed to find a valid source file for slug "${slug}". Tried:\n${sourcePaths.join('\n')}\nErrors:\n${errors.map(e => e.message).join('\n')}` + ); + } - const source = await readFile(sourcePath, 'utf8'); const cacheKey = md5(source); const cacheFile = path.join(CACHE_DIR, cacheKey); From ce8368aa304c55c2d7a6729469e1a1d43b563ef5 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Mon, 23 Jun 2025 19:53:17 +0100 Subject: [PATCH 03/23] fix broken imports --- docs/product/explore/session-replay/web/index.mdx | 2 -- docs/product/sentry-basics/performance-monitoring.mdx | 2 -- 2 files changed, 4 deletions(-) diff --git a/docs/product/explore/session-replay/web/index.mdx b/docs/product/explore/session-replay/web/index.mdx index 4d8d3495964b7c..0b3230193360f7 100644 --- a/docs/product/explore/session-replay/web/index.mdx +++ b/docs/product/explore/session-replay/web/index.mdx @@ -4,8 +4,6 @@ sidebar_order: 10 description: "Learn about Session Replay and its video-like reproductions of user interactions, which can help you see when users are frustrated and build a better web experience." --- - - Session Replay allows you to see video-like reproductions of user sessions which can help you understand what happened before, during, and after an error or performance issue occurred. You'll be able to gain deeper debugging context into issues so that you can reproduce and resolve problems faster without the guesswork. 
As you play back each session, you'll be able to see every user interaction in relation to network requests, DOM events, and console messages. It’s effectively like having [DevTools](https://developer.chrome.com/docs/devtools/overview/) active in your production user sessions.

 Replays are integrated with other parts of the Sentry product so you can see how the user experience is impacted by errors and slow transactions. You'll see session replays associated with error events on the [Issue Details](/product/issues/issue-details/) page, and those associated with slow transactions on the [Transaction Summary](/product/insights/overview/transaction-summary/) page. For [backend error replays](/product/explore/session-replay/web/getting-started/#replays-for-backend-errors), any contributing backend errors will be included in the replay's timeline, [breadcrumbs](https://docs.sentry.io/product/issues/issue-details/breadcrumbs/), and errors.
diff --git a/docs/product/sentry-basics/performance-monitoring.mdx b/docs/product/sentry-basics/performance-monitoring.mdx
index 1131df0e8cf4c3..5da714f8f777bf 100644
--- a/docs/product/sentry-basics/performance-monitoring.mdx
+++ b/docs/product/sentry-basics/performance-monitoring.mdx
@@ -4,8 +4,6 @@ sidebar_order: 1
 description: "Understand and monitor how your application performs in production. Track key metrics, analyze bottlenecks, and resolve performance issues with distributed tracing, detailed transaction data, and automated issue detection."
 ---

-
-
 In many tools, Performance Monitoring is just about tracking a few key metrics on your web pages. Sentry takes a different approach. By setting up [Tracing](/concepts/key-terms/tracing/), Sentry captures detailed performance data for every transaction in your entire application stack and automatically presents it in a variety of easy-to-use but powerful features so you can rapidly identify and resolve performance issues as they happen - all in one place.

From 8bf9f4b9d0dc9f3852c9086655c73f8bd8025bfb Mon Sep 17 00:00:00 2001
From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com>
Date: Mon, 23 Jun 2025 18:54:04 +0000
Subject: [PATCH 04/23] [getsentry/action-github-commit] Auto commit

---
 scripts/algolia.ts | 6 +++---
 src/docTree.ts     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/algolia.ts b/scripts/algolia.ts
index 423b07578eae4f..71aa8e041e962e 100644
--- a/scripts/algolia.ts
+++ b/scripts/algolia.ts
@@ -64,9 +64,9 @@ indexAndUpload();
 async function indexAndUpload() {
   // the page front matters are the source of truth for the static doc routes
   // as they are used directly by generateStaticParams() on [[..path]] page
-  const pageFrontMatters = await(
-    isDeveloperDocs ? getDevDocsFrontMatter() : getDocsFrontMatter()
-  );
+  const pageFrontMatters = await (isDeveloperDocs
+    ? getDevDocsFrontMatter()
+    : getDocsFrontMatter());
   const records = await generateAlogliaRecords(pageFrontMatters);
   console.log('πŸ”₯ Generated %d new Algolia records.', records.length);
   const existingRecordIds = await fetchExistingRecordIds(index);
diff --git a/src/docTree.ts b/src/docTree.ts
index 81e57194c6044e..1857c553eb8e6b 100644
--- a/src/docTree.ts
+++ b/src/docTree.ts
@@ -40,7 +40,7 @@ export function getDocsRootNode(): Promise<DocNode> {

 async function getDocsRootNodeUncached(): Promise<DocNode> {
   return frontmatterToTree(
-    await(isDeveloperDocs ? getDevDocsFrontMatter() : getDocsFrontMatter())
+    await (isDeveloperDocs ?
getDevDocsFrontMatter() : getDocsFrontMatter()) ); } From 871e0fc4aa8877a91a315e250a9357b5461a2249 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Mon, 23 Jun 2025 19:57:32 +0100 Subject: [PATCH 05/23] don't swallow all errors from access() --- src/mdx.ts | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index 5d77922258e932..e5e268c1258354 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -349,8 +349,11 @@ export async function getFileBySlug(slug: string) { let configFrontmatter: PlatformConfig | undefined; try { configFrontmatter = yaml.load(await readFile(configPath, 'utf8')) as PlatformConfig; - } catch (e) { + } catch (err) { // If the config file does not exist, we can ignore it. + if (err.code !== 'ENOENT') { + throw err; + } } let mdxPath = path.join(root, `${slug}.mdx`); @@ -391,8 +394,11 @@ export async function getFileBySlug(slug: string) { mdPath = path.join(root, `${commonFilePath}.md`); mdIndexPath = path.join(root, commonFilePath, 'index.md'); versionedMdxIndexPath = getVersionedIndexPath(root, commonFilePath, '.mdx'); - } catch { + } catch (err) { // If the common folder does not exist, we can ignore it. + if (err.code !== 'ENOENT') { + throw err; + } } } } @@ -402,8 +408,11 @@ export async function getFileBySlug(slug: string) { try { await access(mdxIndexPath); mdxIndexPath = addVersionToFilePath(mdxIndexPath, slug.split(VERSION_INDICATOR)[1]); - } catch { + } catch (err) { // pass, the file does not exist + if (err.code !== 'ENOENT') { + throw err; + } } } @@ -432,11 +441,11 @@ export async function getFileBySlug(slug: string) { try { const cached = JSON.parse(await readFile(cacheFile, 'utf8')); return cached; - } catch (e) { - if (e.code !== 'ENOENT') { + } catch (err) { + if (err.code !== 'ENOENT') { // If cache is corrupted, ignore and proceed // eslint-disable-next-line no-console - console.warn(`Failed to read MDX cache: ${cacheFile}`, e); + console.warn(`Failed to read MDX cache: ${cacheFile}`, err); } } From 61c73a302afe706c5c002737af3e5b9655462ddf Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 24 Jun 2025 00:10:08 +0100 Subject: [PATCH 06/23] brotli to the rescue! 
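
Cache entries for both the md-export pipeline and the mdx-bundler results are
now brotli-compressed at a low quality level, trading a little CPU for much
smaller .next/cache artifacts. A minimal sketch of the round-trip this adds
(illustrative only, not the exact code in the diff below: the function names
are made up, and the sketch uses stream.pipeline for brevity, which a later
commit in this series switches to — the diff below wires the same streams up
by hand with .pipe()):

    import {createReadStream, createWriteStream} from 'node:fs';
    import {Readable} from 'node:stream';
    import {pipeline} from 'node:stream/promises';
    import {
      constants as zlibConstants,
      createBrotliCompress,
      createBrotliDecompress,
    } from 'node:zlib';

    // Quality 4 is a deliberate trade-off: much faster than the default (11)
    // while still compressing text well.
    const CACHE_COMPRESS_LEVEL = 4;

    // Compress a string into a cache file.
    async function writeCache(cacheFile, data) {
      await pipeline(
        Readable.from(data),
        createBrotliCompress({
          params: {
            [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT,
            [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL,
            [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length,
          },
        }),
        createWriteStream(cacheFile)
      );
    }

    // Decompress a cache file straight into the target path, without
    // buffering the whole payload in memory.
    async function readCacheInto(cacheFile, target) {
      await pipeline(
        createReadStream(cacheFile),
        createBrotliDecompress(),
        createWriteStream(target, {encoding: 'utf8'})
      );
    }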
--- package.json | 2 +- scripts/generate-md-exports.mjs | 130 ++++++++++++++++++++++---------- src/mdx.ts | 64 +++++++++++++++- 3 files changed, 151 insertions(+), 45 deletions(-) diff --git a/package.json b/package.json index 203edf3cd2e4a4..8cc2726cf7e16c 100644 --- a/package.json +++ b/package.json @@ -143,7 +143,7 @@ "@types/dompurify": "3.0.5" }, "volta": { - "node": "20.11.0", + "node": "22.16.0", "yarn": "1.22.22" } } diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 9cf98b8304df28..234438e7665f16 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -2,12 +2,18 @@ /* eslint-disable no-console */ import {selectAll} from 'hast-util-select'; import {createHash} from 'node:crypto'; -import {constants as fsConstants, existsSync} from 'node:fs'; -import {copyFile, mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises'; +import {createReadStream, createWriteStream, existsSync} from 'node:fs'; +import {mkdir, opendir, readFile, rm} from 'node:fs/promises'; import {cpus} from 'node:os'; import * as path from 'node:path'; +import {Readable} from 'node:stream'; import {fileURLToPath} from 'node:url'; import {isMainThread, parentPort, Worker, workerData} from 'node:worker_threads'; +import { + constants as zlibConstants, + createBrotliCompress, + createBrotliDecompress, +} from 'node:zlib'; import rehypeParse from 'rehype-parse'; import rehypeRemark from 'rehype-remark'; import remarkGfm from 'remark-gfm'; @@ -15,14 +21,16 @@ import remarkStringify from 'remark-stringify'; import {unified} from 'unified'; import {remove} from 'unist-util-remove'; +const CACHE_COMPRESS_LEVEL = 4; + function taskFinishHandler(data) { if (data.failedTasks.length === 0) { console.log(`βœ… Worker[${data.id}]: ${data.success} files successfully.`); - } else { - hasErrors = true; - console.error(`❌ Worker[${data.id}]: ${data.failedTasks.length} files failed:`); - console.error(data.failedTasks); + return false; } + console.error(`❌ Worker[${data.id}]: ${data.failedTasks.length} files failed:`); + console.error(data.failedTasks); + return true; } async function createWork() { @@ -37,7 +45,7 @@ async function createWork() { const INPUT_DIR = path.join(root, '.next', 'server', 'app'); const OUTPUT_DIR = path.join(root, 'public', 'md-exports'); - const CACHE_VERSION = 1; + const CACHE_VERSION = 2; const CACHE_DIR = path.join(root, '.next', 'cache', 'md-exports', `v${CACHE_VERSION}`); const noCache = !existsSync(CACHE_DIR); if (noCache) { @@ -86,7 +94,7 @@ async function createWork() { workerData: {id, noCache, cacheDir: CACHE_DIR, tasks: workerTasks[id]}, }); let hasErrors = false; - worker.on('message', taskFinishHandler); + worker.on('message', data => (hasErrors = taskFinishHandler(data))); worker.on('error', reject); worker.on('exit', code => { if (code !== 0) { @@ -104,7 +112,11 @@ async function createWork() { cacheDir: CACHE_DIR, tasks: workerTasks[workerTasks.length - 1], id: workerTasks.length - 1, - }).then(taskFinishHandler) + }).then(data => { + if (taskFinishHandler(data)) { + throw new Error(`Worker[${data.id}] had some errors.`); + } + }) ); await Promise.all(workerPromises); @@ -121,45 +133,83 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) { const cacheFile = path.join(cacheDir, hash); if (!noCache) { try { - await copyFile(cacheFile, target, fsConstants.COPYFILE_FICLONE); + const {resolve, reject, promise} = Promise.withResolvers(); + const reader = createReadStream(cacheFile); + reader.on('error', reject); 
+ reader.pause(); + + const writer = createWriteStream(target, { + encoding: 'utf8', + }); + writer.on('error', reject); + + const decompressor = createBrotliDecompress(); + const stream = reader.pipe(decompressor).pipe(writer); + stream.on('error', reject); + stream.on('finish', resolve); + + reader.resume(); + + await promise; return; } catch { // pass } } - await writeFile( - target, - String( - await unified() - .use(rehypeParse) - // Need the `main div > hgroup` selector for the headers - .use(() => tree => selectAll('main div > hgroup, div#main', tree)) - // If we don't do this wrapping, rehypeRemark just returns an empty string -- yeah WTF? - .use(() => tree => ({ - type: 'element', - tagName: 'div', - properties: {}, - children: tree, - })) - .use(rehypeRemark, { - document: false, - handlers: { - // Remove buttons as they usually get confusing in markdown, especially since we use them as tab headers - button() {}, - }, - }) - // We end up with empty inline code blocks, probably from some tab logic in the HTML, remove them - .use(() => tree => remove(tree, {type: 'inlineCode', value: ''})) - .use(remarkGfm) - .use(remarkStringify) - .process(text) - ) + const data = String( + await unified() + .use(rehypeParse) + // Need the `main div > hgroup` selector for the headers + .use(() => tree => selectAll('main div > hgroup, div#main', tree)) + // If we don't do this wrapping, rehypeRemark just returns an empty string -- yeah WTF? + .use(() => tree => ({ + type: 'element', + tagName: 'div', + properties: {}, + children: tree, + })) + .use(rehypeRemark, { + document: false, + handlers: { + // Remove buttons as they usually get confusing in markdown, especially since we use them as tab headers + button() {}, + }, + }) + // We end up with empty inline code blocks, probably from some tab logic in the HTML, remove them + .use(() => tree => remove(tree, {type: 'inlineCode', value: ''})) + .use(remarkGfm) + .use(remarkStringify) + .process(text) ); - copyFile(target, cacheFile, fsConstants.COPYFILE_FICLONE).catch(error => { - // eslint-disable-next-line no-console - console.error(`Failed to cache file ${cacheFile}:`, error); + const reader = Readable.from(data); + reader.pause(); + + const {resolve, reject, promise} = Promise.withResolvers(); + const writer = createWriteStream(target, { + encoding: 'utf8', + }); + writer.on('error', reject); + + const compressor = createBrotliCompress({ + chunkSize: 32 * 1024, + params: { + [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT, + [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL, + [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length, + }, }); + const cacheWriter = createWriteStream(cacheFile); + + const writeStream = reader.pipe(writer); + writeStream.on('error', reject); + writeStream.on('finish', resolve); + + const cacheWriteStream = reader.pipe(compressor).pipe(cacheWriter); + cacheWriteStream.on('error', err => console.warn('Error writing cache file:', err)); + reader.resume(); + + await promise; } async function processTaskList({id, tasks, cacheDir, noCache}) { diff --git a/src/mdx.ts b/src/mdx.ts index e5e268c1258354..eb9bd281e2e12b 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -5,9 +5,15 @@ import matter from 'gray-matter'; import {s} from 'hastscript'; import yaml from 'js-yaml'; import {bundleMDX} from 'mdx-bundler'; -import {mkdirSync} from 'node:fs'; -import {access, opendir, readFile, writeFile} from 'node:fs/promises'; +import {createReadStream, createWriteStream, mkdirSync} from 'node:fs'; +import {access, 
opendir, readFile} from 'node:fs/promises';
 import path from 'node:path';
+import {Readable} from 'node:stream';
+import {
+  constants as zlibConstants,
+  createBrotliCompress,
+  createBrotliDecompress,
+} from 'node:zlib';
 import rehypeAutolinkHeadings from 'rehype-autolink-headings';
 import rehypePresetMinify from 'rehype-preset-minify';
 import rehypePrismDiff from 'rehype-prism-diff';
@@ -35,11 +41,61 @@ import {isNotNil} from './utils';
 import {isVersioned, VERSION_INDICATOR} from './versioning';

 const root = process.cwd();
+const CACHE_COMPRESS_LEVEL = 4;
 const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler');
 mkdirSync(CACHE_DIR, {recursive: true});

 const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex');

+async function readCacheFile(file: string): Promise<string> {
+  const {resolve, reject, promise} = Promise.withResolvers<string>();
+
+  const reader = createReadStream(file);
+  reader.on('error', reject);
+  reader.pause();
+
+  const decompressor = createBrotliDecompress();
+  decompressor.on('error', reject);
+
+  const buffers: Buffer[] = [];
+  const stream = reader.pipe(decompressor);
+  stream.on('data', chunk => {
+    buffers.push(chunk);
+  });
+  stream.on('finish', () => {
+    resolve(Buffer.concat(buffers).toString('utf8'));
+  });
+
+  reader.resume();
+  return await promise;
+}
+
+async function writeCacheFile(file: string, data: string) {
+  const {resolve, reject, promise} = Promise.withResolvers();
+
+  const reader = Readable.from(data);
+  reader.pause();
+
+  const writer = createWriteStream(file);
+  writer.on('error', reject);
+
+  const compressor = createBrotliCompress({
+    chunkSize: 32 * 1024,
+    params: {
+      [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT,
+      [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL,
+      [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length,
+    },
+  });
+  const stream = reader.pipe(compressor).pipe(writer);
+  stream.on('error', reject);
+  stream.on('finish', resolve);
+
+  reader.resume();
+
+  await promise;
+}
+
 function formatSlug(slug: string) {
   return slug.replace(/\.(mdx|md)/, '');
 }
@@ -495,7 +495,7 @@ export async function getFileBySlug(slug: string) {
   const cacheFile = path.join(CACHE_DIR, cacheKey);

   try {
-    const cached = JSON.parse(await readFile(cacheFile, 'utf8'));
+    const cached = JSON.parse(await readCacheFile(cacheFile));
     return cached;
   } catch (err) {
     if (err.code !== 'ENOENT') {
@@ -574,7 +630,7 @@ export async function getFileBySlug(slug: string) {
     },
   };

-  writeFile(cacheFile, JSON.stringify(resultObj), 'utf8').catch(e => {
+  writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => {
     // eslint-disable-next-line no-console
     console.warn(`Failed to write MDX cache: ${cacheFile}`, e);
   });

From 3862de8c82380a46057aaf242514ff623e3f0a03 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 00:26:02 +0100
Subject: [PATCH 07/23] withResolvers shim

---
 package.json                    |  2 +-
 scripts/generate-md-exports.mjs | 12 ++++++++++--
 src/mdx.ts                      | 16 ++++++++++++++--
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/package.json b/package.json
index 8cc2726cf7e16c..0e6837cf7a7ef9 100644
--- a/package.json
+++ b/package.json
@@ -143,7 +143,7 @@
     "@types/dompurify": "3.0.5"
   },
   "volta": {
-    "node": "22.16.0",
+    "node": "20.19.3",
     "yarn": "1.22.22"
   }
 }
diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs
index 234438e7665f16..3819ff5c801463 100644
--- a/scripts/generate-md-exports.mjs
+++ b/scripts/generate-md-exports.mjs
@@ -126,6 +126,14 @@ async function createWork() {
 }

 const md5 = data => createHash('md5').update(data).digest('hex');
+const withResolvers = () => {
+  let resolve, reject;
+  const promise = new Promise((res, rej) => {
+    resolve = res;
+    reject = rej;
+  });
+  return {resolve, reject, promise};
+};

 async function genMDFromHTML(source, target, {cacheDir, noCache}) {
   const text = await readFile(source, {encoding: 'utf8'});
@@ -133,7 +141,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
   const cacheFile = path.join(cacheDir, hash);
   if (!noCache) {
     try {
-      const {resolve, reject, promise} = Promise.withResolvers();
+      const {resolve, reject, promise} = withResolvers();
       const reader = createReadStream(cacheFile);
       reader.on('error', reject);
       reader.pause();
@@ -185,7 +193,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
   const reader = Readable.from(data);
   reader.pause();

-  const {resolve, reject, promise} = Promise.withResolvers();
+  const {resolve, reject, promise} = withResolvers();
   const writer = createWriteStream(target, {
     encoding: 'utf8',
   });
diff --git a/src/mdx.ts b/src/mdx.ts
index eb9bd281e2e12b..8717e18cdc35f5 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -47,8 +47,20 @@ mkdirSync(CACHE_DIR, {recursive: true});

 const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex');

+// TODO: Remove this shim when we upgrade to Node 22 on Vercel
+function withResolvers<T>() {
+  let resolve!: (value: T | PromiseLike<T>) => void;
+  let reject!: (reason?: any) => void;
+  const promise = new Promise<T>((res, rej) => {
+    resolve = res;
+    reject = rej;
+  });
+  return {resolve, reject, promise};
+};
+
 async function readCacheFile(file: string): Promise<string> {
-  const {resolve, reject, promise} = Promise.withResolvers<string>();
+  const {resolve, reject, promise} = withResolvers<string>();

   const reader = createReadStream(file);
   reader.on('error', reject);
@@ -71,7 +83,7 @@ async function readCacheFile(file: string): Promise<string> {
 }

 async function writeCacheFile(file: string, data: string) {
-  const {resolve, reject, promise} = Promise.withResolvers();
+  const {resolve, reject, promise} = withResolvers();

   const reader = Readable.from(data);
   reader.pause();

From f529f6a971683ee6953b39a5703136fff7442404 Mon Sep 17 00:00:00 2001
From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com>
Date: Mon, 23 Jun 2025 23:26:55 +0000
Subject: [PATCH 08/23] [getsentry/action-github-commit] Auto commit

---
 src/mdx.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mdx.ts b/src/mdx.ts
index 8717e18cdc35f5..a15a580f0a512a 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -56,8 +56,7 @@ function withResolvers<T>() {
     reject = rej;
   });
   return {resolve, reject, promise};
-};
-
+}

 async function readCacheFile(file: string): Promise<string> {

From 39f98698231606039e4583d1518cdf0df8083803 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 00:46:49 +0100
Subject: [PATCH 09/23] revert shim as we are on n22 now

---
 package.json                    |  2 +-
 scripts/generate-md-exports.mjs | 12 ++----------
 src/mdx.ts                      | 15 ++------------
 3 files changed, 5 insertions(+), 24 deletions(-)

diff --git a/package.json b/package.json
index 0e6837cf7a7ef9..8cc2726cf7e16c 100644
--- a/package.json
+++ b/package.json
@@ -143,7 +143,7 @@
     "@types/dompurify": "3.0.5"
   },
   "volta": {
-    "node": "20.19.3",
+    "node": "22.16.0",
     "yarn": "1.22.22"
   }
 }
diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs
index 3819ff5c801463..234438e7665f16 100644
--- a/scripts/generate-md-exports.mjs
+++ b/scripts/generate-md-exports.mjs
@@ -126,14 +126,6 @@ async function createWork() {
 }

 const md5 = data => createHash('md5').update(data).digest('hex');
-const withResolvers = () => {
-  let resolve, reject;
-  const promise = new Promise((res, rej) => {
-    resolve = res;
-    reject = rej;
-  });
-  return {resolve, reject, promise};
-};

 async function genMDFromHTML(source, target, {cacheDir, noCache}) {
   const text = await readFile(source, {encoding: 'utf8'});
@@ -141,7 +133,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
   const cacheFile = path.join(cacheDir, hash);
   if (!noCache) {
     try {
-      const {resolve, reject, promise} = withResolvers();
+      const {resolve, reject, promise} = Promise.withResolvers();
       const reader = createReadStream(cacheFile);
       reader.on('error', reject);
       reader.pause();
@@ -193,7 +185,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
   const reader = Readable.from(data);
   reader.pause();

-  const {resolve, reject, promise} = withResolvers();
+  const {resolve, reject, promise} = Promise.withResolvers();
   const writer = createWriteStream(target, {
     encoding: 'utf8',
   });
diff --git a/src/mdx.ts b/src/mdx.ts
index a15a580f0a512a..eb9bd281e2e12b 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -47,19 +47,8 @@ mkdirSync(CACHE_DIR, {recursive: true});

 const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex');

-// TODO: Remove this shim when we upgrade to Node 22 on Vercel
-function withResolvers<T>() {
-  let resolve!: (value: T | PromiseLike<T>) => void;
-  let reject!: (reason?: any) => void;
-  const promise = new Promise<T>((res, rej) => {
-    resolve = res;
-    reject = rej;
-  });
-  return {resolve, reject, promise};
-}
-
 async function readCacheFile(file: string): Promise<string> {
-  const {resolve, reject, promise} = withResolvers<string>();
+  const {resolve, reject, promise} = Promise.withResolvers<string>();

   const reader = createReadStream(file);
   reader.on('error', reject);
@@ -82,7 +71,7 @@ async function readCacheFile(file: string): Promise<string> {
 }

 async function writeCacheFile(file: string, data: string) {
-  const {resolve, reject, promise} = withResolvers();
+  const {resolve, reject, promise} = Promise.withResolvers();

   const reader = Readable.from(data);
   reader.pause();

From 0673c540c12d0558b13b13429195d4435bfa2e03 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 01:20:34 +0100
Subject: [PATCH 10/23] try without a cache version

---
 scripts/generate-md-exports.mjs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs
index 234438e7665f16..be0fd206f41e23 100644
--- a/scripts/generate-md-exports.mjs
+++ b/scripts/generate-md-exports.mjs
@@ -45,8 +45,7 @@ async function createWork() {
   const INPUT_DIR = path.join(root, '.next', 'server', 'app');
   const OUTPUT_DIR = path.join(root, 'public', 'md-exports');

-  const CACHE_VERSION = 2;
-  const CACHE_DIR = path.join(root, '.next', 'cache', 'md-exports', `v${CACHE_VERSION}`);
+  const CACHE_DIR = path.join(root, '.next', 'cache', 'md-exports');
   const noCache = !existsSync(CACHE_DIR);
   if (noCache) {
     await mkdir(CACHE_DIR, {recursive: true});

From 54fd866aa9ac1c018f570b4978f2cebb86958986 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 12:08:54 +0100
Subject: [PATCH 11/23] add some cache diagnostics

---
 scripts/generate-md-exports.mjs | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/scripts/generate-md-exports.mjs
b/scripts/generate-md-exports.mjs index be0fd206f41e23..bf3e145bbe301a 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -25,6 +25,9 @@ const CACHE_COMPRESS_LEVEL = 4; function taskFinishHandler(data) { if (data.failedTasks.length === 0) { + console.log( + `πŸ’° Worker[${data.id}]: Cache hits: ${data.cacheHits} (${Math.round((data.cacheHits / data.success) * 100)}%)` + ); console.log(`βœ… Worker[${data.id}]: ${data.success} files successfully.`); return false; } @@ -45,12 +48,6 @@ async function createWork() { const INPUT_DIR = path.join(root, '.next', 'server', 'app'); const OUTPUT_DIR = path.join(root, 'public', 'md-exports'); - const CACHE_DIR = path.join(root, '.next', 'cache', 'md-exports'); - const noCache = !existsSync(CACHE_DIR); - if (noCache) { - await mkdir(CACHE_DIR, {recursive: true}); - } - console.log(`πŸš€ Starting markdown generation from: ${INPUT_DIR}`); console.log(`πŸ“ Output directory: ${OUTPUT_DIR}`); @@ -58,6 +55,14 @@ async function createWork() { await rm(OUTPUT_DIR, {recursive: true, force: true}); await mkdir(OUTPUT_DIR, {recursive: true}); + const CACHE_DIR = path.join(root, '.next', 'cache', 'md-exports'); + console.log(`πŸ’° Cache directory: ${CACHE_DIR}`); + const noCache = !existsSync(CACHE_DIR); + if (noCache) { + console.log(`ℹ️ No cache directory found, this will take a while...`); + await mkdir(CACHE_DIR, {recursive: true}); + } + // On a 16-core machine, 8 workers were optimal (and slightly faster than 16) const numWorkers = Math.max(Math.floor(cpus().length / 2), 2); const workerTasks = new Array(numWorkers).fill(null).map(() => []); @@ -150,7 +155,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) { reader.resume(); await promise; - return; + return true; } catch { // pass } @@ -209,13 +214,15 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) { reader.resume(); await promise; + return false; } async function processTaskList({id, tasks, cacheDir, noCache}) { const failedTasks = []; + let cacheHits = 0; for (const {sourcePath, targetPath} of tasks) { try { - await genMDFromHTML(sourcePath, targetPath, { + cacheHits += await genMDFromHTML(sourcePath, targetPath, { cacheDir, noCache, }); @@ -223,7 +230,7 @@ async function processTaskList({id, tasks, cacheDir, noCache}) { failedTasks.push({sourcePath, targetPath, error}); } } - return {id, success: tasks.length - failedTasks.length, failedTasks}; + return {id, success: tasks.length - failedTasks.length, failedTasks, cacheHits}; } async function doWork(work) { From eaf2bcf7021bb8203d270a618dc3283976d1538b Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 24 Jun 2025 14:14:16 +0100 Subject: [PATCH 12/23] finally, cache hits! 
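
The md-export cache never hit because Next.js embeds build-specific
<script> tags (hashed chunk URLs and inline state) in every rendered page,
so the MD5 of the raw HTML changed on every build. Hashing a normalized
copy with the script tags stripped makes the key stable across builds.
Roughly (illustrative sketch; `stableCacheKey` is a made-up name, and the
follow-up commit makes the match case-insensitive):

    import {createHash} from 'node:crypto';

    const md5 = data => createHash('md5').update(data).digest('hex');

    // Strip <script>...</script> blocks before hashing so only changes to
    // the visible content invalidate the cache.
    const stableCacheKey = html =>
      md5(html.replace(/<script[^>]*>[\s\S]*?<\/script>/g, ''));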
---
 scripts/generate-md-exports.mjs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs
index bf3e145bbe301a..3684745a373a74 100644
--- a/scripts/generate-md-exports.mjs
+++ b/scripts/generate-md-exports.mjs
@@ -28,7 +28,7 @@ function taskFinishHandler(data) {
     console.log(
       `πŸ’° Worker[${data.id}]: Cache hits: ${data.cacheHits} (${Math.round((data.cacheHits / data.success) * 100)}%)`
     );
-    console.log(`βœ… Worker[${data.id}]: ${data.success} files successfully.`);
+    console.log(`βœ… Worker[${data.id}]: converted ${data.success} files successfully.`);
     return false;
   }
   console.error(`❌ Worker[${data.id}]: ${data.failedTasks.length} files failed:`);
@@ -132,7 +132,10 @@ async function createWork() {
 const md5 = data => createHash('md5').update(data).digest('hex');

 async function genMDFromHTML(source, target, {cacheDir, noCache}) {
-  const text = await readFile(source, {encoding: 'utf8'});
+  const text = (await readFile(source, {encoding: 'utf8'}))
+    // Remove all script tags, as they are not needed in markdown
+    // and they are not stable across builds, causing cache misses
+    .replace(/<script[^>]*>[\s\S]*?<\/script>/g, '');
   const hash = md5(text);
   const cacheFile = path.join(cacheDir, hash);
   if (!noCache) {

From 7211307c4b413eb4cd5b8cf1683a7b106254f265 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 14:17:45 +0100
Subject: [PATCH 13/23] case-insensitive script tag removal

---
 scripts/generate-md-exports.mjs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs
index 3684745a373a74..8cb32598af2f44 100644
--- a/scripts/generate-md-exports.mjs
+++ b/scripts/generate-md-exports.mjs
@@ -135,7 +135,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
   const text = (await readFile(source, {encoding: 'utf8'}))
     // Remove all script tags, as they are not needed in markdown
     // and they are not stable across builds, causing cache misses
-    .replace(/<script[^>]*>[\s\S]*?<\/script>/g, '');
+    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
   const hash = md5(text);
   const cacheFile = path.join(cacheDir, hash);
   if (!noCache) {

From 08df8fc3b7db1a137a2954558098397d54ea4d2e Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 18:00:18 +0100
Subject: [PATCH 14/23] do streams correctly

---
 .babelrc.js.bak                 | 16 -------
 next.config.ts                  |  4 ++
 package.json                    |  5 ++-
 scripts/generate-md-exports.mjs | 70 ++++++++++++------------
 src/mdx.ts                      | 77 +++++++++++++--------------
 yarn.lock                       | 18 ++++----
 6 files changed, 75 insertions(+), 115 deletions(-)
 delete mode 100644 .babelrc.js.bak

diff --git a/.babelrc.js.bak b/.babelrc.js.bak
deleted file mode 100644
index 68d19cf2a5d8a5..00000000000000
--- a/.babelrc.js.bak
+++ /dev/null
@@ -1,16 +0,0 @@
-/* eslint-env node */
-/* eslint import/no-nodejs-modules:0 */
-
-let ignore = [`**/dist`];
-
-// Jest needs to compile this code, but generally we don't want this copied
-// to output folders
-if (process.env.NODE_ENV !== `test`) {
-  ignore.push(`**/__tests__`);
-}
-
-module.exports = {
-  sourceMaps: true,
-  presets: [],
-  ignore,
-};
diff --git a/next.config.ts b/next.config.ts
index 56fb66502edf1b..dce5e0466b9de0 100644
--- a/next.config.ts
+++ b/next.config.ts
@@ -38,6 +38,10 @@ const nextConfig = {
   trailingSlash: true,
   serverExternalPackages: ['rehype-preset-minify'],
   outputFileTracingExcludes,
+  experimental: {
+    webpackBuildWorker: true,
+    parallelServerCompiles: true,
+  },
webpack: (config, options) => { config.plugins.push( codecovNextJSWebpackPlugin({ diff --git a/package.json b/package.json index 8cc2726cf7e16c..e3e1f299eed03c 100644 --- a/package.json +++ b/package.json @@ -116,7 +116,7 @@ "@tailwindcss/forms": "^0.5.7", "@tailwindcss/typography": "^0.5.10", "@types/dompurify": "3.0.5", - "@types/node": "^20", + "@types/node": "^22", "@types/react": "18.3.12", "@types/react-dom": "18.3.1", "@types/ws": "^8.5.10", @@ -140,7 +140,8 @@ }, "resolutions": { "dompurify": "3.2.4", - "@types/dompurify": "3.0.5" + "@types/dompurify": "3.0.5", + "@types/node": "^22" }, "volta": { "node": "22.16.0", diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 8cb32598af2f44..6b99a70e720731 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -7,6 +7,7 @@ import {mkdir, opendir, readFile, rm} from 'node:fs/promises'; import {cpus} from 'node:os'; import * as path from 'node:path'; import {Readable} from 'node:stream'; +import {pipeline} from 'node:stream/promises'; import {fileURLToPath} from 'node:url'; import {isMainThread, parentPort, Worker, workerData} from 'node:worker_threads'; import { @@ -140,24 +141,14 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) { const cacheFile = path.join(cacheDir, hash); if (!noCache) { try { - const {resolve, reject, promise} = Promise.withResolvers(); - const reader = createReadStream(cacheFile); - reader.on('error', reject); - reader.pause(); - - const writer = createWriteStream(target, { - encoding: 'utf8', - }); - writer.on('error', reject); - - const decompressor = createBrotliDecompress(); - const stream = reader.pipe(decompressor).pipe(writer); - stream.on('error', reject); - stream.on('finish', resolve); - - reader.resume(); + await pipeline( + createReadStream(cacheFile), + createBrotliDecompress(), + createWriteStream(target, { + encoding: 'utf8', + }) + ); - await promise; return true; } catch { // pass @@ -190,33 +181,28 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) { .process(text) ); const reader = Readable.from(data); - reader.pause(); - const {resolve, reject, promise} = Promise.withResolvers(); - const writer = createWriteStream(target, { - encoding: 'utf8', - }); - writer.on('error', reject); - - const compressor = createBrotliCompress({ - chunkSize: 32 * 1024, - params: { - [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT, - [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL, - [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length, - }, - }); - const cacheWriter = createWriteStream(cacheFile); - - const writeStream = reader.pipe(writer); - writeStream.on('error', reject); - writeStream.on('finish', resolve); - - const cacheWriteStream = reader.pipe(compressor).pipe(cacheWriter); - cacheWriteStream.on('error', err => console.warn('Error writing cache file:', err)); - reader.resume(); + await Promise.all([ + pipeline( + reader, + createWriteStream(target, { + encoding: 'utf8', + }) + ), + pipeline( + reader, + createBrotliCompress({ + chunkSize: 32 * 1024, + params: { + [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT, + [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL, + [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length, + }, + }), + createWriteStream(cacheFile) + ).catch(err => console.warn('Error writing cache file:', err)), + ]); - await promise; return false; } diff --git a/src/mdx.ts b/src/mdx.ts index eb9bd281e2e12b..fa2b289c6112a9 100644 --- a/src/mdx.ts 
+++ b/src/mdx.ts @@ -8,7 +8,10 @@ import {bundleMDX} from 'mdx-bundler'; import {createReadStream, createWriteStream, mkdirSync} from 'node:fs'; import {access, opendir, readFile} from 'node:fs/promises'; import path from 'node:path'; -import {Readable} from 'node:stream'; +// @ts-expect-error ts(2305) -- For some reason "compose" is not recognized in the types +import {compose, Readable} from 'node:stream'; +import {json} from 'node:stream/consumers'; +import {pipeline} from 'node:stream/promises'; import { constants as zlibConstants, createBrotliCompress, @@ -40,6 +43,15 @@ import {FrontMatter, Platform, PlatformConfig} from './types'; import {isNotNil} from './utils'; import {isVersioned, VERSION_INDICATOR} from './versioning'; +type SlugFile = { + frontMatter: Platform & {slug: string}; + matter: Omit, 'data'> & { + data: Platform; + }; + mdxSource: string; + toc: TocNode[]; +}; + const root = process.cwd(); const CACHE_COMPRESS_LEVEL = 4; const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler'); @@ -47,53 +59,26 @@ mkdirSync(CACHE_DIR, {recursive: true}); const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex'); -async function readCacheFile(file: string): Promise { - const {resolve, reject, promise} = Promise.withResolvers(); - +async function readCacheFile(file: string): Promise { const reader = createReadStream(file); - reader.on('error', reject); - reader.pause(); - const decompressor = createBrotliDecompress(); - decompressor.on('error', reject); - const buffers: Buffer[] = []; - const stream = reader.pipe(decompressor); - stream.on('data', chunk => { - buffers.push(chunk); - }); - stream.on('finish', () => { - resolve(Buffer.concat(buffers).toString('utf8')); - }); - - reader.resume(); - return await promise; + return (await json(compose(reader, decompressor))) as T; } async function writeCacheFile(file: string, data: string) { - const {resolve, reject, promise} = Promise.withResolvers(); - - const reader = Readable.from(data); - reader.pause(); - - const writer = createWriteStream(file); - writer.on('error', reject); - - const compressor = createBrotliCompress({ - chunkSize: 32 * 1024, - params: { - [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT, - [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL, - [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length, - }, - }); - const stream = reader.pipe(compressor).pipe(writer); - stream.on('error', reject); - stream.on('finish', resolve); - - reader.resume(); - - await promise; + await pipeline( + Readable.from(data), + createBrotliCompress({ + chunkSize: 32 * 1024, + params: { + [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT, + [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL, + [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length, + }, + }), + createWriteStream(file) + ); } function formatSlug(slug: string) { @@ -255,7 +240,7 @@ async function getAllFilesFrontMatter(): Promise { ) as PlatformConfig; } catch (err) { // the file may not exist and that's fine, for anything else we throw - if (err.code !== 'ENOENT') { + if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR') { throw err; } } @@ -398,7 +383,7 @@ export const addVersionToFilePath = (filePath: string, version: string) => { return `${filePath}__v${version}`; }; -export async function getFileBySlug(slug: string) { +export async function getFileBySlug(slug: string): Promise { // no versioning on a config file const configPath = path.join(root, slug.split(VERSION_INDICATOR)[0], 'config.yml'); @@ 
-495,7 +480,7 @@ export async function getFileBySlug(slug: string) { const cacheFile = path.join(CACHE_DIR, cacheKey); try { - const cached = JSON.parse(await readCacheFile(cacheFile)); + const cached = await readCacheFile(cacheFile); return cached; } catch (err) { if (err.code !== 'ENOENT') { @@ -620,7 +605,7 @@ export async function getFileBySlug(slug: string) { mergedFrontmatter = {...frontmatter, ...configFrontmatter}; } - const resultObj = { + const resultObj: SlugFile = { matter: result.matter, mdxSource: code, toc, diff --git a/yarn.lock b/yarn.lock index cc68e32c1b4f84..d1b5a995b88d65 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3660,12 +3660,12 @@ dependencies: "@types/node" "*" -"@types/node@*", "@types/node@^20": - version "20.17.6" - resolved "https://registry.npmjs.org/@types/node/-/node-20.17.6.tgz" - integrity sha512-VEI7OdvK2wP7XHnsuXbAJnEpEkF6NjSN45QJlL4VGqZSXsnicpesdTWsg9RISeSdYd3yeRj/y3k5KGjUXYnFwQ== +"@types/node@*", "@types/node@^22": + version "22.15.32" + resolved "https://registry.yarnpkg.com/@types/node/-/node-22.15.32.tgz#c301cc2275b535a5e54bb81d516b1d2e9afe06e5" + integrity sha512-3jigKqgSjsH6gYZv2nEsqdXfZqIFGAV36XYYjf9KGZ3PSG+IhLecqPnI310RvjutyMwifE2hhhNEklOUrvx/wA== dependencies: - undici-types "~6.19.2" + undici-types "~6.21.0" "@types/parse-json@^4.0.0": version "4.0.2" @@ -12284,10 +12284,10 @@ unbox-primitive@^1.0.2: has-symbols "^1.0.3" which-boxed-primitive "^1.0.2" -undici-types@~6.19.2: - version "6.19.8" - resolved "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz" - integrity sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw== +undici-types@~6.21.0: + version "6.21.0" + resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.21.0.tgz#691d00af3909be93a7faa13be61b3a5b50ef12cb" + integrity sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ== undici@^5.25.4: version "5.28.5" From a42bbba3346d94055ec2dbd06b5a3bac271aa602 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 24 Jun 2025 20:26:58 +0100 Subject: [PATCH 15/23] limit caching to CI --- next.config.ts | 4 ---- src/mdx.ts | 36 +++++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/next.config.ts b/next.config.ts index dce5e0466b9de0..56fb66502edf1b 100644 --- a/next.config.ts +++ b/next.config.ts @@ -38,10 +38,6 @@ const nextConfig = { trailingSlash: true, serverExternalPackages: ['rehype-preset-minify'], outputFileTracingExcludes, - experimental: { - webpackBuildWorker: true, - parallelServerCompiles: true, - }, webpack: (config, options) => { config.plugins.push( codecovNextJSWebpackPlugin({ diff --git a/src/mdx.ts b/src/mdx.ts index fa2b289c6112a9..b99a53bc3c02b1 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -240,7 +240,7 @@ async function getAllFilesFrontMatter(): Promise { ) as PlatformConfig; } catch (err) { // the file may not exist and that's fine, for anything else we throw - if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR') { + if (err.code !== 'ENOENT') { throw err; } } @@ -476,17 +476,21 @@ export async function getFileBySlug(slug: string): Promise { ); } - const cacheKey = md5(source); - const cacheFile = path.join(CACHE_DIR, cacheKey); + let cacheKey: string | null = null; + let cacheFile: string | null = null; + if (process.env.CI === '1') { + cacheKey = md5(source); + cacheFile = path.join(CACHE_DIR, cacheKey); - try { - const cached = await readCacheFile(cacheFile); - return cached; - } catch (err) { - if (err.code 
!== 'ENOENT') { - // If cache is corrupted, ignore and proceed - // eslint-disable-next-line no-console - console.warn(`Failed to read MDX cache: ${cacheFile}`, err); + try { + const cached = await readCacheFile(cacheFile); + return cached; + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR') { + // If cache is corrupted, ignore and proceed + // eslint-disable-next-line no-console + console.warn(`Failed to read MDX cache: ${cacheFile}`, err); + } } } @@ -615,10 +619,12 @@ export async function getFileBySlug(slug: string): Promise { }, }; - writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { - // eslint-disable-next-line no-console - console.warn(`Failed to write MDX cache: ${cacheFile}`, e); - }); + if (cacheFile) { + writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { + // eslint-disable-next-line no-console + console.warn(`Failed to write MDX cache: ${cacheFile}`, e); + }); + } return resultObj; } From 18d6ea61746aca83f25abb4037a5e69422bda784 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 24 Jun 2025 20:53:21 +0100 Subject: [PATCH 16/23] fix incorrect import --- .../sourcemaps/overview/javascript.cloudflare.mdx | 4 +--- .../sourcemaps/upload/primer/javascript.cloudflare.mdx | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/platform-includes/sourcemaps/overview/javascript.cloudflare.mdx b/platform-includes/sourcemaps/overview/javascript.cloudflare.mdx index e4ffaf08ecadfe..4d7502d2cacb4a 100644 --- a/platform-includes/sourcemaps/overview/javascript.cloudflare.mdx +++ b/platform-includes/sourcemaps/overview/javascript.cloudflare.mdx @@ -19,9 +19,7 @@ If you want to configure source maps to upload manually, follow the guide for yo ### Guides for Source Maps -- - TypeScript (tsc) - +- TypeScript (tsc) If you're using a bundler like Webpack, Vite, Rollup, or Esbuild, use the diff --git a/platform-includes/sourcemaps/upload/primer/javascript.cloudflare.mdx b/platform-includes/sourcemaps/upload/primer/javascript.cloudflare.mdx index c8226f2478d54b..cea61cd2f65814 100644 --- a/platform-includes/sourcemaps/upload/primer/javascript.cloudflare.mdx +++ b/platform-includes/sourcemaps/upload/primer/javascript.cloudflare.mdx @@ -7,6 +7,6 @@ If you can't find the tool of your choice in the list below, we recommend you ch - + - \ No newline at end of file + From 2c2dcc90394e719aa9cee72e558905068b948518 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Tue, 24 Jun 2025 21:38:25 +0100 Subject: [PATCH 17/23] remove all caching logic from mdx --- src/mdx.ts | 64 ------------------------------------------------------ 1 file changed, 64 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index b99a53bc3c02b1..83153141afd1f9 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -1,22 +1,10 @@ -import {BinaryLike, createHash} from 'crypto'; - import {cache} from 'react'; import matter from 'gray-matter'; import {s} from 'hastscript'; import yaml from 'js-yaml'; import {bundleMDX} from 'mdx-bundler'; -import {createReadStream, createWriteStream, mkdirSync} from 'node:fs'; import {access, opendir, readFile} from 'node:fs/promises'; import path from 'node:path'; -// @ts-expect-error ts(2305) -- For some reason "compose" is not recognized in the types -import {compose, Readable} from 'node:stream'; -import {json} from 'node:stream/consumers'; -import {pipeline} from 'node:stream/promises'; -import { - constants as zlibConstants, - createBrotliCompress, - createBrotliDecompress, -} from 'node:zlib'; import rehypeAutolinkHeadings from 
'rehype-autolink-headings';
 import rehypePresetMinify from 'rehype-preset-minify';
 import rehypePrismDiff from 'rehype-prism-diff';
@@ -53,33 +41,6 @@ type SlugFile = {
 };
 
 const root = process.cwd();
-const CACHE_COMPRESS_LEVEL = 4;
-const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler');
-mkdirSync(CACHE_DIR, {recursive: true});
-
-const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex');
-
-async function readCacheFile<T>(file: string): Promise<T> {
-  const reader = createReadStream(file);
-  const decompressor = createBrotliDecompress();
-
-  return (await json(compose(reader, decompressor))) as T;
-}
-
-async function writeCacheFile(file: string, data: string) {
-  await pipeline(
-    Readable.from(data),
-    createBrotliCompress({
-      chunkSize: 32 * 1024,
-      params: {
-        [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT,
-        [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL,
-        [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length,
-      },
-    }),
-    createWriteStream(file)
-  );
-}
 
 function formatSlug(slug: string) {
   return slug.replace(/\.(mdx|md)/, '');
@@ -476,24 +437,6 @@ export async function getFileBySlug(slug: string): Promise<SlugFile> {
     );
   }
 
-  let cacheKey: string | null = null;
-  let cacheFile: string | null = null;
-  if (process.env.CI === '1') {
-    cacheKey = md5(source);
-    cacheFile = path.join(CACHE_DIR, cacheKey);
-
-    try {
-      const cached = await readCacheFile<SlugFile>(cacheFile);
-      return cached;
-    } catch (err) {
-      if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR') {
-        // If cache is corrupted, ignore and proceed
-        // eslint-disable-next-line no-console
-        console.warn(`Failed to read MDX cache: ${cacheFile}`, err);
-      }
-    }
-  }
-
   process.env.ESBUILD_BINARY_PATH = path.join(
     root,
     'node_modules',
@@ -619,13 +562,6 @@ export async function getFileBySlug(slug: string): Promise<SlugFile> {
     },
   };
 
-  if (cacheFile) {
-    writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => {
-      // eslint-disable-next-line no-console
-      console.warn(`Failed to write MDX cache: ${cacheFile}`, e);
-    });
-  }
-
   return resultObj;
 }
 

From 6a1e895db1c9154e96ea6a64d39e8a886ff1a7ca Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 21:54:44 +0100
Subject: [PATCH 18/23] simplification

---
 src/mdx.ts | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mdx.ts b/src/mdx.ts
index 83153141afd1f9..825668ee7d14b5 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -72,10 +72,9 @@ const isSupported = (
 let getDocsFrontMatterCache: Promise<FrontMatter[]> | undefined;
 
 export function getDocsFrontMatter(): Promise<FrontMatter[]> {
-  if (getDocsFrontMatterCache) {
-    return getDocsFrontMatterCache;
+  if (!getDocsFrontMatterCache) {
+    getDocsFrontMatterCache = getDocsFrontMatterUncached();
   }
-  getDocsFrontMatterCache = getDocsFrontMatterUncached();
   return getDocsFrontMatterCache;
 }
 

From 49c83e1a57ae65afc5d1e56311cc04b9aa3e6497 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 21:54:47 +0100
Subject: [PATCH 19/23] Revert "remove all caching logic from mdx"

This reverts commit 2c2dcc90394e719aa9cee72e558905068b948518.
---
 src/mdx.ts | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/src/mdx.ts b/src/mdx.ts
index 825668ee7d14b5..9c8a838e97ec2e 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -1,10 +1,22 @@
+import {BinaryLike, createHash} from 'crypto';
+
 import {cache} from 'react';
 import matter from 'gray-matter';
 import {s} from 'hastscript';
 import yaml from 'js-yaml';
 import {bundleMDX} from 'mdx-bundler';
+import {createReadStream, createWriteStream, mkdirSync} from 'node:fs';
 import {access, opendir, readFile} from 'node:fs/promises';
 import path from 'node:path';
+// @ts-expect-error ts(2305) -- For some reason "compose" is not recognized in the types
+import {compose, Readable} from 'node:stream';
+import {json} from 'node:stream/consumers';
+import {pipeline} from 'node:stream/promises';
+import {
+  constants as zlibConstants,
+  createBrotliCompress,
+  createBrotliDecompress,
+} from 'node:zlib';
 import rehypeAutolinkHeadings from 'rehype-autolink-headings';
 import rehypePresetMinify from 'rehype-preset-minify';
 import rehypePrismDiff from 'rehype-prism-diff';
@@ -41,6 +53,33 @@ type SlugFile = {
 };
 
 const root = process.cwd();
+const CACHE_COMPRESS_LEVEL = 4;
+const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler');
+mkdirSync(CACHE_DIR, {recursive: true});
+
+const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex');
+
+async function readCacheFile<T>(file: string): Promise<T> {
+  const reader = createReadStream(file);
+  const decompressor = createBrotliDecompress();
+
+  return (await json(compose(reader, decompressor))) as T;
+}
+
+async function writeCacheFile(file: string, data: string) {
+  await pipeline(
+    Readable.from(data),
+    createBrotliCompress({
+      chunkSize: 32 * 1024,
+      params: {
+        [zlibConstants.BROTLI_PARAM_MODE]: zlibConstants.BROTLI_MODE_TEXT,
+        [zlibConstants.BROTLI_PARAM_QUALITY]: CACHE_COMPRESS_LEVEL,
+        [zlibConstants.BROTLI_PARAM_SIZE_HINT]: data.length,
+      },
+    }),
+    createWriteStream(file)
+  );
+}
 
 function formatSlug(slug: string) {
   return slug.replace(/\.(mdx|md)/, '');
@@ -436,6 +475,24 @@ export async function getFileBySlug(slug: string): Promise<SlugFile> {
     );
   }
 
+  let cacheKey: string | null = null;
+  let cacheFile: string | null = null;
+  if (process.env.CI === '1') {
+    cacheKey = md5(source);
+    cacheFile = path.join(CACHE_DIR, cacheKey);
+
+    try {
+      const cached = await readCacheFile<SlugFile>(cacheFile);
+      return cached;
+    } catch (err) {
+      if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR') {
+        // If cache is corrupted, ignore and proceed
+        // eslint-disable-next-line no-console
+        console.warn(`Failed to read MDX cache: ${cacheFile}`, err);
+      }
+    }
+  }
+
   process.env.ESBUILD_BINARY_PATH = path.join(
     root,
     'node_modules',
@@ -561,6 +618,13 @@ export async function getFileBySlug(slug: string): Promise<SlugFile> {
     },
   };
 
+  if (cacheFile) {
+    writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => {
+      // eslint-disable-next-line no-console
+      console.warn(`Failed to write MDX cache: ${cacheFile}`, e);
+    });
+  }
+
   return resultObj;
 }
 

From 9ce08af32baa8e44ae7785f731375603ee8a7415 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 22:41:15 +0100
Subject: [PATCH 20/23] limit concurrency and try again

---
 package.json |   1 +
 src/files.ts |  34 ++-----
 src/mdx.ts   | 244 ++++++++++++++++++++++++++++-----------------------
 yarn.lock    |  12 +++
 4 files changed, 153 insertions(+), 138 deletions(-)

diff --git a/package.json b/package.json
index e3e1f299eed03c..1c80d5af6f92eb 100644
--- a/package.json
+++ b/package.json
@@ -78,6 +78,7 @@
   "next-plausible": "^3.12.4",
   "next-themes": "^0.3.0",
   "nextjs-toploader": "^1.6.6",
+  "p-limit": "^6.2.0",
   "platformicons": "^8.0.4",
   "prism-sentry": "^1.0.2",
   "query-string": "^6.13.1",
diff --git a/src/files.ts b/src/files.ts
index db46761aa5bd84..ee2a99838cdae0 100644
--- a/src/files.ts
+++ b/src/files.ts
@@ -1,37 +1,13 @@
-import fs from 'fs';
+import {readdir} from 'fs/promises';
 import path from 'path';
 
-// pipe two functions together
-function pipe<T, U, V>(f: (x: T) => U, g: (y: U) => V): (x: T) => V;
-// pipe three functions
-function pipe<T, U, V, W>(f: (x: T) => U, g: (y: U) => V, h: (z: V) => W): (x: T) => W;
-function pipe(...fns: Function[]) {
-  return x => fns.reduce((v, f) => f(v), x);
-}
-
-const map =
-  <T, U>(fn: (a: T) => U) =>
-  (input: T[]) =>
-    input.map(fn);
-
-const walkDir = (fullPath: string) => {
-  return fs.statSync(fullPath).isFile() ? fullPath : getAllFilesRecursively(fullPath);
-};
-
-const pathJoinPrefix = (prefix: string) => (extraPath: string) =>
-  path.join(prefix, extraPath);
-
 /**
  * @returns Array of file paths
  */
-const getAllFilesRecursively = (folder: string): [string] => {
-  return pipe(
-    // yes, this arrow function is necessary to narrow down the readdirSync overload
-    (x: string) => fs.readdirSync(x),
-    map(pipe(pathJoinPrefix(folder), walkDir)),
-    // flattenArray
-    x => x.flat(Infinity)
-  )(folder) as [string];
+const getAllFilesRecursively = async (folder: string): Promise<string[]> => {
+  return (await readdir(folder, {withFileTypes: true, recursive: true}))
+    .filter(dirent => dirent.isFile())
+    .map(dirent => path.join(dirent.parentPath || dirent.path, dirent.name));
 };
 
 export default getAllFilesRecursively;
diff --git a/src/mdx.ts b/src/mdx.ts
index 9c8a838e97ec2e..287c6d847c453c 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -17,6 +17,7 @@ import {
   createBrotliCompress,
   createBrotliDecompress,
 } from 'node:zlib';
+import {limitFunction} from 'p-limit';
 import rehypeAutolinkHeadings from 'rehype-autolink-headings';
 import rehypePresetMinify from 'rehype-preset-minify';
 import rehypePrismDiff from 'rehype-prism-diff';
@@ -53,6 +54,7 @@ type SlugFile = {
 };
 
 const root = process.cwd();
+const FILE_CONCURRENCY_LIMIT = 200;
 const CACHE_COMPRESS_LEVEL = 4;
 const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler');
 mkdirSync(CACHE_DIR, {recursive: true});
@@ -176,23 +178,28 @@ async function getDocsFrontMatterUncached(): Promise<FrontMatter[]> {
 export async function getDevDocsFrontMatter(): Promise<FrontMatter[]> {
   const folder = 'develop-docs';
   const docsPath = path.join(root, folder);
-  const files = getAllFilesRecursively(docsPath);
+  const files = await getAllFilesRecursively(docsPath);
   const fmts = (
     await Promise.all(
-      files.map(async file => {
-        const fileName = file.slice(docsPath.length + 1);
-        if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
-          return undefined;
-        }
-
-        const source = await readFile(file, 'utf8');
-        const {data: frontmatter} = matter(source);
-        return {
-          ...(frontmatter as FrontMatter),
-          slug: fileName.replace(/\/index.mdx?$/, '').replace(/\.mdx?$/, ''),
-          sourcePath: path.join(folder, fileName),
-        };
-      })
+      files.map(
+        limitFunction(
+          async file => {
+            const fileName = file.slice(docsPath.length + 1);
+            if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
+              return undefined;
+            }
+
+            const source = await readFile(file, 'utf8');
+            const {data: frontmatter} = matter(source);
+            return {
+              ...(frontmatter as FrontMatter),
+              slug: fileName.replace(/\/index.mdx?$/, '').replace(/\.mdx?$/, ''),
+
sourcePath: path.join(folder, fileName), + }; + }, + {concurrency: FILE_CONCURRENCY_LIMIT} + ) + ) ) ).filter(isNotNil); return fmts; @@ -200,27 +207,31 @@ export async function getDevDocsFrontMatter(): Promise { async function getAllFilesFrontMatter(): Promise { const docsPath = path.join(root, 'docs'); - const files = getAllFilesRecursively(docsPath); + const files = await getAllFilesRecursively(docsPath); const allFrontMatter: FrontMatter[] = []; + await Promise.all( - files.map(async file => { - const fileName = file.slice(docsPath.length + 1); - if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') { - return; - } + files.map( + limitFunction(async file => { + const fileName = file.slice(docsPath.length + 1); + if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') { + return; + } - if (fileName.indexOf('/common/') !== -1) { - return; - } + if (fileName.indexOf('/common/') !== -1) { + return; + } - const source = await readFile(file, 'utf8'); - const {data: frontmatter} = matter(source); - allFrontMatter.push({ - ...(frontmatter as FrontMatter), - slug: formatSlug(fileName), - sourcePath: path.join('docs', fileName), - }); - }) + const source = await readFile(file, 'utf8'); + const {data: frontmatter} = matter(source); + allFrontMatter.push({ + ...(frontmatter as FrontMatter), + slug: formatSlug(fileName), + sourcePath: path.join('docs', fileName), + }); + }, + {concurrency: FILE_CONCURRENCY_LIMIT}) + ) ); // Add all `common` files in the right place. @@ -251,46 +262,56 @@ async function getAllFilesFrontMatter(): Promise { continue; } - const commonFileNames: string[] = getAllFilesRecursively(commonPath).filter( + const commonFileNames: string[] = (await getAllFilesRecursively(commonPath)).filter( p => path.extname(p) === '.mdx' ); const commonFiles = await Promise.all( - commonFileNames.map(async commonFileName => { - const source = await readFile(commonFileName, 'utf8'); - const {data: frontmatter} = matter(source); - return {commonFileName, frontmatter: frontmatter as FrontMatter}; - }) + commonFileNames.map( + limitFunction( + async commonFileName => { + const source = await readFile(commonFileName, 'utf8'); + const {data: frontmatter} = matter(source); + return {commonFileName, frontmatter: frontmatter as FrontMatter}; + }, + {concurrency: FILE_CONCURRENCY_LIMIT} + ) + ) ); await Promise.all( - commonFiles.map(async f => { - if (!isSupported(f.frontmatter, platformName)) { - return; - } - - const subpath = f.commonFileName.slice(commonPath.length + 1); - const slug = f.commonFileName - .slice(docsPath.length + 1) - .replace(/\/common\//, '/'); - const noFrontMatter = ( - await Promise.allSettled([ - access(path.join(docsPath, slug)), - access(path.join(docsPath, slug.replace('/index.mdx', '.mdx'))), - ]) - ).every(r => r.status === 'rejected'); - if (noFrontMatter) { - let frontmatter = f.frontmatter; - if (subpath === 'index.mdx') { - frontmatter = {...frontmatter, ...platformFrontmatter}; - } - allFrontMatter.push({ - ...frontmatter, - slug: formatSlug(slug), - sourcePath: 'docs/' + f.commonFileName.slice(docsPath.length + 1), - }); - } - }) + commonFiles.map( + limitFunction( + async f => { + if (!isSupported(f.frontmatter, platformName)) { + return; + } + + const subpath = f.commonFileName.slice(commonPath.length + 1); + const slug = f.commonFileName + .slice(docsPath.length + 1) + .replace(/\/common\//, '/'); + const noFrontMatter = ( + await Promise.allSettled([ + access(path.join(docsPath, slug)), + access(path.join(docsPath, 
slug.replace('/index.mdx', '.mdx'))), + ]) + ).every(r => r.status === 'rejected'); + if (noFrontMatter) { + let frontmatter = f.frontmatter; + if (subpath === 'index.mdx') { + frontmatter = {...frontmatter, ...platformFrontmatter}; + } + allFrontMatter.push({ + ...frontmatter, + slug: formatSlug(slug), + sourcePath: 'docs/' + f.commonFileName.slice(docsPath.length + 1), + }); + } + }, + {concurrency: FILE_CONCURRENCY_LIMIT} + ) + ) ); const guidesPath = path.join(docsPath, 'platforms', platformName, 'guides'); @@ -319,30 +340,41 @@ async function getAllFilesFrontMatter(): Promise { } await Promise.all( - commonFiles.map(async f => { - if (!isSupported(f.frontmatter, platformName, guideName)) { - return; - } - - const subpath = f.commonFileName.slice(commonPath.length + 1); - const slug = path.join('platforms', platformName, 'guides', guideName, subpath); - try { - await access(path.join(docsPath, slug)); - return; - } catch { - // pass - } - - let frontmatter = f.frontmatter; - if (subpath === 'index.mdx') { - frontmatter = {...frontmatter, ...guideFrontmatter}; - } - allFrontMatter.push({ - ...frontmatter, - slug: formatSlug(slug), - sourcePath: 'docs/' + f.commonFileName.slice(docsPath.length + 1), - }); - }) + commonFiles.map( + limitFunction( + async f => { + if (!isSupported(f.frontmatter, platformName, guideName)) { + return; + } + + const subpath = f.commonFileName.slice(commonPath.length + 1); + const slug = path.join( + 'platforms', + platformName, + 'guides', + guideName, + subpath + ); + try { + await access(path.join(docsPath, slug)); + return; + } catch { + // pass + } + + let frontmatter = f.frontmatter; + if (subpath === 'index.mdx') { + frontmatter = {...frontmatter, ...guideFrontmatter}; + } + allFrontMatter.push({ + ...frontmatter, + slug: formatSlug(slug), + sourcePath: 'docs/' + f.commonFileName.slice(docsPath.length + 1), + }); + }, + {concurrency: FILE_CONCURRENCY_LIMIT} + ) + ) ); } } @@ -475,21 +507,17 @@ export async function getFileBySlug(slug: string): Promise { ); } - let cacheKey: string | null = null; - let cacheFile: string | null = null; - if (process.env.CI === '1') { - cacheKey = md5(source); - cacheFile = path.join(CACHE_DIR, cacheKey); + const cacheKey = md5(source); + const cacheFile = path.join(CACHE_DIR, cacheKey); - try { - const cached = await readCacheFile(cacheFile); - return cached; - } catch (err) { - if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR') { - // If cache is corrupted, ignore and proceed - // eslint-disable-next-line no-console - console.warn(`Failed to read MDX cache: ${cacheFile}`, err); - } + try { + const cached = await readCacheFile(cacheFile); + return cached; + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'ABORT_ERR') { + // If cache is corrupted, ignore and proceed + // eslint-disable-next-line no-console + console.warn(`Failed to read MDX cache: ${cacheFile}`, err); } } @@ -618,12 +646,10 @@ export async function getFileBySlug(slug: string): Promise { }, }; - if (cacheFile) { - writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { - // eslint-disable-next-line no-console - console.warn(`Failed to write MDX cache: ${cacheFile}`, e); - }); - } + writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => { + // eslint-disable-next-line no-console + console.warn(`Failed to write MDX cache: ${cacheFile}`, e); + }); return resultObj; } diff --git a/yarn.lock b/yarn.lock index d1b5a995b88d65..1cbea29b29578e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10044,6 +10044,13 @@ p-limit@^3.0.1, 
p-limit@^3.0.2, p-limit@^3.1.0:
   dependencies:
     yocto-queue "^0.1.0"
 
+p-limit@^6.2.0:
+  version "6.2.0"
+  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-6.2.0.tgz#c254d22ba6aeef441a3564c5e6c2f2da59268a0f"
+  integrity sha512-kuUqqHNUqoIWp/c467RI4X6mmyuojY5jGutNU0wVTmEOOfcuwLqyMVoAi9MKi2Ak+5i9+nhmrK4ufZE8069kHA==
+  dependencies:
+    yocto-queue "^1.1.1"
+
 p-locate@^4.1.0:
   version "4.1.0"
   resolved "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz"
@@ -13054,6 +13061,11 @@ yocto-queue@^0.1.0:
   resolved "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz"
   integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==
 
+yocto-queue@^1.1.1:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-1.2.1.tgz#36d7c4739f775b3cbc28e6136e21aa057adec418"
+  integrity sha512-AyeEbWOu/TAXdxlV9wmGcR0+yh2j3vYPGOECcIj2S7MkrLyC7ne+oye2BKTItt0ii2PHk4cDy+95+LshzbXnGg==
+
 zod@^3.22.4:
   version "3.23.8"
   resolved "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz"

From b85ef271a5e0dd9fceb2a9db7ff174c7d2ff3a24 Mon Sep 17 00:00:00 2001
From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com>
Date: Tue, 24 Jun 2025 21:42:49 +0000
Subject: [PATCH 21/23] [getsentry/action-github-commit] Auto commit

---
 src/mdx.ts | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/src/mdx.ts b/src/mdx.ts
index 287c6d847c453c..90b29a34109afe 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -212,25 +212,27 @@ async function getAllFilesFrontMatter(): Promise<FrontMatter[]> {
 
   await Promise.all(
     files.map(
-      limitFunction(async file => {
-        const fileName = file.slice(docsPath.length + 1);
-        if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
-          return;
-        }
-
-        if (fileName.indexOf('/common/') !== -1) {
-          return;
-        }
-
-        const source = await readFile(file, 'utf8');
-        const {data: frontmatter} = matter(source);
-        allFrontMatter.push({
-          ...(frontmatter as FrontMatter),
-          slug: formatSlug(fileName),
-          sourcePath: path.join('docs', fileName),
-        });
-      },
-      {concurrency: FILE_CONCURRENCY_LIMIT})
+      limitFunction(
+        async file => {
+          const fileName = file.slice(docsPath.length + 1);
+          if (path.extname(fileName) !== '.md' && path.extname(fileName) !== '.mdx') {
+            return;
+          }
+
+          if (fileName.indexOf('/common/') !== -1) {
+            return;
+          }
+
+          const source = await readFile(file, 'utf8');
+          const {data: frontmatter} = matter(source);
+          allFrontMatter.push({
+            ...(frontmatter as FrontMatter),
+            slug: formatSlug(fileName),
+            sourcePath: path.join('docs', fileName),
+          });
+        },
+        {concurrency: FILE_CONCURRENCY_LIMIT}
+      )
     )
   );
 

From a93fa94ff455a245e60e113dab6545f4f65d1ccf Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Tue, 24 Jun 2025 23:04:49 +0100
Subject: [PATCH 22/23] add crucial comment

---
 src/mdx.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/mdx.ts b/src/mdx.ts
index 90b29a34109afe..e328e0e4000909 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -54,6 +54,11 @@ type SlugFile = {
 };
 
 const root = process.cwd();
+// We need to limit this as we have code doing things like Promise.all(allFiles.map(...))
+// where `allFiles` is on the order of thousands. This not only slows down the build but
+// also crashes dynamic pages such as `/platform-redirect`, as these run on Vercel
+// Functions, which look like AWS Lambda, and we get `EMFILE` errors when trying to open
+// so many files at once.
 const FILE_CONCURRENCY_LIMIT = 200;
 const CACHE_COMPRESS_LEVEL = 4;
 const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler');

From fcc9bac57964baaacae0dc5bb881dcab3cbaa2c0 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Wed, 25 Jun 2025 08:07:52 +0100
Subject: [PATCH 23/23] mo cache mo free time

---
 src/mdx.ts | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/mdx.ts b/src/mdx.ts
index e328e0e4000909..9336d7f341372e 100644
--- a/src/mdx.ts
+++ b/src/mdx.ts
@@ -180,11 +180,11 @@ async function getDocsFrontMatterUncached(): Promise<FrontMatter[]> {
   return frontMatter;
 }
 
-export async function getDevDocsFrontMatter(): Promise<FrontMatter[]> {
+export async function getDevDocsFrontMatterUncached(): Promise<FrontMatter[]> {
   const folder = 'develop-docs';
   const docsPath = path.join(root, folder);
   const files = await getAllFilesRecursively(docsPath);
-  const fmts = (
+  const frontMatters = (
     await Promise.all(
       files.map(
         limitFunction(
@@ -207,7 +207,16 @@ export async function getDevDocsFrontMatter(): Promise<FrontMatter[]> {
       )
     )
   ).filter(isNotNil);
-  return fmts;
+  return frontMatters;
+}
+
+let getDevDocsFrontMatterCache: Promise<FrontMatter[]> | undefined;
+
+export function getDevDocsFrontMatter(): Promise<FrontMatter[]> {
+  if (!getDevDocsFrontMatterCache) {
+    getDevDocsFrontMatterCache = getDevDocsFrontMatterUncached();
+  }
+  return getDevDocsFrontMatterCache;
 }
 
 async function getAllFilesFrontMatter(): Promise<FrontMatter[]> {
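
Taken together, patches 20-23 leave src/mdx.ts with three cooperating pieces:
a bounded-concurrency file walk (p-limit's limitFunction), a module-level
memoized promise so concurrent callers share a single traversal, and a
content-addressed on-disk cache keyed by an md5 of the source. The sketch
below condenses the pattern in isolation; readLimited, collectSources,
cacheKeyFor, and the inlined concurrency value are illustrative stand-ins,
not the exact code above.

  import {createHash} from 'node:crypto';
  import {readFile} from 'node:fs/promises';
  import {limitFunction} from 'p-limit';

  // Bound concurrent open()s so a Promise.all over thousands of files
  // cannot exhaust file descriptors (the EMFILE failure mode described
  // in the PATCH 22 comment).
  const readLimited = limitFunction((file: string) => readFile(file, 'utf8'), {
    concurrency: 200, // illustrative; the patches use FILE_CONCURRENCY_LIMIT
  });

  // Memoize the whole pass behind one promise: the first caller starts the
  // traversal and every later caller awaits the same in-flight result.
  let collectCache: Promise<string[]> | undefined;
  function collectSources(files: string[]): Promise<string[]> {
    if (!collectCache) {
      collectCache = Promise.all(files.map(file => readLimited(file)));
    }
    return collectCache;
  }

  // Content-addressed cache key: any change to the source changes the key,
  // so stale entries are never read, only left orphaned on disk.
  const cacheKeyFor = (source: string): string =>
    createHash('md5').update(source).digest('hex');

Note also that the cache writes in getFileBySlug stay fire-and-forget (the
.catch only warns), so a broken cache can slow a build down but never fail it.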