Skip to content
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"dev:developer-docs": "yarn enforce-redirects && NEXT_PUBLIC_DEVELOPER_DOCS=1 yarn dev",
"build:developer-docs": "yarn enforce-redirects && git submodule init && git submodule update && NEXT_PUBLIC_DEVELOPER_DOCS=1 yarn build",
"build": "yarn enforce-redirects && next build && yarn generate-md-exports",
"build:preview": "yarn enforce-redirects && next build",
"generate-md-exports": "node scripts/generate-md-exports.mjs",
"vercel:build:developer-docs": "yarn enforce-redirects && git submodule init && git submodule update && NEXT_PUBLIC_DEVELOPER_DOCS=1 yarn build",
"start:dev": "NODE_ENV=development yarn build && yarn start",
Expand Down
8 changes: 5 additions & 3 deletions src/docTree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ export function getDocsRootNode(): Promise<DocNode> {
}

/**
 * Builds the docs tree from scratch (no caching): loads the frontmatter
 * for the active docs flavor (developer docs vs. user docs) and folds it
 * into a `DocNode` tree.
 */
async function getDocsRootNodeUncached(): Promise<DocNode> {
  const loadFrontmatter = isDeveloperDocs ? getDevDocsFrontMatter : getDocsFrontMatter;
  return frontmatterToTree(await loadFrontmatter());
}

const sidebarOrderSorter = (a: FrontMatter, b: FrontMatter) => {
Expand Down
102 changes: 91 additions & 11 deletions src/mdx.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,55 @@ if (process.env.CI) {
mkdirSync(CACHE_DIR, {recursive: true});
}

// Cache registry hash per worker to avoid recomputing for every file
let cachedRegistryHash: string | null = null;
async function getRegistryHash(): Promise<string> {
if (cachedRegistryHash) {
return cachedRegistryHash;
}
const [apps, packages] = await Promise.all([getAppRegistry(), getPackageRegistry()]);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a race condition here: if you call this function 3 times back to back, it would make 3 separate calls.

What you need for proper caching is to change the type of cachedRegistryHash to Promise<string>, and do:

cachedRegistryHash = Promise.all(...). then(([apps, packages]) => md5(...));
return cachedRegistryHash;

cachedRegistryHash = md5(JSON.stringify({apps, packages}));
return cachedRegistryHash;
}

// Track cache statistics per worker (silent tracking)
const cacheStats = {
registryHits: 0,
registryMisses: 0,
uniqueRegistryFiles: new Set<string>(),
};

// Log summary periodically and at end
let lastSummaryLog = Date.now();
function logCacheSummary(force = false) {
const now = Date.now();
// Log every 30 seconds or when forced
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic seems unnecessary? Why not just emit at the end?

if (!force && now - lastSummaryLog < 30000) {
return;
}
lastSummaryLog = now;

const total = cacheStats.registryHits + cacheStats.registryMisses;
if (total === 0) {
return;
}

const hitRate = ((cacheStats.registryHits / total) * 100).toFixed(1);
const uniqueFiles = cacheStats.uniqueRegistryFiles.size;

// eslint-disable-next-line no-console
console.log(
`📊 [MDX Cache] ${cacheStats.registryHits}/${total} registry files cached (${hitRate}% hit rate, ${uniqueFiles} unique files)`
);
}

// Log final summary when worker exits
if (typeof process !== 'undefined') {
process.on('beforeExit', () => {
logCacheSummary(true);
});
}

const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex');

async function readCacheFile<T>(file: string): Promise<T> {
Expand Down Expand Up @@ -209,6 +258,7 @@ export async function getDevDocsFrontMatterUncached(): Promise<FrontMatter[]> {
)
)
).filter(isNotNil);

return frontMatters;
}

Expand Down Expand Up @@ -396,6 +446,7 @@ async function getAllFilesFrontMatter(): Promise<FrontMatter[]> {
);
}
}

return allFrontMatter;
}

Expand Down Expand Up @@ -531,22 +582,37 @@ export async function getFileBySlug(slug: string): Promise<SlugFile> {
const outdir = path.join(root, 'public', 'mdx-images');
await mkdir(outdir, {recursive: true});

// If the file contains content that depends on the Release Registry (such as an SDK's latest version), avoid using the cache for that file, i.e. always rebuild it.
// This is because the content from the registry might have changed since the last time the file was cached.
// If a new component that injects content from the registry is introduced, it should be added to the patterns below.
const skipCache =
// Check if file depends on Release Registry
const dependsOnRegistry =
source.includes('@inject') ||
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if this @inject thing was related to the registry

source.includes('<PlatformSDKPackageName') ||
source.includes('<LambdaLayerDetail');

if (process.env.CI) {
if (skipCache) {
// eslint-disable-next-line no-console
console.info(
`Not using cached version of ${sourcePath}, as its content depends on the Release Registry`
);
// Build cache key from source content
const sourceHash = md5(source);

// For files that depend on registry, include registry version in cache key
// This prevents serving stale content when registry is updated
if (dependsOnRegistry) {
try {
// Get registry hash (cached per worker to avoid redundant fetches)
const registryHash = await getRegistryHash();
cacheKey = `${sourceHash}-${registryHash}`;
} catch (err) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic can and probably should be improved: the only way this can throw an exception should be a network related issue. In that case, pages depending on the registry will also have a problem so the try-catch is redundant. It's also wasteful as if it raises an exception, that means it will raise an exception for every single page.

I'd rather add a retry mechanism into the cache key function and not handle the exception if the retries fail, halting the build, as we need the registry connection for the build.

// If registry fetch fails, skip caching for safety
// eslint-disable-next-line no-console
console.warn(
`Failed to fetch registry for cache key, skipping cache: ${err.message}`
);
cacheKey = null;
}
} else {
cacheKey = md5(source);
// Regular files without registry dependencies
cacheKey = sourceHash;
}

if (cacheKey) {
cacheFile = path.join(CACHE_DIR, `${cacheKey}.br`);
assetsCacheDir = path.join(CACHE_DIR, cacheKey);

Expand All @@ -555,6 +621,12 @@ export async function getFileBySlug(slug: string): Promise<SlugFile> {
readCacheFile<SlugFile>(cacheFile),
cp(assetsCacheDir, outdir, {recursive: true}),
]);
// Track cache hit silently
if (dependsOnRegistry) {
cacheStats.registryHits++;
cacheStats.uniqueRegistryFiles.add(sourcePath);
logCacheSummary(); // Periodically log summary (every 30s)
}
return cached;
} catch (err) {
if (
Expand All @@ -570,6 +642,13 @@ export async function getFileBySlug(slug: string): Promise<SlugFile> {
}
}

// Track cache miss silently
if (dependsOnRegistry) {
cacheStats.registryMisses++;
cacheStats.uniqueRegistryFiles.add(sourcePath);
logCacheSummary(); // Periodically log summary (every 30s)
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Cache Miss Tracking Fails in Non-CI Environments

The cache miss tracking for registry-dependent files occurs unconditionally, even when process.env.CI is false and the cache system is not being used. The condition should be if (process.env.CI && dependsOnRegistry) instead of just if (dependsOnRegistry) to avoid recording false cache misses when outside of CI environments. This causes misleading cache statistics when the caching system isn't active.

Fix in Cursor Fix in Web


process.env.ESBUILD_BINARY_PATH = path.join(
root,
'node_modules',
Expand Down Expand Up @@ -700,7 +779,8 @@ export async function getFileBySlug(slug: string): Promise<SlugFile> {
},
};

if (assetsCacheDir && cacheFile && !skipCache) {
// Save to cache if we have a cache key (we now cache everything, including registry-dependent files)
if (assetsCacheDir && cacheFile && cacheKey) {
await cp(assetsCacheDir, outdir, {recursive: true});
writeCacheFile(cacheFile, JSON.stringify(resultObj)).catch(e => {
// eslint-disable-next-line no-console
Expand Down
2 changes: 2 additions & 0 deletions vercel.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{
"$schema": "https://openapi.vercel.sh/vercel.json",
"buildCommand": "sh vercel.sh",
"headers": [
{
"source": "/(.*)",
Expand Down
7 changes: 7 additions & 0 deletions vercel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Vercel build entry point: run the full build (including generated md
# exports) for production deployments, and the faster preview build for
# everything else.

case "$VERCEL_ENV" in
  production) yarn run build ;;
  *)          yarn run build:preview ;;
esac
Loading