Skip to content

Commit d0a3e3d

Browse files
nearestnabors and claude committed
Add tests for clean markdown file generation
New tests verify:
- Every MDX page has a corresponding clean markdown file
- Clean markdown files don't contain raw MDX syntax (outside code blocks)
- Clean markdown files don't contain unwanted HTML elements
- Clean markdown files aren't stale (i.e., the source MDX is not newer than the generated markdown file)

These tests would have caught the Vercel deployment issue where markdown files weren't being generated.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b8bdf2d commit d0a3e3d

File tree

1 file changed

+197
-0
lines changed

1 file changed

+197
-0
lines changed

tests/clean-markdown.test.ts

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
import { existsSync, readFileSync, statSync } from "node:fs";
2+
import path from "node:path";
3+
import fg from "fast-glob";
4+
import { describe, expect, test } from "vitest";
5+
6+
/** Per-test timeout in milliseconds; passed as the final argument of each `test(...)` call. */
const TIMEOUT = 30_000;

/** Directory, relative to the working directory, that is searched for generated clean markdown files. */
const MARKDOWN_DIR = "public/_markdown";

// Regex patterns at module level for performance

/**
 * Matches one fenced code block: an opening ``` up to the nearest following ```
 * (non-greedy, spanning newlines). Global so that `String.prototype.replace`
 * removes every occurrence.
 */
const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;

/** Matches a leading `app/` at the very start of a path. */
const APP_PREFIX_PATTERN = /^app\//;

/** Matches a trailing `/page.mdx` at the very end of a path. */
const PAGE_MDX_SUFFIX_PATTERN = /\/page\.mdx$/;
13+
14+
/**
 * Strips fenced code blocks from markdown content
 * so we don't match patterns inside code examples.
 *
 * @param content - Markdown text to filter.
 * @returns `content` with every match of `CODE_BLOCK_PATTERN` replaced by the
 *   empty string; text outside the matches is returned unchanged.
 */
function stripCodeBlocks(content: string): string {
  return content.replace(CODE_BLOCK_PATTERN, "");
}
21+
22+
/**
 * Converts an MDX file path to its corresponding clean markdown path
 * e.g., app/en/home/page.mdx -> en/home
 *
 * @param mdxFile - Path of an MDX page file, using forward slashes.
 * @returns `mdxFile` with a leading `app/` and a trailing `/page.mdx` removed.
 *   A part that does not match is left in place.
 */
function mdxPathToRelativePath(mdxFile: string): string {
  // NOTE(review): for "app/page.mdx" the prefix strip leaves "page.mdx", which
  // has no leading slash, so the suffix pattern does not match and "page.mdx"
  // is returned as is. Confirm whether a root-level page can occur.
  return mdxFile
    .replace(APP_PREFIX_PATTERN, "")
    .replace(PAGE_MDX_SUFFIX_PATTERN, "");
}
31+
32+
// Patterns that indicate raw MDX syntax leaked into clean markdown
// These are checked OUTSIDE of code blocks only
//
// None of these patterns uses the `g` flag on purpose. A global RegExp keeps
// its `lastIndex` after a successful `.test()`, so reusing the same object on
// the next file would start scanning mid-string and could miss a real match.
// Only presence/absence matters here, so a stateless (non-global) pattern is
// the correct choice.
const MDX_PATTERNS: ReadonlyArray<{ pattern: RegExp; name: string }> = [
  // `m` makes `^` match at the start of every line, not just the first.
  { pattern: /^import\s+/m, name: "import statement" },
  { pattern: /<Steps>|<\/Steps>/, name: "<Steps> component" },
  { pattern: /<Tabs[\s>]/, name: "<Tabs> component" },
  { pattern: /<Callout[\s>]/, name: "<Callout> component" },
  { pattern: /<GuideOverview[\s>]/, name: "<GuideOverview> component" },
  // `[\s>]` after the tag name keeps `<Card` from also matching `<Cards`.
  { pattern: /<Card[\s>]/, name: "<Card> component" },
  { pattern: /<Cards[\s>]/, name: "<Cards> component" },
];
43+
44+
// HTML elements that should be cleaned from markdown
//
// Patterns are case-insensitive (`i`) but deliberately NOT global: a `g`
// RegExp remembers `lastIndex` between `.test()` calls, so reusing it across
// files could skip a match near the start of the next file. Each entry only
// needs to answer "does this opening tag occur anywhere?".
const HTML_PATTERNS: ReadonlyArray<{ pattern: RegExp; name: string }> = [
  { pattern: /<script[\s>]/i, name: "<script>" },
  { pattern: /<style[\s>]/i, name: "<style>" },
  { pattern: /<svg[\s>]/i, name: "<svg>" },
  { pattern: /<nav[\s>]/i, name: "<nav>" },
  { pattern: /<footer[\s>]/i, name: "<footer>" },
  { pattern: /<aside[\s>]/i, name: "<aside>" },
];
53+
54+
/**
 * Checks the generated clean markdown files under `MARKDOWN_DIR` against the
 * MDX pages under `app/`:
 *  1. every `app/**\/page.mdx` (outside `app/_*`) has a matching `.md` file,
 *  2. no `.md` file contains raw MDX syntax outside fenced code blocks,
 *  3. no `.md` file contains the listed unwanted HTML elements,
 *  4. no `.md` file is older than the MDX page it corresponds to.
 *
 * Each test logs the offending files with `console.error` before asserting,
 * so a failure shows every offender rather than just a count.
 */
describe("Clean Markdown Files", () => {
  test(
    "every MDX page has a corresponding clean markdown file",
    async () => {
      const mdxFiles = await fg("app/**/page.mdx", {
        ignore: ["app/_*/**"],
      });

      const missing: string[] = [];

      for (const mdxFile of mdxFiles) {
        // Convert app/en/home/page.mdx -> public/_markdown/en/home.md
        const relativePath = mdxPathToRelativePath(mdxFile);
        const markdownPath = path.join(MARKDOWN_DIR, `${relativePath}.md`);

        if (!existsSync(markdownPath)) {
          missing.push(`${mdxFile} -> ${markdownPath}`);
        }
      }

      if (missing.length > 0) {
        console.error("\nMissing clean markdown files:");
        for (const file of missing) {
          console.error(` - ${file}`);
        }
        console.error(
          "\nRun 'pnpm generate:markdown' to regenerate clean markdown files."
        );
      }

      expect(
        missing.length,
        `${missing.length} MDX files are missing corresponding clean markdown files`
      ).toBe(0);
    },
    TIMEOUT
  );

  test(
    "clean markdown files do not contain raw MDX syntax",
    async () => {
      const markdownFiles = await fg(`${MARKDOWN_DIR}/**/*.md`);
      const errors: Array<{ file: string; issue: string }> = [];

      for (const file of markdownFiles) {
        const content = readFileSync(file, "utf-8");
        // Strip code blocks so we don't match patterns in code examples
        const contentWithoutCode = stripCodeBlocks(content);

        for (const { pattern, name } of MDX_PATTERNS) {
          // `search` always scans from index 0 and leaves `lastIndex` as it
          // found it, so the result cannot be affected by a previous file's
          // match even if the pattern object is global. `pattern.test(...)`
          // would resume from the previous `lastIndex` and could miss matches.
          if (contentWithoutCode.search(pattern) !== -1) {
            errors.push({ file, issue: `contains ${name}` });
          }
        }
      }

      if (errors.length > 0) {
        console.error("\nClean markdown files with raw MDX syntax:");
        for (const { file, issue } of errors) {
          console.error(` - ${file}: ${issue}`);
        }
      }

      expect(
        errors.length,
        `${errors.length} clean markdown files contain raw MDX syntax`
      ).toBe(0);
    },
    TIMEOUT
  );

  test(
    "clean markdown files do not contain unwanted HTML elements",
    async () => {
      const markdownFiles = await fg(`${MARKDOWN_DIR}/**/*.md`);
      const errors: Array<{ file: string; issue: string }> = [];

      for (const file of markdownFiles) {
        // Unlike the MDX check, the full content (code blocks included) is scanned.
        const content = readFileSync(file, "utf-8");

        for (const { pattern, name } of HTML_PATTERNS) {
          // Stateless match; see the MDX test for why `pattern.test` is avoided.
          if (content.search(pattern) !== -1) {
            errors.push({ file, issue: `contains ${name}` });
          }
        }
      }

      if (errors.length > 0) {
        console.error("\nClean markdown files with unwanted HTML:");
        for (const { file, issue } of errors) {
          console.error(` - ${file}: ${issue}`);
        }
      }

      expect(
        errors.length,
        `${errors.length} clean markdown files contain unwanted HTML elements`
      ).toBe(0);
    },
    TIMEOUT
  );

  test(
    "clean markdown files are not stale (modified after source MDX)",
    async () => {
      const mdxFiles = await fg("app/**/page.mdx", {
        ignore: ["app/_*/**"],
      });

      const stale: Array<{ mdx: string; md: string }> = [];

      for (const mdxFile of mdxFiles) {
        const relativePath = mdxPathToRelativePath(mdxFile);
        const markdownPath = path.join(MARKDOWN_DIR, `${relativePath}.md`);

        // Pages without a markdown file are skipped here; the first test reports them.
        if (existsSync(markdownPath)) {
          const mdxStat = statSync(mdxFile);
          const mdStat = statSync(markdownPath);

          // If MDX was modified after the markdown, it's stale.
          // (`>` on Date objects compares their numeric time values.)
          // NOTE(review): this relies on filesystem mtimes, which record when
          // a file was last written on this machine - confirm that is a
          // meaningful signal in every environment where this suite runs.
          if (mdxStat.mtime > mdStat.mtime) {
            stale.push({ mdx: mdxFile, md: markdownPath });
          }
        }
      }

      if (stale.length > 0) {
        console.error("\nStale clean markdown files (source MDX is newer):");
        for (const { mdx, md } of stale) {
          console.error(` - ${mdx} is newer than ${md}`);
        }
        console.error(
          "\nRun 'pnpm generate:markdown' to regenerate clean markdown files."
        );
      }

      expect(
        stale.length,
        `${stale.length} clean markdown files are stale and need regeneration`
      ).toBe(0);
    },
    TIMEOUT
  );
});

0 commit comments

Comments
 (0)