Skip to content

Commit cc1d81c

Browse files
Ai descriptions util (#25736)
* Edits * final files * remove tab * fix more tabs * fixed steps * fix * final fix * Update src/content/docs/cloudflare-one/tutorials/okta-u2f.mdx Co-authored-by: Pedro Sousa <[email protected]> * Update src/content/docs/cloudflare-one/tutorials/vnc-client-in-browser.mdx Co-authored-by: Pedro Sousa <[email protected]> * Update src/content/docs/r2/tutorials/cloudflare-access.mdx Co-authored-by: Pedro Sousa <[email protected]> * Update src/content/docs/pages/how-to/deploy-a-wordpress-site.mdx Co-authored-by: Pedro Sousa <[email protected]> * Update src/content/docs/pages/migrations/migrating-from-netlify.mdx Co-authored-by: Pedro Sousa <[email protected]> * Update src/content/docs/reference-architecture/design-guides/network-vpn-migration.mdx Co-authored-by: Pedro Sousa <[email protected]> * Update src/content/docs/ssl/keyless-ssl/hardware-security-modules/ibm-cloud-hsm.mdx Co-authored-by: Pedro Sousa <[email protected]> * Update src/content/docs/turnstile/tutorials/excluding-turnstile-from-e2e-tests.mdx Co-authored-by: Pedro Sousa <[email protected]> * update --------- Co-authored-by: Pedro Sousa <[email protected]>
1 parent bff96d7 commit cc1d81c

File tree

179 files changed

+1539
-698
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

179 files changed

+1539
-698
lines changed

bin/generate-descriptions.ts

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
#!/usr/bin/env tsx
2+
3+
/**
4+
* This script generates descriptions for MDX files in the docs directory
5+
* that don't have a description field in their frontmatter.
6+
*
7+
* It uses the rendered markdown from the distmd directory to generate descriptions
8+
* by sending the content to a localhost:8787 application.
9+
*
10+
* To run, you'll need to do the following:
11+
* 1. Get your local build setup:
12+
* 1. Run `npm run build` to build the local docs.
13+
* 2. Run `npx tsx bin/generate-index-md.ts` to generate the index.md files (this saves on tokens and avoids extra HTML).
14+
* 2. Have a local Worker running on `localhost:8787` with the following code (also requires adding a binding in the Wrangler config file):
15+
*
16+
* ```
17+
* export interface Env {
18+
* AI: Ai;
19+
* }
20+
21+
* export default {
22+
* async fetch(request, env): Promise<Response> {
23+
* const response = await env.AI.run("@cf/facebook/bart-large-cnn", {
24+
* input_text: await request.text(),
25+
* max_length: 60
26+
* });
27+
* return Response.json(response.summary);
28+
* },
29+
* } satisfies ExportedHandler<Env>;
30+
* ```
31+
* 3. Run `npx tsx bin/generate-descriptions.ts --pcx-content-type $TYPE` to generate the descriptions.
32+
*
33+
*/
34+
35+
import fs from "fs/promises";
36+
import path from "path";
37+
import globby from "fast-glob";
38+
import matter from "gray-matter";
39+
40+
// Root of the MDX sources and root of their rendered-markdown counterparts,
// both resolved against the directory the script is launched from.
const DOCS_DIR = path.resolve(process.cwd(), "src", "content", "docs");
const DISTMD_DIR = path.resolve(process.cwd(), "distmd");

// Address of the locally running application that produces the descriptions.
const LOCALHOST_URL = "http://localhost:8787";
45+
46+
/**
 * Sends text content to the localhost application and returns the cleaned-up
 * description it replies with.
 *
 * The reply body is first decoded as JSON: when it is a JSON string (for
 * example the output of `Response.json(summary)`), escape sequences such as
 * `\"` and `\n` are turned back into the characters they stand for. Bodies
 * that are not a JSON string fall back to stripping one pair of surrounding
 * single or double quotes. Square brackets are removed in every case.
 *
 * @param content - Text to summarise; sent as a `text/plain` POST body.
 * @returns The trimmed description, or `undefined` when the request fails or
 *   the server answers with a non-2xx status (the error is logged).
 */
async function generateDescriptionFromAPI(
  content: string,
): Promise<string | undefined> {
  try {
    const response = await fetch(LOCALHOST_URL, {
      method: "POST",
      headers: {
        "Content-Type": "text/plain",
      },
      body: content,
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    let text = (await response.text()).trim();

    // Prefer real JSON decoding so escaped quotes/newlines are not written to
    // the frontmatter as literal backslash sequences.
    let decoded: unknown;
    try {
      decoded = JSON.parse(text);
    } catch {
      decoded = undefined;
    }

    if (typeof decoded === "string") {
      text = decoded;
    } else if (
      (text.startsWith('"') && text.endsWith('"')) ||
      (text.startsWith("'") && text.endsWith("'"))
    ) {
      // Not a JSON string (e.g. single-quoted): strip the surrounding quotes.
      text = text.slice(1, -1);
    }

    // Remove all square brackets from the text
    return text.replace(/\[|\]/g, "").trim();
  } catch (error) {
    console.error("Error calling localhost API:", error);
    return undefined;
  }
}
86+
87+
/**
 * Gets the rendered markdown path for a docs file.
 *
 * Converts `<DOCS_DIR>/product/path/file.mdx` to
 * `<DISTMD_DIR>/product/path/file/index.md`.
 *
 * A file named `index.mdx` is the landing page of its own directory, so it is
 * mapped to `<DISTMD_DIR>/product/path/index.md` rather than to a nested
 * `index/index.md`.
 *
 * @param docPath - Path of an MDX file located under `DOCS_DIR`.
 * @returns Path of the corresponding rendered `index.md` under `DISTMD_DIR`.
 */
function getRenderedPath(docPath: string): string {
  const relativePath = path.relative(DOCS_DIR, docPath);
  const pathWithoutExt = relativePath.replace(/\.mdx$/, "");

  // `dirname` yields "." for a top-level index file, which `path.join`
  // normalises away.
  const routePath =
    path.basename(pathWithoutExt) === "index"
      ? path.dirname(pathWithoutExt)
      : pathWithoutExt;

  return path.join(DISTMD_DIR, routePath, "index.md");
}
98+
99+
/**
 * Updates the frontmatter of an MDX file with a description.
 *
 * Only the `description` field is touched: the frontmatter is edited as text
 * (instead of being re-serialised with `matter.stringify`) so that the
 * formatting of every other field, such as dates, stays exactly as written.
 * The description is written as a folded block scalar (`>-`).
 *
 * @param filePath - Path of the MDX file to rewrite in place.
 * @param description - Description text; may span several lines.
 * @returns `true` when the file was rewritten, `false` when it was left alone
 *   (description already identical, or no frontmatter block could be found).
 */
async function updateFrontmatter(
  filePath: string,
  description: string,
): Promise<boolean> {
  // Read the original file content to preserve exact formatting
  const originalContent = await fs.readFile(filePath, "utf-8");

  const { data: frontmatter } = matter(originalContent);

  if (frontmatter.description === description) {
    console.log(
      `⏭️ Skipped ${path.relative(process.cwd(), filePath)} (description unchanged)`,
    );
    return false;
  }

  // Extract the frontmatter section (between the first two --- markers)
  const frontmatterMatch = originalContent.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!frontmatterMatch) {
    console.error(`Could not extract frontmatter from ${filePath}`);
    return false;
  }

  const [frontmatterBlock, originalFrontmatter = ""] = frontmatterMatch;

  const descriptionBlock = `description: >-\n ${description.replace(/\n/g, "\n ")}`;

  // Matches the `description:` line plus any indented continuation lines
  // (blank lines inside the block included). The newline that ends the entry
  // is deliberately NOT consumed, so the following key stays on its own line,
  // and a description on the very last frontmatter line is matched as well.
  const descriptionRegex =
    /^description:.*(?:(?:\r?\n[ \t]*)*\r?\n[ \t]+\S.*)*/m;

  // A replacer function is used so that `$1`, `$&`, ... inside the description
  // are inserted literally instead of being treated as replacement patterns.
  const newFrontmatter = descriptionRegex.test(originalFrontmatter)
    ? originalFrontmatter.replace(descriptionRegex, () => descriptionBlock)
    : `${originalFrontmatter.trim()}\n${descriptionBlock}`;

  // The frontmatter match is anchored at the start of the file, so the rest of
  // the document is everything after the matched block. Splicing by position
  // avoids a second pattern-interpreting `replace`.
  const updatedContent = `---\n${newFrontmatter}\n---${originalContent.slice(frontmatterBlock.length)}`;

  await fs.writeFile(filePath, updatedContent, "utf-8");

  console.log(`✅ Updated ${path.relative(process.cwd(), filePath)}`);

  return true;
}
162+
163+
/**
 * Parses the command line arguments of the script (`process.argv`).
 *
 * Recognised options:
 * - `--pcx-content-type <type>` or `--pcx-content-type=<type>`
 * - `--help` / `-h`
 *
 * When `--pcx-content-type` is given without a value, an error is printed and
 * `showHelp` is set, so the caller shows the usage text instead of silently
 * running without the filter the user asked for. Unknown arguments are ignored.
 *
 * @returns The requested content type (if any) and whether help was requested.
 */
function parseArgs(): { pcxContentType: string | undefined; showHelp: boolean } {
  const contentTypeFlag = "--pcx-content-type";
  const args = process.argv.slice(2);
  let pcxContentType: string | undefined;
  let showHelp = false;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) {
      continue;
    }

    if (arg === "--help" || arg === "-h") {
      showHelp = true;
      continue;
    }

    let value: string | undefined;
    if (arg === contentTypeFlag) {
      value = args[i + 1];
      i++; // Skip the next argument as it's the value
    } else if (arg.startsWith(`${contentTypeFlag}=`)) {
      value = arg.slice(contentTypeFlag.length + 1);
    } else {
      continue;
    }

    if (value === undefined || value === "") {
      console.error(`Missing value for ${contentTypeFlag}`);
      showHelp = true;
    } else {
      pcxContentType = value;
    }
  }

  return { pcxContentType, showHelp };
}
182+
183+
/**
 * Prints the command line usage text of this script with `console.log`.
 */
function showUsage(): void {
  console.log(`
Usage: npx tsx bin/generate-descriptions.ts [options]

Options:
--pcx-content-type <type> Filter MDX files by pcx_content_type (e.g., overview, tutorial, navigation)
--help, -h Show this help message
`);
}
195+
196+
/**
 * Main function.
 *
 * Finds every MDX file under `DOCS_DIR` (optionally restricted to one
 * `pcx_content_type`), and for each file whose frontmatter has no description:
 * reads its rendered markdown, asks the localhost application for a
 * description and writes it into the file's frontmatter. A summary of the
 * counters is printed at the end. An unexpected failure outside the per-file
 * handling is logged and ends the process with exit code 1.
 */
async function main() {
  // Parse command line arguments
  const { pcxContentType, showHelp } = parseArgs();

  if (showHelp) {
    showUsage();
    return;
  }

  if (pcxContentType) {
    console.log(`Filtering by pcx_content_type: ${pcxContentType}`);
  }
  try {
    // Find all MDX files in the docs directory
    const mdxFiles = await globby("**/*.mdx", {
      cwd: DOCS_DIR,
      absolute: true,
    });
    console.log(`Found ${mdxFiles.length} MDX files in the docs directory`);

    // Frontmatter parsed during the filter pass, keyed by file path, so the
    // processing loop does not have to read and parse the same file again.
    const parsedFrontmatter = new Map<string, Record<string, unknown>>();

    // Filter files by pcx_content_type if specified
    let filteredMdxFiles = mdxFiles;
    if (pcxContentType) {
      filteredMdxFiles = [];
      for (const mdxFile of mdxFiles) {
        try {
          const content = await fs.readFile(mdxFile, "utf-8");
          const { data: frontmatter } = matter(content);
          if (frontmatter.pcx_content_type === pcxContentType) {
            filteredMdxFiles.push(mdxFile);
            parsedFrontmatter.set(mdxFile, frontmatter);
          }
        } catch (error) {
          console.error(`Error reading ${mdxFile}:`, error);
        }
      }
      console.log(
        `Filtered to ${filteredMdxFiles.length} MDX files with pcx_content_type: ${pcxContentType}`,
      );
    }

    let updatedCount = 0;
    let skippedExistingCount = 0;
    let skippedUnchangedCount = 0;
    let errorCount = 0;

    for (const mdxFile of filteredMdxFiles) {
      // Path shown in log messages, relative to the working directory.
      const displayPath = path.relative(process.cwd(), mdxFile);

      try {
        // Reuse the frontmatter from the filter pass when available.
        const frontmatter =
          parsedFrontmatter.get(mdxFile) ??
          matter(await fs.readFile(mdxFile, "utf-8")).data;

        // Skip if description already exists
        if (frontmatter.description) {
          skippedExistingCount++;
          continue;
        }

        // Get the rendered markdown path
        const renderedPath = getRenderedPath(mdxFile);

        // Check if rendered markdown exists
        try {
          await fs.access(renderedPath);
        } catch (error) {
          console.log(error);
          console.warn(`⚠️ Rendered markdown not found for ${displayPath}`);
          errorCount++;
          continue;
        }

        // Read rendered markdown content
        const markdownContent = await fs.readFile(renderedPath, "utf-8");

        if (!markdownContent.trim()) {
          console.warn(`⚠️ Empty markdown content found for ${displayPath}`);
          errorCount++;
          continue;
        }

        // Generate description using localhost API
        const description = await generateDescriptionFromAPI(markdownContent);

        // Skip if no description could be generated
        if (!description) {
          console.warn(`⚠️ Could not generate description for ${displayPath}`);
          errorCount++;
          continue;
        }

        // Update frontmatter
        const wasUpdated = await updateFrontmatter(mdxFile, description);
        if (wasUpdated) {
          updatedCount++;
        } else {
          skippedUnchangedCount++;
        }
      } catch (error) {
        console.error(`❌ Error processing ${displayPath}:`, error);
        errorCount++;
      }
    }

    console.log("\n--- Summary ---");
    console.log(`Total MDX files: ${mdxFiles.length}`);
    console.log(`Updated: ${updatedCount}`);
    console.log(`Skipped (already had description): ${skippedExistingCount}`);
    console.log(`Skipped (description unchanged): ${skippedUnchangedCount}`);
    console.log(`Errors: ${errorCount}`);
  } catch (error) {
    console.error("Error:", error);
    process.exit(1);
  }
}
318+
319+
main();

0 commit comments

Comments
 (0)