-
Notifications
You must be signed in to change notification settings - Fork 9.9k
[Docs site] Add generate descriptions util + backfill values #25731
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 7 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
4f26aac
Edits
kodster28 5440f94
final files
kodster28 3b9e6f9
remove tab
kodster28 da7ffc1
fix more tabs
kodster28 5812ec3
fixed steps
kodster28 42d8ca4
fix
kodster28 655f9fd
final fix
kodster28 946e5bd
Update src/content/docs/cloudflare-one/tutorials/okta-u2f.mdx
kodster28 06b66fc
Update src/content/docs/cloudflare-one/tutorials/vnc-client-in-browse…
kodster28 2be4b47
Update src/content/docs/r2/tutorials/cloudflare-access.mdx
kodster28 4815741
Update src/content/docs/pages/how-to/deploy-a-wordpress-site.mdx
kodster28 4e2e16e
Update src/content/docs/pages/migrations/migrating-from-netlify.mdx
kodster28 8d62110
Update src/content/docs/reference-architecture/design-guides/network-…
kodster28 496a602
Update src/content/docs/ssl/keyless-ssl/hardware-security-modules/ibm…
kodster28 19ca7d5
Update src/content/docs/turnstile/tutorials/excluding-turnstile-from-…
kodster28 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,319 @@ | ||
| #!/usr/bin/env tsx | ||
|
|
||
| /** | ||
| * This script generates descriptions for MDX files in the docs directory | ||
| * that don't have a description field in their frontmatter. | ||
| * | ||
| * It uses the rendered markdown from the distmd directory to generate descriptions | ||
| * by sending the content to a localhost:8787 application. | ||
| * | ||
| * To run, you'll need to do the following: | ||
| * 1. Get your local build setup: | ||
| * 1. Run `npm run build` to build the local docs. | ||
| * 2. Run `npx tsx bin/generate-index-md.ts` to generate the index.md files (this saves on tokens and avoids extra HTML). | ||
| * 2. Have a local Worker running on `localhost:8787` with the following code (also requires adding a binding in the Wrangler config file): | ||
| * | ||
| * ``` | ||
| * export interface Env { | ||
| * AI: Ai; | ||
| * } | ||
|
|
||
| * export default { | ||
| * async fetch(request, env): Promise<Response> { | ||
| * const response = await env.AI.run("@cf/facebook/bart-large-cnn", { | ||
| * input_text: await request.text(), | ||
| * max_length: 60 | ||
| * }); | ||
| * return Response.json(response.summary); | ||
| * }, | ||
| * } satisfies ExportedHandler<Env>; | ||
| * ``` | ||
| * 3. Run `npx tsx bin/generate-descriptions.ts --pcx-content-type $TYPE` to generate the descriptions. | ||
| * | ||
| */ | ||
|
|
||
| import fs from "fs/promises"; | ||
| import path from "path"; | ||
| import globby from "fast-glob"; | ||
| import matter from "gray-matter"; | ||
|
|
||
/** Absolute path of the MDX sources, resolved against the current working directory. */
const DOCS_DIR = path.join(process.cwd(), "src/content/docs");

/** Absolute path of the rendered markdown tree, resolved against the current working directory. */
const DISTMD_DIR = path.join(process.cwd(), "distmd");

/**
 * URL of the application that turns page content into a description.
 *
 * Defaults to the local Worker on port 8787. Set the DESCRIPTIONS_API_URL
 * environment variable to point the script at a different host or port.
 * `||` (not `??`) is deliberate: an empty variable falls back to the default.
 */
const LOCALHOST_URL =
	process.env.DESCRIPTIONS_API_URL || "http://localhost:8787";
|
|
||
/**
 * Turns the raw response body into the summary text.
 *
 * A body that is a JSON string literal (what `Response.json(summary)` emits)
 * is decoded with `JSON.parse` so escape sequences such as `\"` do not leak
 * into the description. Anything else falls back to stripping one pair of
 * matching surrounding quotes.
 */
function decodeSummary(raw: string): string {
	const trimmed = raw.trim();

	if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
		try {
			const parsed: unknown = JSON.parse(trimmed);
			if (typeof parsed === "string") {
				return parsed;
			}
		} catch {
			// Not valid JSON after all; fall through to plain quote stripping.
		}
	}

	const isQuoted =
		(trimmed.startsWith('"') && trimmed.endsWith('"')) ||
		(trimmed.startsWith("'") && trimmed.endsWith("'"));
	return isQuoted ? trimmed.slice(1, -1) : trimmed;
}

/**
 * Sends text content to the localhost application and returns the
 * description it produces.
 *
 * The content is POSTed as `text/plain`. The response body is decoded (see
 * `decodeSummary`), stripped of all square brackets, and trimmed.
 *
 * @param content - Text to summarize.
 * @returns The cleaned description (possibly an empty string), or `undefined`
 *   when the request fails or the server answers with a non-2xx status. The
 *   failure is logged rather than thrown.
 */
async function generateDescriptionFromAPI(
	content: string,
): Promise<string | undefined> {
	try {
		const response = await fetch(LOCALHOST_URL, {
			method: "POST",
			headers: {
				"Content-Type": "text/plain",
			},
			body: content,
		});

		if (!response.ok) {
			throw new Error(`HTTP error! status: ${response.status}`);
		}

		const summary = decodeSummary(await response.text());

		// Remove all square brackets from the text
		return summary.replace(/\[|\]/g, "").trim();
	} catch (error) {
		console.error("Error calling localhost API:", error);
		return undefined;
	}
}
|
|
||
/**
 * Gets the rendered markdown path for a docs file.
 *
 * Regular pages map from `<DOCS_DIR>/product/path/file.mdx` to
 * `<DISTMD_DIR>/product/path/file/index.md`. An `index.mdx` file maps to the
 * `index.md` of its own directory (`product/index.mdx` ->
 * `product/index.md`) instead of `product/index/index.md`.
 * NOTE(review): this assumes the site build serves index pages at their
 * directory URL; confirm against the generated distmd tree.
 *
 * @param docPath - Path of the MDX source file inside the docs directory.
 * @returns Path where the rendered markdown for that page is expected.
 */
function getRenderedPath(docPath: string): string {
	const relativePath = path.relative(DOCS_DIR, docPath);
	const pathWithoutExt = relativePath.replace(/\.mdx$/, "");

	if (path.basename(pathWithoutExt) === "index") {
		return path.join(DISTMD_DIR, path.dirname(pathWithoutExt), "index.md");
	}

	return path.join(DISTMD_DIR, pathWithoutExt, "index.md");
}
|
|
||
/**
 * Updates the frontmatter of an MDX file with a description.
 *
 * Only the `description` field is touched: the frontmatter is edited as text
 * (not re-serialized), so every other field keeps its exact formatting. The
 * description is written as a folded block scalar (`>-`).
 *
 * @param filePath - Path of the MDX file to update in place.
 * @param description - Description text to store; embedded newlines are
 *   indented so they stay inside the block scalar.
 * @returns `true` when the file was rewritten, `false` when it was skipped
 *   because the description is unchanged or no frontmatter block was found.
 */
async function updateFrontmatter(
	filePath: string,
	description: string,
): Promise<boolean> {
	// Read the original file content to preserve exact formatting
	const originalContent = await fs.readFile(filePath, "utf-8");
	const { data: frontmatter } = matter(originalContent);

	if (frontmatter.description === description) {
		console.log(
			`⏭️ Skipped ${path.relative(process.cwd(), filePath)} (description unchanged)`,
		);
		return false;
	}

	// The frontmatter section sits between the first two --- markers
	const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---/;
	const frontmatterMatch = frontmatterRegex.exec(originalContent);
	if (!frontmatterMatch) {
		console.error(`Could not extract frontmatter from ${filePath}`);
		return false;
	}
	const originalFrontmatter = frontmatterMatch[1] ?? "";

	const descriptionBlock = `description: >-\n ${description.replace(/\n/g, "\n ")}`;

	// A top-level `description:` line plus every indented continuation line
	// that follows it. The trailing newline is NOT part of the match, so the
	// key after the description stays on its own line, and a description that
	// is the last key in the frontmatter is matched as well.
	const descriptionRegex = /^description:[^\r\n]*(?:\r?\n[ \t]+[^\r\n]*)*/m;

	// Replacer functions are used instead of replacement strings so that `$1`,
	// `$&`, `$$` etc. inside the description or the frontmatter are written
	// literally rather than being expanded as replacement patterns.
	const newFrontmatter = descriptionRegex.test(originalFrontmatter)
		? originalFrontmatter.replace(descriptionRegex, () => descriptionBlock)
		: `${originalFrontmatter.trim()}\n${descriptionBlock}`;

	const updatedContent = originalContent.replace(
		frontmatterRegex,
		() => `---\n${newFrontmatter}\n---`,
	);

	await fs.writeFile(filePath, updatedContent, "utf-8");

	console.log(`✅ Updated ${path.relative(process.cwd(), filePath)}`);

	return true;
}
|
|
||
/**
 * Parses the command line arguments in `process.argv`.
 *
 * Recognized options:
 * - `--pcx-content-type <type>` or `--pcx-content-type=<type>`
 * - `--help` / `-h`
 *
 * Unrecognized arguments are ignored. A `--pcx-content-type` flag with no
 * value leaves the filter unset and prints a warning, because the script
 * would otherwise silently run unfiltered.
 *
 * @returns The requested content type (if any) and whether help was requested.
 */
function parseArgs(): {
	pcxContentType: string | undefined;
	showHelp: boolean;
} {
	const contentTypeFlag = "--pcx-content-type";
	const args = process.argv.slice(2);
	let pcxContentType: string | undefined;
	let showHelp = false;

	for (let i = 0; i < args.length; i++) {
		const arg = args[i];
		if (arg === undefined) {
			continue;
		}

		if (arg === contentTypeFlag) {
			const value = args[i + 1];
			if (value === undefined) {
				console.warn(
					`${contentTypeFlag} was given without a value; no filter will be applied`,
				);
			} else {
				pcxContentType = value;
				i++; // Skip the next argument as it's the value
			}
		} else if (arg.startsWith(`${contentTypeFlag}=`)) {
			pcxContentType = arg.slice(contentTypeFlag.length + 1);
		} else if (arg === "--help" || arg === "-h") {
			showHelp = true;
		}
	}

	return { pcxContentType, showHelp };
}
|
|
||
/**
 * Prints the command line help text to standard output.
 */
function showUsage() {
	const usageText = `
Usage: npx tsx bin/generate-descriptions.ts [options]

Options:
--pcx-content-type <type> Filter MDX files by pcx_content_type (e.g., overview, tutorial, navigation)
--help, -h Show this help message
`;
	console.log(usageText);
}
|
|
||
/**
 * Reports whether a caught value is a filesystem "no such file" error.
 */
function isFileNotFound(error: unknown): boolean {
	return error instanceof Error && "code" in error && error.code === "ENOENT";
}

/**
 * Main function.
 *
 * Finds every MDX file under the docs directory (optionally narrowed to one
 * `pcx_content_type`), and for each file without a description sends its
 * rendered markdown to the description API and writes the result into the
 * file's frontmatter. A summary of the counters is printed at the end.
 *
 * Per-file failures are logged and counted without stopping the run; a
 * failure outside the per-file loop is logged and exits with status 1.
 */
async function main() {
	// Parse command line arguments
	const { pcxContentType, showHelp } = parseArgs();

	if (showHelp) {
		showUsage();
		return;
	}

	if (pcxContentType) {
		console.log(`Filtering by pcx_content_type: ${pcxContentType}`);
	}

	try {
		// Find all MDX files in the docs directory
		const mdxFiles = await globby("**/*.mdx", {
			cwd: DOCS_DIR,
			absolute: true,
		});
		console.log(`Found ${mdxFiles.length} MDX files in the docs directory`);

		// Frontmatter parsed while filtering is kept here so the processing
		// loop below does not have to read and parse the same file again.
		const parsedFrontmatter = new Map<string, Record<string, unknown>>();

		// Filter files by pcx_content_type if specified
		let filteredMdxFiles = mdxFiles;
		if (pcxContentType) {
			filteredMdxFiles = [];
			for (const mdxFile of mdxFiles) {
				try {
					const { data } = matter(await fs.readFile(mdxFile, "utf-8"));
					if (data.pcx_content_type === pcxContentType) {
						parsedFrontmatter.set(mdxFile, data);
						filteredMdxFiles.push(mdxFile);
					}
				} catch (error) {
					console.error(`Error reading ${mdxFile}:`, error);
				}
			}
			console.log(
				`Filtered to ${filteredMdxFiles.length} MDX files with pcx_content_type: ${pcxContentType}`,
			);
		}

		let updatedCount = 0;
		let skippedExistingCount = 0;
		let skippedUnchangedCount = 0;
		let errorCount = 0;

		for (const mdxFile of filteredMdxFiles) {
			const displayPath = path.relative(process.cwd(), mdxFile);
			try {
				const frontmatter =
					parsedFrontmatter.get(mdxFile) ??
					matter(await fs.readFile(mdxFile, "utf-8")).data;

				// Skip if description already exists
				if (frontmatter.description) {
					skippedExistingCount++;
					continue;
				}

				// Read the rendered markdown directly; a missing file is reported
				// as "not found", any other failure is handled by the outer catch.
				const renderedPath = getRenderedPath(mdxFile);
				let markdownContent: string;
				try {
					markdownContent = await fs.readFile(renderedPath, "utf-8");
				} catch (error) {
					if (!isFileNotFound(error)) {
						throw error;
					}
					console.warn(`⚠️ Rendered markdown not found for ${displayPath}`);
					errorCount++;
					continue;
				}

				if (!markdownContent.trim()) {
					console.warn(`⚠️ Empty markdown content found for ${displayPath}`);
					errorCount++;
					continue;
				}

				// Generate description using localhost API
				const description = await generateDescriptionFromAPI(markdownContent);

				// Skip if no description could be generated
				if (!description) {
					console.warn(`⚠️ Could not generate description for ${displayPath}`);
					errorCount++;
					continue;
				}

				// Update frontmatter
				const wasUpdated = await updateFrontmatter(mdxFile, description);
				if (wasUpdated) {
					updatedCount++;
				} else {
					skippedUnchangedCount++;
				}
			} catch (error) {
				console.error(`❌ Error processing ${displayPath}:`, error);
				errorCount++;
			}
		}

		console.log("\n--- Summary ---");
		console.log(`Total MDX files: ${mdxFiles.length}`);
		console.log(`Updated: ${updatedCount}`);
		console.log(`Skipped (already had description): ${skippedExistingCount}`);
		console.log(`Skipped (description unchanged): ${skippedUnchangedCount}`);
		console.log(`Errors: ${errorCount}`);
	} catch (error) {
		console.error("Error:", error);
		process.exit(1);
	}
}

// main() reports its own failures, so the returned promise is intentionally not awaited.
void main();
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.