Skip to content

Commit 33d3ab2

Browse files
authored
Fix snippet-final-v5-(2).jpg (#14127)
* Fix snippet-final-v5-(2).jpg
* Clean up
* State consistency
* Fix tables rendering
* Fix copy button
* Address codeql suggestions
1 parent a7e8cc0 commit 33d3ab2

File tree

12 files changed

+797 -399 lines changed

12 files changed

+797 -399 lines changed

gatsby/onCreateNode.ts

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ import slugify from 'slugify'
66
import { JSDOM } from 'jsdom'
77
import { GatsbyNode } from 'gatsby'
88
import { PAGEVIEW_CACHE_KEY } from './onPreBootstrap'
9-
import { resolveSnippets } from './snippetUtils'
109

1110
require('dotenv').config({
1211
path: `.env.${process.env.NODE_ENV}`,
@@ -269,19 +268,6 @@ export const onCreateNode: GatsbyNode['onCreateNode'] = async ({
269268
console.error(`Error fetching input_schema for ${templateIds}: ${error}`)
270269
}
271270
}
272-
273-
const contentWithoutFrontmatter = stripFrontmatter(node.rawBody)
274-
const contentWithSnippets = resolveSnippets(contentWithoutFrontmatter, node.fileAbsolutePath)
275-
276-
// Prepend title as H1 if it exists
277-
const title = node.frontmatter?.title
278-
const contentWithSnippetsAndTitle = title ? `# ${title}\n\n${contentWithSnippets}` : contentWithSnippets
279-
280-
createNodeField({
281-
node,
282-
name: `contentWithSnippets`,
283-
value: contentWithSnippetsAndTitle,
284-
})
285271
}
286272

287273
if (node.internal.type === 'Plugin' && new URL(node.url).hostname === 'github.com' && process.env.GITHUB_API_KEY) {

gatsby/onPostBuild.ts

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -482,7 +482,7 @@ const createOrUpdateStrapiPosts = async (posts, roadmaps) => {
482482
)
483483
}
484484

485-
export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql }) => {
485+
export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter }) => {
486486
// Generate API spec markdown files first
487487
try {
488488
const openApiSpecUrl = process.env.POSTHOG_OPEN_API_SPEC_URL || 'https://app.posthog.com/api/schema/'
@@ -555,26 +555,24 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql }) => {
555555
generateSdkReferencesMarkdown(node)
556556
})
557557

558-
// Generate markdown files for llms.txt file and LLM ingestion (after API spec files exist)
559-
const markdownQuery = await graphql(`
560-
query pagesForMarkdown {
561-
allMdx {
558+
// Generate markdown files for llms.txt file and LLM ingestion (after pages are built)
559+
// Convert HTML files to markdown using turndown
560+
const docsQuery = (await graphql(`
561+
query {
562+
allMdx(filter: { fields: { slug: { regex: "/^/docs/" } } }) {
562563
nodes {
563-
frontmatter {
564-
title
565-
date
566-
}
567-
rawBody
568564
fields {
569565
slug
570-
contentWithSnippets
566+
}
567+
frontmatter {
568+
title
571569
}
572570
}
573571
}
574572
}
575-
`)
573+
`)) as { data: { allMdx: { nodes: Array<{ fields: { slug: string }; frontmatter: { title: string } }> } } }
576574

577-
const filteredPages = await generateRawMarkdownPages(markdownQuery.data.allMdx.nodes)
575+
const filteredPages = await generateRawMarkdownPages(docsQuery.data.allMdx.nodes)
578576
generateLlmsTxt(filteredPages)
579577

580578
if (process.env.AWS_CODEPIPELINE !== 'true') {

gatsby/rawMarkdownUtils.ts

Lines changed: 40 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,19 @@ import path from 'path'
22
import fs from 'fs'
33
import { SdkReferenceData } from '../src/templates/sdk/SdkReference'
44
import { getLanguageFromSdkId } from '../src/components/SdkReferences/utils'
5+
import {
6+
createTurndownService,
7+
extractTitleFromHtml,
8+
extractMainContent,
9+
postProcessMarkdown,
10+
preprocessHtmlForTabs,
11+
} from './turndownService'
12+
13+
export const generateRawMarkdownPages = async (
14+
docsNodes: Array<{ fields: { slug: string }; frontmatter: { title: string } }>
15+
) => {
16+
const publicPath = path.resolve(__dirname, '../public')
517

6-
// Function to generate raw markdown files
7-
export const generateRawMarkdownPages = async (pages) => {
8-
console.log('Generating markdown files for LLMs...')
9-
10-
// Filter out any pages with certain slugs
1118
const excludeTerms = [
1219
'/_snippets',
1320
'/snippets/',
@@ -23,53 +30,49 @@ export const generateRawMarkdownPages = async (pages) => {
2330
'/startups',
2431
'/example-components',
2532
]
26-
const filteredPages = pages.filter((doc) => !excludeTerms.some((term) => doc.fields.slug.includes(term)))
2733

28-
console.log(`Found ${filteredPages.length} docs to generate markdown for (filtered from ${pages.length} total)`)
34+
const filteredNodes = docsNodes.filter((node) => {
35+
return !excludeTerms.some((term) => node.fields.slug.includes(term))
36+
})
37+
38+
const processedPages: Array<{ slug: string; title: string }> = []
2939

30-
for (const doc of filteredPages) {
40+
for (const node of filteredNodes) {
3141
try {
32-
const { slug, contentWithSnippets } = doc.fields
33-
const { title } = doc.frontmatter
34-
const body = contentWithSnippets || doc.rawBody
35-
36-
// Create the frontmatter, so it always has the page title
37-
let markdownContent = `---\ntitle: ${title}\nslug: ${slug}\n---\n`
38-
39-
// Add the content
40-
if (body) {
41-
// Process internal links to point to .md equivalents
42-
let processedBody = body.replace(/\[([^\]]+)\]\(\/([^)]+)\)/g, (match, text, path) => {
43-
// Only convert if the path doesn't already end with .md
44-
if (!path.endsWith('.md')) {
45-
return `[${text}](/${path}.md)`
46-
}
47-
return match
48-
})
42+
const { slug } = node.fields
43+
const htmlFilePath = path.join(publicPath, slug, 'index.html')
4944

50-
markdownContent += processedBody
45+
if (!fs.existsSync(htmlFilePath)) {
46+
continue
5147
}
5248

53-
// Create the directory structure
54-
const publicPath = path.resolve(__dirname, '../public')
55-
const filePath = path.join(publicPath, `${slug}.md`)
56-
const dirPath = path.dirname(filePath)
49+
const html = fs.readFileSync(htmlFilePath, 'utf8')
50+
const title = extractTitleFromHtml(html) || node.frontmatter.title || 'Untitled'
51+
const mainContent = extractMainContent(html)
52+
const preprocessedContent = preprocessHtmlForTabs(mainContent)
53+
54+
const turndownService = createTurndownService(title)
55+
let markdown = turndownService.turndown(preprocessedContent)
56+
markdown = postProcessMarkdown(markdown, title)
57+
58+
const outputPath = path.join(publicPath, `${slug}.md`)
59+
const dirPath = path.dirname(outputPath)
5760

58-
// Ensure directory exists
5961
if (!fs.existsSync(dirPath)) {
6062
fs.mkdirSync(dirPath, { recursive: true })
6163
}
6264

63-
// Write the file
64-
fs.writeFileSync(filePath, markdownContent, 'utf8')
65-
console.log(`Generated: ${slug}.md`)
65+
fs.writeFileSync(outputPath, markdown, 'utf8')
66+
processedPages.push({ slug, title })
6667
} catch (error) {
67-
console.error(`Error generating markdown for ${doc.fields.slug}:`, error)
68+
continue
6869
}
6970
}
7071

71-
// Return filtered pages for use in generateLlmsTxt
72-
return filteredPages
72+
return processedPages.map((page) => ({
73+
fields: { slug: page.slug },
74+
frontmatter: { title: page.title },
75+
}))
7376
}
7477

7578
// Function to generate individual API endpoint markdown files from the OpenAPI spec

gatsby/snippetUtils.ts

Lines changed: 0 additions & 66 deletions
This file was deleted.

0 commit comments

Comments
 (0)