@@ -2,12 +2,19 @@ import path from 'path'
22import fs from 'fs'
33import { SdkReferenceData } from '../src/templates/sdk/SdkReference'
44import { getLanguageFromSdkId } from '../src/components/SdkReferences/utils'
5+ import {
6+ createTurndownService ,
7+ extractTitleFromHtml ,
8+ extractMainContent ,
9+ postProcessMarkdown ,
10+ preprocessHtmlForTabs ,
11+ } from './turndownService'
12+
13+ export const generateRawMarkdownPages = async (
14+ docsNodes : Array < { fields : { slug : string } ; frontmatter : { title : string } } >
15+ ) => {
16+ const publicPath = path . resolve ( __dirname , '../public' )
517
6- // Function to generate raw markdown files
7- export const generateRawMarkdownPages = async ( pages ) => {
8- console . log ( 'Generating markdown files for LLMs...' )
9-
10- // Filter out any pages with certain slugs
1118 const excludeTerms = [
1219 '/_snippets' ,
1320 '/snippets/' ,
@@ -23,53 +30,49 @@ export const generateRawMarkdownPages = async (pages) => {
2330 '/startups' ,
2431 '/example-components' ,
2532 ]
26- const filteredPages = pages . filter ( ( doc ) => ! excludeTerms . some ( ( term ) => doc . fields . slug . includes ( term ) ) )
2733
28- console . log ( `Found ${ filteredPages . length } docs to generate markdown for (filtered from ${ pages . length } total)` )
34+ const filteredNodes = docsNodes . filter ( ( node ) => {
35+ return ! excludeTerms . some ( ( term ) => node . fields . slug . includes ( term ) )
36+ } )
37+
38+ const processedPages : Array < { slug : string ; title : string } > = [ ]
2939
30- for ( const doc of filteredPages ) {
40+ for ( const node of filteredNodes ) {
3141 try {
32- const { slug, contentWithSnippets } = doc . fields
33- const { title } = doc . frontmatter
34- const body = contentWithSnippets || doc . rawBody
35-
36- // Create the frontmatter, so it always has the page title
37- let markdownContent = `---\ntitle: ${ title } \nslug: ${ slug } \n---\n`
38-
39- // Add the content
40- if ( body ) {
41- // Process internal links to point to .md equivalents
42- let processedBody = body . replace ( / \[ ( [ ^ \] ] + ) \] \( \/ ( [ ^ ) ] + ) \) / g, ( match , text , path ) => {
43- // Only convert if the path doesn't already end with .md
44- if ( ! path . endsWith ( '.md' ) ) {
45- return `[${ text } ](/${ path } .md)`
46- }
47- return match
48- } )
42+ const { slug } = node . fields
43+ const htmlFilePath = path . join ( publicPath , slug , 'index.html' )
4944
50- markdownContent += processedBody
45+ if ( ! fs . existsSync ( htmlFilePath ) ) {
46+ continue
5147 }
5248
53- // Create the directory structure
54- const publicPath = path . resolve ( __dirname , '../public' )
55- const filePath = path . join ( publicPath , `${ slug } .md` )
56- const dirPath = path . dirname ( filePath )
49+ const html = fs . readFileSync ( htmlFilePath , 'utf8' )
50+ const title = extractTitleFromHtml ( html ) || node . frontmatter . title || 'Untitled'
51+ const mainContent = extractMainContent ( html )
52+ const preprocessedContent = preprocessHtmlForTabs ( mainContent )
53+
54+ const turndownService = createTurndownService ( title )
55+ let markdown = turndownService . turndown ( preprocessedContent )
56+ markdown = postProcessMarkdown ( markdown , title )
57+
58+ const outputPath = path . join ( publicPath , `${ slug } .md` )
59+ const dirPath = path . dirname ( outputPath )
5760
58- // Ensure directory exists
5961 if ( ! fs . existsSync ( dirPath ) ) {
6062 fs . mkdirSync ( dirPath , { recursive : true } )
6163 }
6264
63- // Write the file
64- fs . writeFileSync ( filePath , markdownContent , 'utf8' )
65- console . log ( `Generated: ${ slug } .md` )
65+ fs . writeFileSync ( outputPath , markdown , 'utf8' )
66+ processedPages . push ( { slug, title } )
6667 } catch ( error ) {
67- console . error ( `Error generating markdown for ${ doc . fields . slug } :` , error )
68+ continue
6869 }
6970 }
7071
71- // Return filtered pages for use in generateLlmsTxt
72- return filteredPages
72+ return processedPages . map ( ( page ) => ( {
73+ fields : { slug : page . slug } ,
74+ frontmatter : { title : page . title } ,
75+ } ) )
7376}
7477
7578// Function to generate individual API endpoint markdown files from the OpenAPI spec
0 commit comments