1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Extract metadata from all MDX files and save to JSON
5
+ * This runs during the build process to avoid runtime MDX parsing
6
+ * Hybrid approach: uses content-handlers for directory discovery, regex for metadata parsing
7
+ */
8
+
9
+ import path from 'path' ;
10
+ import fs from 'fs' ;
11
+ import { getContentSlugs } from '../src/lib/content-handlers.js' ;
12
+
13
// Root folder holding every MDX content section, resolved against the
// directory the script is launched from.
const CONTENT_DIR = path.resolve('src', 'content');

// Destination of the generated JSON cache, resolved the same way.
const OUTPUT_FILE = path.resolve('metadata-cache.json');

// Content sections (sub-paths of CONTENT_DIR) that are scanned for metadata.
const CONTENT_TYPES = ['blog', 'videos', 'learn/courses', 'comparisons'];
18
+
19
// Matches `<field>: '<value>'` for the string-valued metadata fields.
// - `\b` stops `date` from matching inside `lastUpdate`, `title` inside `subtitle`, etc.
// - The closing quote must be the SAME character as the opening quote (`\2`), so an
//   apostrophe inside a double-quoted string no longer truncates the value.
// - Backslash escapes are consumed as a pair so an escaped quote does not end the value.
const METADATA_STRING_PROPERTY =
  /\b(title|description|author|date|type)\s*:\s*(['"`])((?:\\[\s\S]|(?!\2)[^\\])*)\2/g;

/**
 * Extract metadata from the `export const metadata = createMetadata({...})`
 * call of an MDX source file using targeted regular expressions (the file is
 * never evaluated).
 *
 * Recognised string fields: `title`, `description`, `author`, `date`, `type`.
 * The first occurrence of each field wins. `image` is expected to be an
 * identifier; its name is returned as `imageRef`, and when a matching
 * `import <identifier> from '@/images/...'` exists, `image` is set to
 * `/_next/static/media/<path without extension>.webp`.
 *
 * @param content - Full text of the MDX file.
 * @returns The extracted fields, or `null` when no `createMetadata` call is found.
 */
function extractMetadataFromCreateMetadata(content: string): Record<string, string> | null {
  // Find the createMetadata call specifically; the lazy body stops at the first `})`.
  const callMatch =
    /export\s+const\s+metadata\s*=\s*createMetadata\s*\(\s*\{([\s\S]*?)\}\s*\)/.exec(content);
  if (!callMatch) {
    return null;
  }

  const objectBody = callMatch[1] ?? '';
  const metadata: Record<string, string> = {};

  // Single pass over all string properties. Because each match consumes the whole
  // quoted value, text such as "title:" inside an earlier string is skipped.
  for (const match of objectBody.matchAll(METADATA_STRING_PROPERTY)) {
    const field = match[1];
    if (field === undefined || field in metadata) {
      continue; // keep the first occurrence of each field
    }
    // Undo escapes of quote characters and backslashes; other escapes are left as written.
    metadata[field] = (match[3] ?? '').replace(/\\(['"`\\])/g, '$1');
  }

  // `image` is an imported identifier, not a string literal.
  const imageMatch = /\bimage\s*:\s*([a-zA-Z_$][a-zA-Z0-9_$]*)/.exec(objectBody);
  const imageRef = imageMatch?.[1];
  if (imageRef !== undefined) {
    metadata.imageRef = imageRef;

    // Identifiers may contain `$`, which is a regex metacharacter: escape before interpolating.
    const escapedRef = imageRef.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const importMatch = new RegExp(
      `import\\s+${escapedRef}\\s+from\\s+['"\`]@/images/([^'"\`]+)['"\`]`,
    ).exec(content);
    const importedPath = importMatch?.[1];
    if (importedPath !== undefined) {
      // Strip only the final extension so dots earlier in the path are preserved.
      const withoutExtension = importedPath.replace(/\.[^./]+$/, '');
      metadata.image = `/_next/static/media/${withoutExtension}.webp`;
    }
  }

  return metadata;
}
77
+
78
/**
 * Build the metadata cache for every content type in `CONTENT_TYPES`.
 *
 * Hybrid approach: `getContentSlugs` discovers the directories of each content
 * type, then each `<CONTENT_DIR>/<type>/<slug>/page.mdx` that exists is read and
 * parsed with `extractMetadataFromCreateMetadata`. Entries are keyed by
 * `<type>/<slug>` and carry a `slug` path plus a `type` that falls back to the
 * content type when the file declares none. The result is written to
 * `OUTPUT_FILE` as pretty-printed JSON.
 *
 * Failures for a single file or a whole content type are logged and skipped so
 * one bad page does not abort the run; a failure to write the output file
 * rejects the returned promise.
 *
 * @returns The map that was written to the cache file.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- entry type kept loose for existing callers
async function extractAllMetadata(): Promise<Record<string, any>> {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see above
  const allMetadata: Record<string, any> = {};
  let totalProcessed = 0;
  let totalFound = 0;

  // Anything can be thrown, so narrow before reading `.message`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  console.log('Starting metadata extraction using hybrid approach...');
  console.log('Using content-handlers for directory discovery, regex for metadata parsing');

  for (const contentType of CONTENT_TYPES) {
    console.log(`\nProcessing content type: ${contentType}`);

    try {
      // Use content-handlers to get all directory slugs (more reliable than manual fs operations)
      const directorySlugs = getContentSlugs(contentType);
      console.log(`Found ${directorySlugs.length} items in ${contentType}`);

      for (const directorySlug of directorySlugs) {
        const key = `${contentType}/${directorySlug}`;
        const mdxPath = path.join(CONTENT_DIR, contentType, directorySlug, 'page.mdx');

        // Directories without a page.mdx are not counted as processed.
        if (!fs.existsSync(mdxPath)) {
          continue;
        }

        // Counted once here whether parsing succeeds, finds nothing, or throws.
        totalProcessed++;

        try {
          const content = fs.readFileSync(mdxPath, 'utf-8');
          const metadata = extractMetadataFromCreateMetadata(content);

          if (metadata) {
            allMetadata[key] = {
              ...metadata,
              slug: `/${key}`,
              // `||` on purpose: an empty declared type also falls back to the content type.
              type: metadata.type || contentType,
            };
            console.log(`✓ Extracted metadata for ${key}: "${metadata.title}"`);
            totalFound++;
          } else {
            console.log(`⚠ No createMetadata found in ${key}`);
          }
        } catch (error: unknown) {
          console.error(`✗ Error processing ${key}:`, describeError(error));
        }
      }
    } catch (error: unknown) {
      console.error(`✗ Error processing content type ${contentType}:`, describeError(error));
    }
  }

  // Write to JSON file
  fs.writeFileSync(OUTPUT_FILE, JSON.stringify(allMetadata, null, 2));
  console.log(`\n✓ Successfully extracted metadata for ${totalFound}/${totalProcessed} items to ${OUTPUT_FILE}`);
  console.log(`Cache contains ${Object.keys(allMetadata).length} entries`);

  return allMetadata;
}
136
+
137
// Run the extraction only when this file is executed directly, not when it is imported.
// NOTE(review): `require.main` exists only under CommonJS — confirm the script is not
// executed as a native ES module, where this check would throw.
if (require.main === module) {
  extractAllMetadata().catch((error: unknown) => {
    console.error(error);
    // Signal failure to the build pipeline instead of exiting 0 without a cache file.
    process.exitCode = 1;
  });
}
141
+
142
+ export { extractAllMetadata } ;
0 commit comments