Skip to content

Commit e527035

Browse files
committed
Update extract metadata
1 parent d0a4732 commit e527035

File tree

3 files changed

+143
-132
lines changed

3 files changed

+143
-132
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"packageManager": "[email protected]",
66
"scripts": {
77
"dev": "concurrently \"next dev\" \"pnpm stripe:webhook\"",
8-
"prebuild": "node scripts/extract-metadata.js && node scripts/check-metadata.js && node scripts/generate-collections.js",
8+
"prebuild": "tsx scripts/extract-metadata.ts && node scripts/check-metadata.js && node scripts/generate-collections.js",
99
"build": "npm run prebuild && prisma generate && (prisma migrate deploy || echo 'Database migration failed, continuing with build...') && NODE_OPTIONS=--max-old-space-size=6144 next build",
1010
"build-no-db": "npm run prebuild && prisma generate && NODE_OPTIONS=--max-old-space-size=6144 next build",
1111
"build-with-tests": "npm run test && npm run prebuild && prisma generate && prisma migrate deploy && NODE_OPTIONS=--max-old-space-size=6144 next build",

scripts/extract-metadata.js

Lines changed: 0 additions & 131 deletions
This file was deleted.

scripts/extract-metadata.ts

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* Extract metadata from all MDX files and save to JSON
5+
* This runs during the build process to avoid runtime MDX parsing
6+
* Hybrid approach: uses content-handlers for directory discovery, regex for metadata parsing
7+
*/
8+
9+
import path from 'path';
10+
import fs from 'fs';
11+
import { getContentSlugs } from '../src/lib/content-handlers.js';
12+
13+
const CONTENT_DIR = path.join(process.cwd(), 'src', 'content');
14+
const OUTPUT_FILE = path.join(process.cwd(), 'metadata-cache.json');
15+
16+
// Content types to process
17+
const CONTENT_TYPES = ['blog', 'videos', 'learn/courses', 'comparisons'];
18+
19+
/** Escapes regex metacharacters so `text` can be embedded literally in a RegExp. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Reads a string-literal property (`field: '...'`) out of object-literal source text.
 *
 * The closing quote must be the same character as the opening quote, so a value
 * such as "Don't Panic" is not cut short at the apostrophe. Backslash-escaped
 * characters are skipped while scanning, and escaped quotes/backslashes are
 * unescaped in the returned value.
 *
 * @param source - Text between the braces of the object literal.
 * @param field - Property name; matched on a word boundary so `date` does not
 *   match inside a longer name such as `lastUpdate`.
 * @returns The string value, or `undefined` when the property is absent or is
 *   not a string literal.
 */
function readStringField(source: string, field: string): string | undefined {
  const pattern = new RegExp(
    '\\b' + escapeRegExp(field) + ':\\s*([\'"`])((?:\\\\[\\s\\S]|(?!\\1)[^\\\\])*)\\1',
  );
  const match = pattern.exec(source);
  if (!match) {
    return undefined;
  }
  return (match[2] ?? '').replace(/\\(['"`\\])/g, '$1');
}

/**
 * Regex-based extraction of the fields passed to
 * `export const metadata = createMetadata({ ... })` in an MDX source string.
 *
 * Extracted keys (each only when present): `title`, `description`, `author`,
 * `date`, `type`, `imageRef` (the identifier given to `image:`), and `image`
 * (a `/_next/static/media/<path>.webp` URL, set only when `imageRef` is imported
 * from `@/images/...` in the same file).
 *
 * @param content - Full text of the MDX file.
 * @returns The extracted fields, or `null` when no `createMetadata({...})` export is found.
 */
function extractMetadataFromCreateMetadata(content: string): Record<string, string> | null {
  // Lazy match: the object body ends at the first `}` that is followed by `)`.
  const callMatch = /export\s+const\s+metadata\s*=\s*createMetadata\s*\(\s*\{([\s\S]*?)\}\s*\)/.exec(content);
  if (!callMatch) {
    return null;
  }

  const body = callMatch[1] ?? '';
  const metadata: Record<string, string> = {};

  for (const field of ['title', 'description', 'author', 'date', 'type']) {
    const value = readStringField(body, field);
    // Compare against undefined so an explicitly empty string is still recorded.
    if (value !== undefined) {
      metadata[field] = value;
    }
  }

  // `image` is given as an identifier (an imported asset), not a string literal.
  const imageMatch = /\bimage:\s*([a-zA-Z_$][a-zA-Z0-9_$]*)/.exec(body);
  const imageRef = imageMatch?.[1];
  if (imageRef !== undefined) {
    metadata.imageRef = imageRef;

    // Resolve the identifier to its import path. The identifier is escaped
    // because `$` is legal in identifiers but is an anchor inside a RegExp.
    const importPattern = new RegExp(
      'import\\s+' + escapeRegExp(imageRef) + '\\s+from\\s+[\'"`]@/images/([^\'"`]+)[\'"`]',
    );
    const imagePath = importPattern.exec(content)?.[1];
    if (imagePath !== undefined) {
      // Strip only the final extension so dotted names ("hero.v2.png") keep their stem.
      const imagePathWithoutExt = imagePath.replace(/\.[^./]+$/, '');
      metadata.image = `/_next/static/media/${imagePathWithoutExt}.webp`;
    }
  }

  return metadata;
}
77+
78+
/**
 * Builds the metadata cache for every content type in `CONTENT_TYPES`.
 *
 * For each content type, `getContentSlugs` supplies the directory slugs; for
 * each slug, `<CONTENT_DIR>/<type>/<slug>/page.mdx` is read (when it exists)
 * and parsed with `extractMetadataFromCreateMetadata`. Results are keyed by
 * `<type>/<slug>` and written to `OUTPUT_FILE` as pretty-printed JSON.
 *
 * Failures for a single file or a whole content type are logged and skipped,
 * so one bad entry does not abort the run; only the final file write can reject.
 *
 * @returns The same object that was written to `OUTPUT_FILE`.
 */
async function extractAllMetadata(): Promise<Record<string, any>> {
  // Entry values stay loosely typed to keep the exported shape unchanged for callers.
  const allMetadata: Record<string, any> = {};
  let totalProcessed = 0;
  let totalFound = 0;

  // Thrown values are not guaranteed to be Error instances; avoid logging `undefined`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  console.log('Starting metadata extraction using hybrid approach...');
  console.log('Using content-handlers for directory discovery, regex for metadata parsing');

  for (const contentType of CONTENT_TYPES) {
    console.log(`\nProcessing content type: ${contentType}`);

    try {
      const directorySlugs = getContentSlugs(contentType);
      console.log(`Found ${directorySlugs.length} items in ${contentType}`);

      for (const directorySlug of directorySlugs) {
        const mdxPath = path.join(CONTENT_DIR, contentType, directorySlug, 'page.mdx');

        // Slugs without a page.mdx are skipped and not counted as processed.
        if (!fs.existsSync(mdxPath)) {
          continue;
        }

        const key = `${contentType}/${directorySlug}`;
        try {
          const content = fs.readFileSync(mdxPath, 'utf-8');
          const metadata = extractMetadataFromCreateMetadata(content);

          if (metadata) {
            allMetadata[key] = {
              ...metadata,
              slug: `/${key}`,
              // Fall back to the content type when the file declares no (or an empty) `type`.
              type: metadata.type || contentType,
            };
            console.log(`✓ Extracted metadata for ${key}: "${metadata.title}"`);
            totalFound++;
          } else {
            console.log(`⚠ No createMetadata found in ${key}`);
          }
        } catch (error: unknown) {
          console.error(`✗ Error processing ${key}:`, describeError(error));
        } finally {
          // Every existing page.mdx counts as processed, whether or not it parsed.
          totalProcessed++;
        }
      }
    } catch (error: unknown) {
      console.error(`✗ Error processing content type ${contentType}:`, describeError(error));
    }
  }

  // Write to JSON file
  fs.writeFileSync(OUTPUT_FILE, JSON.stringify(allMetadata, null, 2));
  console.log(`\n✓ Successfully extracted metadata for ${totalFound}/${totalProcessed} items to ${OUTPUT_FILE}`);
  console.log(`Cache contains ${Object.keys(allMetadata).length} entries`);

  return allMetadata;
}
136+
137+
// Run only when this file is executed as a script, not when it is imported
// for its `extractAllMetadata` export.
if (require.main === module) {
  extractAllMetadata().catch((error: unknown) => {
    console.error(error);
    // Signal failure through the exit status so the `&&`-chained prebuild
    // steps do not continue after a failed extraction.
    process.exitCode = 1;
  });
}
141+
142+
export { extractAllMetadata };

0 commit comments

Comments
 (0)