|
| 1 | +#!/usr/bin/env node |
| 2 | +/** |
| 3 | + * Script to detect internal links with trailing slashes in MDX and Astro files |
| 4 | + * This helps identify inconsistent internal linking that can cause SEO issues |
| 5 | + */ |
| 6 | + |
import { readdir, readFile } from "fs/promises"
import { join, extname, relative } from "path"
import { pathToFileURL } from "url"
| 9 | + |
/** One internal link with a trailing slash, located within a scanned file. */
interface LinkIssue {
  /** Path of the file containing the link, relative to the scan root. */
  file: string
  /** 1-based line number on which the link was found. */
  line: number
  /** The link target exactly as written in the file. */
  link: string
  /** The full source line, trimmed, for display next to the finding. */
  context: string
}
| 16 | + |
// ---------------------------------------------------------------------------
// Scan configuration
// ---------------------------------------------------------------------------

/** File extensions (including the leading dot) that are scanned for links. */
const INCLUDE_EXTENSIONS = [
  ".mdx",
  ".astro",
  ".md",
]

/** Directory names that are never descended into while walking the tree. */
const EXCLUDE_DIRS = [
  "node_modules",
  ".git",
  "dist",
  ".astro",
  "temp",
  "bin",
]

/** Root of the scan: the directory the script is launched from. */
const DOCS_ROOT = process.cwd()
| 21 | + |
/**
 * Regex patterns that capture link targets ending in a slash.
 *
 * Contract relied on by the scanner:
 * - every pattern carries the `g` flag so a line can be scanned for all matches;
 * - the markdown pattern (whose match starts with `[`) captures the URL in
 *   group 2, every other pattern captures it in group 1.
 *
 * The patterns are kept mutually exclusive so that one link occurrence is
 * matched by exactly one pattern. Quoted `href` attributes are therefore
 * handled only by the generic attribute pattern rather than by a dedicated
 * `href` pattern as well, which would report every such link twice.
 */
const LINK_PATTERNS = [
  // Markdown links: [text](/path/)
  /\[([^\]]*)\]\(([^)]*\/)\)/g,
  // Astro/JSX expression links: href={"/path/"}
  /href=\{["']([^"']*\/)["']\}/g,
  // import statements: from "/path/"
  /from\s+["']([^"']*\/)["']/g,
  // Quoted HTML/Astro attributes: href="/path/", src="/path/", to="/path/", path="/path/".
  // The leading \b stops attribute names that merely end in one of these words
  // (e.g. `auto="/x/"` ending in "to") from being treated as link attributes.
  /\b(?:path|src|href|to)=["']([^"']*\/)["']/g,
]
| 35 | + |
/**
 * Patterns identifying link targets that must never be reported as internal
 * links (external URLs, non-navigational schemes, in-page anchors, assets).
 *
 * None of these carry the `g` flag, so `test()` is stateless and the patterns
 * can be reused safely across calls.
 */
const EXCLUDE_PATTERNS = [
  /^https?:\/\//i, // External URLs (URI schemes are case-insensitive)
  /^\/\//, // Protocol-relative external URLs, e.g. //cdn.example.com/lib/
  /^mailto:/i, // Email links
  /^tel:/i, // Phone links
  /^#/, // Hash links
  /\.(jpg|jpeg|png|gif|svg|pdf|zip|tar|gz|css|js|json|xml|ico)$/i, // File extensions
]
| 44 | + |
/**
 * Recursively collects the files under `dir` whose extension is listed in
 * INCLUDE_EXTENSIONS, skipping any directory named in EXCLUDE_DIRS.
 *
 * @param dir - Directory to walk.
 * @param allFiles - Accumulator that matching paths are appended to; it is
 *   mutated in place and shared with the recursive calls.
 * @returns The accumulator, for convenience.
 *
 * A directory that cannot be read is reported with a warning and otherwise
 * ignored, so one unreadable folder does not abort the whole scan.
 */
async function getAllFiles(dir: string, allFiles: string[] = []): Promise<string[]> {
  try {
    const entries = await readdir(dir, { withFileTypes: true })

    for (const entry of entries) {
      const entryPath = join(dir, entry.name)

      if (entry.isDirectory()) {
        // Descend unless the directory name is on the exclusion list.
        if (EXCLUDE_DIRS.includes(entry.name)) {
          continue
        }
        await getAllFiles(entryPath, allFiles)
        continue
      }

      // Entries that are neither directories nor regular files are ignored.
      if (entry.isFile() && INCLUDE_EXTENSIONS.includes(extname(entry.name))) {
        allFiles.push(entryPath)
      }
    }
  } catch (error) {
    console.warn(`Warning: Could not read directory ${dir}:`, error)
  }

  return allFiles
}
| 70 | + |
/**
 * Finds every internal link with a trailing slash in `content`.
 *
 * Each line is matched against every pattern in LINK_PATTERNS. Because those
 * patterns may overlap (more than one can match the same attribute), each URL
 * occurrence is identified by its character offset within the line and is
 * reported only once.
 *
 * @param content - Full text of a file.
 * @returns One entry per distinct link occurrence, ordered by line, with a
 *   1-based `line` number and the trimmed source line as `context`.
 *
 * Every pattern in LINK_PATTERNS must carry the `g` flag; `matchAll` throws a
 * TypeError for a non-global regex.
 */
function extractLinks(content: string): { link: string; line: number; context: string }[] {
  const found: { link: string; line: number; context: string }[] = []

  content.split("\n").forEach((lineText, index) => {
    // Offsets (within this line) of URLs that have already been reported.
    const reportedOffsets = new Set<number>()

    for (const pattern of LINK_PATTERNS) {
      // matchAll works on a private copy of the regex, so no lastIndex
      // bookkeeping on the shared pattern objects is needed.
      for (const match of lineText.matchAll(pattern)) {
        // For markdown links [text](/path/), the URL is in group 2;
        // for all other patterns it is in group 1.
        const linkUrl = match[0].startsWith("[") ? match[2] : match[1]
        if (!linkUrl || !isInternalLinkWithTrailingSlash(linkUrl)) {
          continue
        }

        // The URL is always the last thing captured before the closing
        // delimiter, so lastIndexOf locates it even when the link text
        // repeats the URL (e.g. "[/a/](/a/)").
        const urlOffset = (match.index ?? 0) + match[0].lastIndexOf(linkUrl)
        if (reportedOffsets.has(urlOffset)) {
          continue
        }
        reportedOffsets.add(urlOffset)

        found.push({
          link: linkUrl,
          line: index + 1,
          context: lineText.trim(),
        })
      }
    }
  })

  return found
}
| 99 | + |
/**
 * Decides whether `url` is a site-internal path that ends with a slash.
 *
 * @param url - Link target as written in the source file.
 * @returns `true` when the URL matches none of EXCLUDE_PATTERNS, begins and
 *   ends with "/", and is longer than one character (so the bare root "/"
 *   is not reported).
 */
function isInternalLinkWithTrailingSlash(url: string): boolean {
  const isExcluded = EXCLUDE_PATTERNS.some((excludePattern) => excludePattern.test(url))

  return !isExcluded && url.startsWith("/") && url.endsWith("/") && url.length > 1
}
| 115 | + |
/**
 * Reads one file and returns the trailing-slash link issues found in it.
 *
 * @param filePath - Path of the file to scan.
 * @returns One LinkIssue per link found, with `file` expressed relative to
 *   DOCS_ROOT. If the file cannot be processed, a warning is logged and an
 *   empty list is returned so the overall scan keeps going.
 */
async function analyzeFile(filePath: string): Promise<LinkIssue[]> {
  try {
    const text = await readFile(filePath, "utf-8")
    const found = extractLinks(text)
    const file = relative(DOCS_ROOT, filePath)

    return found.map(({ line, link, context }) => ({ file, line, link, context }))
  } catch (error) {
    console.warn(`Warning: Could not read file ${filePath}:`, error)
    return []
  }
}
| 133 | + |
/**
 * Buckets issues by the file they were found in.
 *
 * @param issues - Issues in any order.
 * @returns A map from file path to that file's issues. Keys appear in order
 *   of first occurrence, and each bucket keeps the input order.
 */
function groupByFile(issues: LinkIssue[]): Map<string, LinkIssue[]> {
  return issues.reduce((byFile, issue) => {
    const bucket = byFile.get(issue.file)

    if (bucket === undefined) {
      byFile.set(issue.file, [issue])
    } else {
      bucket.push(issue)
    }

    return byFile
  }, new Map<string, LinkIssue[]>())
}
| 149 | + |
/**
 * Prints a human-readable report of the issues to stdout: a per-file listing,
 * a short summary, and the ten most frequently occurring link targets.
 *
 * @param issues - All issues collected by the scan; an empty list prints a
 *   success message and nothing else.
 */
function printResults(issues: LinkIssue[]): void {
  if (issues.length === 0) {
    console.log("✅ No internal links with trailing slashes found!")
    return
  }

  console.log(`🔍 Found ${issues.length} internal links with trailing slashes:\n`)

  // Per-file listing.
  const byFile = groupByFile(issues)
  byFile.forEach((fileIssues, file) => {
    console.log(`📁 ${file} (${fileIssues.length} issues)`)

    fileIssues.forEach((issue) => {
      console.log(` Line ${issue.line}: ${issue.link}`)
      console.log(` Context: ${issue.context}`)
      console.log()
    })
  })

  // Summary
  console.log(`\n📊 Summary:`)
  console.log(` Files affected: ${byFile.size}`)
  console.log(` Total issues: ${issues.length}`)

  // Tally how often each link target occurs across all files.
  const occurrences = new Map<string, number>()
  for (const { link } of issues) {
    occurrences.set(link, (occurrences.get(link) ?? 0) + 1)
  }

  // Highest count first, capped at ten entries.
  const topLinks = [...occurrences]
    .sort(([, countA], [, countB]) => countB - countA)
    .slice(0, 10)

  if (topLinks.length > 0) {
    console.log(`\n🔗 Most common problematic links:`)
    for (const [link, count] of topLinks) {
      console.log(` ${link} (${count} occurrences)`)
    }
  }
}
| 192 | + |
/**
 * Command-line entry point: walks DOCS_ROOT, analyzes every matching file,
 * prints the report, and exits with status 1 when issues were found or the
 * analysis itself failed (so the script can gate a CI job).
 */
async function main(): Promise<void> {
  console.log("🔍 Scanning for internal links with trailing slashes...\n")

  try {
    const allFiles = await getAllFiles(DOCS_ROOT)
    const total = allFiles.length
    console.log(`📋 Found ${total} files to analyze`)

    if (total === 0) {
      console.log("❌ No files found to analyze. Check the directory path.")
      return
    }

    const allIssues: LinkIssue[] = []

    // Files are analyzed one at a time so progress can be reported in order.
    for (const [position, file] of allFiles.entries()) {
      allIssues.push(...(await analyzeFile(file)))

      const processedFiles = position + 1
      if (processedFiles % 50 === 0) {
        console.log(` Processed ${processedFiles}/${total} files...`)
      }
    }

    console.log(`✅ Finished analyzing ${total} files`)

    printResults(allIssues)

    // Exit with error code if issues found (useful for CI)
    if (allIssues.length > 0) {
      console.log("\n⚠️ Please fix the trailing slash issues above.")
      process.exit(1)
    }
  } catch (error) {
    console.error("❌ Error running analysis:", error)
    process.exit(1)
  }
}
| 232 | + |
// Run the scan only when this module is the script Node was started with,
// not when it is imported by another module.
//
// The entry path is converted with pathToFileURL rather than by prefixing
// "file://": import.meta.url is a percent-encoded file URL (and uses
// "file:///C:/..." on Windows), so naive string concatenation never matches
// for paths containing spaces or other encoded characters, or on Windows.
// NOTE(review): a symlinked launcher path would still not compare equal —
// confirm whether that launch mode needs support.
if (process.argv[1] !== undefined && import.meta.url === pathToFileURL(process.argv[1]).href) {
  // main() handles its own errors; `void` marks the promise as intentionally not awaited.
  void main()
}

export { main as detectTrailingSlashLinks }
0 commit comments