-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathindex.js
More file actions
144 lines (128 loc) · 5.3 KB
/
index.js
File metadata and controls
144 lines (128 loc) · 5.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import {fileURLToPath} from 'url';
import {join} from 'path';
import fs from 'fs';
import {checkLinksInHtml, normalizeHtmlFilePath, loadExternalLinkCache, saveExternalLinkCache} from './check-links.js';
import fastGlob from 'fast-glob';
// Default directory for the link checker's files; used when options.linkCheckerDir is not set.
const LINK_CHECKER_DIR = '.link-checker';
// File name (inside the link checker directory) of the external link cache that is loaded and saved.
const VERIFIED_LINKS_FILE = 'verified-external-links.tsv';
// File name (inside the link checker directory) of the broken links report.
const BROKEN_LINKS_FILE = 'broken-links.log';
/**
 * Prepares the link checker directory for use.
 *
 * Creates the directory (including missing parents) when it does not exist,
 * then seeds it with a `.gitignore` that excludes both `broken-links.log` and
 * the `.gitignore` itself, so the directory only shows up in git once
 * `verified-external-links.tsv` has been written. An existing `.gitignore`
 * is left untouched.
 *
 * @param {string} dirPath - Path of the link checker directory.
 */
function ensureLinkCheckerDir(dirPath) {
  const directoryMissing = !fs.existsSync(dirPath);
  if (directoryMissing) {
    fs.mkdirSync(dirPath, {recursive: true});
  }

  const gitignorePath = join(dirPath, '.gitignore');
  if (fs.existsSync(gitignorePath)) {
    // Never overwrite a .gitignore that is already in place.
    return;
  }

  // The trailing empty entry makes the file end with a newline.
  const gitignoreLines = [
    '# Auto-generated by astro-broken-link-checker',
    '# Only verified-external-links.tsv should be committed to git',
    '.gitignore',
    'broken-links.log',
    '',
  ];
  fs.writeFileSync(gitignorePath, gitignoreLines.join('\n'), 'utf8');
}
/**
 * Astro integration factory that checks the built HTML pages for broken links.
 *
 * Options read here:
 *  - `linkCheckerDir`: directory holding the report and cache files
 *    (falls back to LINK_CHECKER_DIR).
 *  - `checkExternalLinks`: forwarded to `checkLinksInHtml`; also gates use of
 *    the external link cache.
 *  - `cacheExternalLinks`: set to `false` to skip loading/saving the cache.
 *  - `throwError`: when truthy, the build hook throws if any broken link was
 *    recorded.
 *
 * The `options` object is also used to carry `astroConfigRedirects` and
 * `trailingSlash` from the config hook to the build hook.
 *
 * @param {object} [options] - Integration settings (see above).
 * @returns {{name: string, hooks: object}} The integration definition with
 *   `astro:config:setup` and `astro:build:done` hooks.
 */
export default function astroBrokenLinksChecker(options = {}) {
  const linkCheckerDir = options.linkCheckerDir || LINK_CHECKER_DIR;
  const verifiedLinksPath = join(linkCheckerDir, VERIFIED_LINKS_FILE);
  const logFilePath = join(linkCheckerDir, BROKEN_LINKS_FILE);

  // State shared by every page checked through this integration instance.
  const brokenLinksMap = new Map(); // broken link -> Set of documents containing it
  const checkedLinks = new Map();
  let externalLinkCache = null;

  // Records the Astro settings the build hook needs later on.
  const handleConfigSetup = async ({config}) => {
    options.astroConfigRedirects = config.redirects;
    // Mirror Astro's trailingSlash setting; 'ignore' is Astro's default.
    options.trailingSlash = config.trailingSlash || 'ignore';
  };

  // Checks every generated HTML page, then reports the results.
  const handleBuildDone = async ({dir, logger}) => {
    const redirects = options.astroConfigRedirects;
    const distPath = fileURLToPath(dir);
    const htmlFiles = await fastGlob('**/*.html', {cwd: distPath});
    logger.info(`Checking ${htmlFiles.length} html pages for broken links`);

    // The directory (and its .gitignore) must exist before anything is written.
    ensureLinkCheckerDir(linkCheckerDir);

    // Caching is on unless explicitly disabled, and only matters for external checks.
    const cachingAllowed = options.cacheExternalLinks !== false;
    if (options.checkExternalLinks && cachingAllowed) {
      externalLinkCache = loadExternalLinkCache(verifiedLinksPath);
      if (externalLinkCache.size > 0) {
        logger.info(`Loaded ${externalLinkCache.size} verified external links from cache`);
      }
    }

    const startedAt = Date.now();

    const checkPage = async (htmlFile) => {
      const documentPath = join(distPath, htmlFile);
      const html = fs.readFileSync(documentPath, 'utf8');
      const baseUrl = normalizeHtmlFilePath(documentPath, distPath);
      await checkLinksInHtml(
        html,
        brokenLinksMap,
        baseUrl,
        documentPath,
        checkedLinks,
        distPath,
        redirects,
        logger,
        options.checkExternalLinks,
        options.trailingSlash,
        externalLinkCache,
      );
    };
    await Promise.all(htmlFiles.map((htmlFile) => checkPage(htmlFile)));

    // Persist the cache only when it was actually in use for this run.
    if (options.checkExternalLinks && cachingAllowed && externalLinkCache) {
      saveExternalLinkCache(verifiedLinksPath, externalLinkCache);
      logger.info(`Saved ${externalLinkCache.size} verified external links to cache`);
    }

    logBrokenLinks(brokenLinksMap, logFilePath, logger, linkCheckerDir);

    const elapsedMs = Date.now() - startedAt;
    logger.info(`Time to check links: ${elapsedMs} ms`);

    // Optionally fail the build when broken links were found.
    if (options.throwError && brokenLinksMap.size > 0) {
      throw new Error(`Broken links detected. Check the log file: ${logFilePath}`);
    }
  };

  return {
    name: 'astro-broken-links-checker',
    hooks: {
      'astro:config:setup': handleConfigSetup,
      'astro:build:done': handleBuildDone,
    },
  };
}
/**
 * Reports the outcome of a link check run.
 *
 * When broken links were recorded, a plain-text report is built (one entry per
 * broken link followed by the documents that contain it), written to
 * `logFilePath` when one is given, and echoed through `logger.info`.
 * When nothing is broken, a log file left over from an earlier run is removed.
 *
 * @param {Map<string, Set<string>>} brokenLinksMap - Broken link -> documents containing it.
 * @param {string} [logFilePath] - Where to write the report; when falsy the report is only logged.
 * @param {{info: Function}} logger - Logger used for all output.
 * @param {string} [linkCheckerDir] - When given, the directory is (re)created via
 *   `ensureLinkCheckerDir` before the report is written.
 */
function logBrokenLinks(brokenLinksMap, logFilePath, logger, linkCheckerDir) {
  if (brokenLinksMap.size === 0) {
    logger.info('No broken links detected.');
    if (logFilePath && fs.existsSync(logFilePath)) {
      // Format the path into the message itself: a logger that only accepts a
      // single message argument would otherwise drop the path from the output.
      logger.info(`Removing old log file: ${logFilePath}`);
      fs.rmSync(logFilePath);
    }
    return;
  }

  let logData = '';
  for (const [brokenLink, documentsSet] of brokenLinksMap.entries()) {
    logData += `Broken link: ${brokenLink}\n Found in:\n`;
    for (const doc of documentsSet) {
      logData += ` - ${doc}\n`;
    }
  }
  // Drop the trailing newline left by the last entry.
  logData = logData.trim();

  if (logFilePath) {
    // Ensure directory exists with .gitignore
    if (linkCheckerDir) {
      ensureLinkCheckerDir(linkCheckerDir);
    }
    fs.writeFileSync(logFilePath, logData, 'utf8');
    logger.info(`Broken links have been logged to ${logFilePath}`);
  }
  logger.info(logData);
}