Skip to content

Commit 3271c48

Browse files
committed
Add link checker
1 parent 731558b commit 3271c48

File tree

10 files changed

+505
-2
lines changed

10 files changed

+505
-2
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
---
# Runs the Markdown link checker on pushes to main and on pull requests.
name: Validate links

on:
  push:
    branches:
      - main
  pull_request:
    types:
      - opened
      - synchronize
      - reopened

jobs:
  build:
    name: Validate links
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      # Use the Node.js version pinned in .tool-versions instead of whatever
      # the runner image happens to ship.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version-file: .tool-versions
      # `npm ci` installs exactly what package-lock.json records and fails if
      # the lockfile is out of sync with package.json.
      - name: Install dependencies
        run: npm ci
      # Runs the already-compiled script from dist/; this job has no build step.
      - name: Run the checker
        run: node dist/validateLinks.js

.prettierignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/dist/

.tool-versions

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nodejs 23.11.0

dist/parse.js

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import mit from "markdown-it";
2+
/**
 * Extracts every link and image from a Markdown document.
 *
 * The document is tokenised with markdown-it and the token tree is walked
 * recursively (block tokens carry their inline tokens in `children`).
 *
 * @param {string} content Markdown source text.
 * @returns {{ links: { target: string, content: string }[], images: { src: string, alt: string }[] }}
 *   `links` holds the `href` of each link plus the concatenated `content` of
 *   the tokens between its `link_open` and the next `link_close`; `images`
 *   holds each image's `src` and its token `content` as `alt`.
 */
export const parse = (content) => {
    const tokens = mit().parse(content, {});
    const parsedLinks = [];
    const parsedImages = [];
    const scan = (siblings) => {
        siblings.forEach((token, index) => {
            if (token.type === "link_open") {
                const href = token.attrGet("href");
                const rest = siblings.slice(index + 1);
                const closeAt = rest.findIndex((t) => t.type === "link_close");
                // attrGet returns null when the attribute is absent; a link
                // without a target (or without a closing token) is skipped
                // rather than recorded with a null target.
                if (href !== null && closeAt !== -1) {
                    parsedLinks.push({
                        target: href,
                        content: rest
                            .slice(0, closeAt)
                            .map((t) => t.content)
                            .join(""),
                    });
                }
            }
            if (token.type === "image") {
                const src = token.attrGet("src");
                if (src !== null) {
                    parsedImages.push({ src, alt: token.content });
                }
            }
            if (token.children) {
                scan(token.children);
            }
        });
    };
    scan(tokens);
    return {
        links: parsedLinks,
        images: parsedImages,
    };
};

dist/validateLinks.js

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import { exec } from "node:child_process";
2+
import { readFile, stat } from "node:fs/promises";
3+
import { parse } from "./parse.js";
4+
import path, { dirname, normalize } from "node:path/posix";
5+
import { isAbsolute } from "node:path";
6+
/**
 * Lists the Markdown files tracked by git, relative to the current working
 * directory.
 *
 * Runs `git ls-files -z`; the NUL-separated output keeps file names intact
 * even when they contain spaces or newlines.
 *
 * @returns {Promise<string[]>} Tracked paths that end in ".md".
 * @throws Rejects when the command fails or writes anything to stderr.
 */
const findMarkdownFilesInGit = async () => {
    return await new Promise((resolve, reject) => {
        // The default output buffer can be too small for very large
        // repositories, so allow considerably more.
        const options = { maxBuffer: 64 * 1024 * 1024 };
        exec("git ls-files -z", options, (error, stdout, stderr) => {
            // Exactly one outcome is reported per run: failure, unexpected
            // stderr output, or the file list.
            if (error) {
                reject(error);
                return;
            }
            if (stderr) {
                reject(new Error(`git ls-files outputted on stderr: ${stderr}`));
                return;
            }
            resolve(stdout.split("\0").filter((s) => s.endsWith(".md")));
        });
    });
};
18+
/**
 * Returns the git-tracked Markdown files that should be link-checked.
 *
 * Paths beginning with `README`, `LICENSE` or `contributing/` are left out.
 *
 * @returns {Promise<string[]>} The remaining Markdown paths.
 */
const findMarkdownFiles = async () => {
    const excluded = /^(README|LICENSE|contributing\/)/;
    const tracked = await findMarkdownFilesInGit();
    return tracked.filter((name) => excluded.test(name) === false);
};
22+
/**
 * Reads and parses each of the given Markdown files.
 *
 * All files are read concurrently; the result order follows `filenames`.
 *
 * @param {string[]} filenames Paths of the Markdown files to read.
 * @returns {Promise<object[]>} One entry per file: the `filename` together
 *   with whatever `parse` returned for its UTF-8 content.
 */
const scanForLinks = async (filenames) => {
    const parseOne = async (filename) => {
        const text = await readFile(filename, "utf-8");
        const parsed = parse(text);
        return { filename, ...parsed };
    };
    const pending = filenames.map((filename) => parseOne(filename));
    return Promise.all(pending);
};
28+
/**
 * Matches references that point outside the repository: anything starting
 * with a URI scheme (`https:`, `mailto:`, `git+ssh:` …) or with `//`
 * (a protocol-relative URL).
 */
const externalLinkPattern = /^(?:\w[\w+.-]*:|\/\/)/;
/**
 * Tells whether a link or image target is external, i.e. not a path inside
 * the repository.
 *
 * @param {string} t The raw target as written in the Markdown source.
 * @returns {boolean} True for scheme-prefixed and protocol-relative targets.
 */
const isExternalLink = (t) => externalLinkPattern.test(t);
30+
/** Drops the query string and fragment from a reference, keeping only its path part. */
const stripQueryAndFragment = (reference) => reference.split(/[?#]/)[0];
/**
 * Undoes percent-encoding so the reference can be compared with names on disk.
 * markdown-it percent-encodes link destinations (spaces, non-ASCII characters).
 * A malformed escape sequence leaves the reference unchanged.
 */
const decodeReference = (reference) => {
    try {
        return decodeURIComponent(reference);
    }
    catch {
        return reference;
    }
};
/**
 * Maps a link or image reference found in `fromFile` to a path relative to the
 * working directory. Absolute references are resolved from the working
 * directory, relative ones from the directory of `fromFile`.
 */
const resolveReference = (fromFile, reference) => {
    const filePath = decodeReference(stripQueryAndFragment(reference));
    return path.isAbsolute(filePath)
        ? normalize(`./${filePath}`)
        : normalize(path.join(dirname(fromFile), filePath));
};
/** Stats a path, yielding `undefined` instead of rejecting when that fails. */
const statOrUndefined = (target) => stat(target).catch(() => undefined);
/**
 * Validates every internal image and link in the tracked Markdown files.
 *
 * Prints one `error <CODE> <file>:0 <message>` line per problem and exits the
 * process with status 1 when at least one problem was found. External targets
 * are not checked, and neither are pure `#fragment` links.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
    const markdownFilenames = await findMarkdownFiles();
    const parsedFiles = await scanForLinks(markdownFilenames);
    let errors = 0;
    for (const parsedFile of parsedFiles) {
        for (const img of parsedFile.images) {
            // A reference without a target has nothing to check.
            if (img.src == null || isExternalLink(img.src)) {
                continue;
            }
            const resolved = resolveReference(parsedFile.filename, img.src);
            if ((await statOrUndefined(resolved)) === undefined) {
                console.log(`error BROKEN-INTERNAL-IMAGE ${parsedFile.filename}:0 Broken internal image reference ${img.src}`);
                ++errors;
            }
        }
        for (const link of parsedFile.links) {
            if (link.target == null) {
                continue;
            }
            if (link.target.startsWith("#")) {
                // Already checked by the linter
                continue;
            }
            if (isExternalLink(link.target)) {
                continue;
            }
            // Reported as written in the source, minus query string and fragment.
            const target = stripQueryAndFragment(link.target);
            const resolved = resolveReference(parsedFile.filename, link.target);
            const stats = await statOrUndefined(resolved);
            if (stats === undefined) {
                console.log(`error BROKEN-INTERNAL-LINK ${parsedFile.filename}:0 Link target does not exist: ${target}`);
                ++errors;
            }
            else if (stats.isDirectory()) {
                // A directory link is accepted only when the directory has a README.md.
                const readme = await statOrUndefined(`${resolved}/README.md`);
                if (readme === undefined) {
                    console.log(`error LINK-TO-RAW-DIR ${parsedFile.filename}:0 Link to a directory, which has no README: ${target}`);
                    ++errors;
                }
            }
        }
    }
    if (errors > 0) {
        console.error("Link validation found errors");
        process.exit(1);
    }
};
84+
// Script entry point: run the validation and turn any unexpected failure
// into a logged error plus a non-zero exit status.
const reportAndExit = (reason) => {
    console.error(reason);
    process.exit(1);
};
main().catch(reportAndExit);

package-lock.json

Lines changed: 62 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,11 @@
22
"dependencies": {
33
"markdownlint-cli": "^0.44.0",
44
"prettier": "^3.5.3"
5+
},
6+
"type": "module",
7+
"devDependencies": {
8+
"@types/markdown-it": "^14.1.2",
9+
"@types/node": "^22.14.1",
10+
"typescript": "^5.8.3"
511
}
612
}

parse.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import mit from "markdown-it";
2+
import type { Token } from "markdown-it/index.js";
3+
4+
/** A link found in a Markdown document. */
export interface ParsedLink {
  /** The link's `href` attribute. */
  readonly target: string;
  /** Concatenated content of the tokens between the link's open and close tokens. */
  readonly content: string;
}

/** An image found in a Markdown document. */
export interface ParsedImage {
  /** The image's `src` attribute. */
  readonly src: string;
  /** The image token's content. */
  readonly alt: string;
}

/** Everything `parse` extracts from one Markdown document. */
export interface ParseResult {
  readonly links: readonly ParsedLink[];
  readonly images: readonly ParsedImage[];
}
18+
19+
/**
 * Extracts every link and image from a Markdown document.
 *
 * The document is tokenised with markdown-it and the token tree is walked
 * recursively (block tokens carry their inline tokens in `children`).
 *
 * @param content Markdown source text.
 * @returns The links (target plus the concatenated content of the tokens
 *   between `link_open` and the next `link_close`) and the images (`src` plus
 *   the image token's content as `alt`) found in the document.
 */
export const parse = (content: string): ParseResult => {
  const tokens = mit().parse(content, {});

  const parsedLinks: ParsedLink[] = [];
  const parsedImages: ParsedImage[] = [];

  const scan = (siblings: readonly Token[]): void => {
    siblings.forEach((token, index) => {
      if (token.type === "link_open") {
        const href = token.attrGet("href");
        const rest = siblings.slice(index + 1);
        const closeAt = rest.findIndex((t) => t.type === "link_close");

        // attrGet returns null when the attribute is absent; a link without
        // a target (or without a closing token) is skipped rather than
        // recorded with a value that is not really a string.
        if (href !== null && closeAt !== -1) {
          parsedLinks.push({
            target: href,
            content: rest
              .slice(0, closeAt)
              .map((t) => t.content)
              .join(""),
          });
        }
      }

      if (token.type === "image") {
        const src = token.attrGet("src");
        if (src !== null) {
          parsedImages.push({ src, alt: token.content });
        }
      }

      if (token.children) scan(token.children);
    });
  };

  scan(tokens);

  return {
    links: parsedLinks,
    images: parsedImages,
  };
};

0 commit comments

Comments
 (0)