Skip to content

Commit 574c38d

Browse files
Merge pull request #34 from HackYourFuture-CPH/validate-links-take-2
Check that local image/link targets are valid
2 parents 6184811 + 7fa43c6 commit 574c38d

File tree

10 files changed

+391
-2
lines changed

10 files changed

+391
-2
lines changed

.prettierignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/dist/

.tool-versions

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nodejs 23.11.0

dist/parse.js

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import mit from "markdown-it";
2+
/**
 * Parse Markdown source and collect every link and image it contains.
 *
 * The markdown-it token tree is walked depth-first. For each `link_open`
 * token the matching `link_close` among its siblings is located and the
 * `content` of the tokens in between is concatenated as the link text.
 *
 * @param content Markdown source text.
 * @returns An object with `links` (`{ target, content }`) and `images`
 *   (`{ src, alt }`), both in document order.
 */
export const parse = (content) => {
  const links = [];
  const images = [];

  const walk = (siblings) => {
    for (let position = 0; position < siblings.length; position++) {
      const current = siblings[position];

      if (current.type === "link_open") {
        // Find the first link_close that follows this link_open at the same level.
        let closeAt = -1;
        for (let probe = position + 1; probe < siblings.length; probe++) {
          if (siblings[probe].type === "link_close") {
            closeAt = probe;
            break;
          }
        }

        if (closeAt > position) {
          const inner = siblings.slice(position + 1, closeAt);
          const text = inner.map((piece) => piece.content).join("");
          links.push({ target: current.attrGet("href"), content: text });
        }
      }

      if (current.type === "image") {
        images.push({ src: current.attrGet("src"), alt: current.content });
      }

      // Inline tokens carry their own child list; descend into it.
      if (current.children) {
        walk(current.children);
      }
    }
  };

  walk(mit().parse(content, {}));

  return { links, images };
};

dist/validateLinks.js

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { exec } from "node:child_process";
2+
import { readFile } from "node:fs/promises";
3+
import { parse } from "./parse.js";
4+
import path, { dirname, normalize } from "node:path/posix";
5+
import { isAbsolute } from "node:path";
6+
/**
 * List every file tracked by git in the current working directory.
 *
 * Runs `git ls-files -z` and splits the NUL-separated output, dropping the
 * empty entry produced by the trailing separator.
 *
 * @returns A promise resolving to the tracked file paths.
 * @throws Rejects when the command fails or writes anything to stderr.
 */
const findAllFilesInGit = async () => {
  return await new Promise((resolve, reject) => {
    // The default maxBuffer (1 MiB) is too small for repositories with many
    // tracked files; exceeding it kills the child and reports an error.
    const options = { maxBuffer: 64 * 1024 * 1024 };
    exec("git ls-files -z", options, (error, stdout, stderr) => {
      // Return after each rejection so the promise is settled exactly once.
      if (error) {
        reject(error);
        return;
      }
      if (stderr) {
        reject(new Error(`git ls-files outputted on stderr: ${stderr}`));
        return;
      }
      resolve(stdout.split("\0").filter(Boolean));
    });
  });
};
18+
/**
 * Select the Markdown files that should be checked.
 *
 * A file qualifies when its name ends in `.md` (case-insensitively) and its
 * path does not start with `README`, `LICENSE` or `contributing/`.
 *
 * @param files Repository-relative file paths.
 * @returns The qualifying paths, in their original order.
 */
const findMarkdownFiles = (files) => {
  const skipped = /^(README|LICENSE|contributing\/)/;
  const selected = [];
  for (const candidate of files) {
    const looksLikeMarkdown = candidate.toLocaleLowerCase().endsWith(".md");
    if (looksLikeMarkdown && !skipped.test(candidate)) {
      selected.push(candidate);
    }
  }
  return selected;
};
22+
/**
 * Read and parse each of the given Markdown files concurrently.
 *
 * @param filenames Paths of the files to read (as UTF-8).
 * @returns A promise resolving to one entry per file, holding the
 *   `filename` together with everything `parse` returned for its content.
 */
const scanForLinks = async (filenames) => {
  const readAndParse = async (filename) => {
    const text = await readFile(filename, "utf-8");
    const parsed = parse(text);
    return { filename, ...parsed };
  };

  const pending = filenames.map((filename) => readAndParse(filename));
  return Promise.all(pending);
};
28+
// A target counts as external when it starts with a scheme-like prefix:
// one or more word characters followed by a colon (e.g. `https:`, `mailto:`).
const externalLinkPattern = /^\w+:/;

/**
 * Tell whether a link/image target points outside the repository.
 *
 * @param t The raw target string.
 * @returns `true` when the target begins with a scheme-like prefix.
 */
const isExternalLink = (t) => {
  return externalLinkPattern.exec(t) !== null;
};
30+
/**
 * Check every tracked Markdown file for broken repository-internal image
 * and link targets.
 *
 * Each problem is printed to stdout as one `error BROKEN-INTERNAL-…` line.
 * The process exits with status 1 when at least one problem was found.
 *
 * Targets are compared case-insensitively against the output of
 * `git ls-files`. markdown-it percent-encodes destinations (for example a
 * space becomes `%20`), so a target is accepted when either its spelling as
 * parsed or its percent-decoded spelling matches a tracked path.
 */
const main = async () => {
  const gitFiles = await findAllFilesInGit();
  // For now, we assume that there are no case clashes
  const lowercaseGitFiles = gitFiles.map((s) => s.toLocaleLowerCase());
  const trackedFiles = new Set(lowercaseGitFiles);
  const markdownFilenames = findMarkdownFiles(gitFiles);
  const parsedFiles = await scanForLinks(markdownFilenames);

  // Spellings of a target to try: as parsed, plus the percent-decoded form
  // when it differs. Malformed escape sequences leave only the raw spelling.
  const spellingsOf = (target) => {
    let decoded = target;
    try {
      decoded = decodeURIComponent(target);
    } catch {
      // Not decodable; fall back to the raw spelling only.
    }
    return decoded === target ? [target] : [target, decoded];
  };

  // True when the resolved path is exactly a tracked file.
  const isTrackedFile = (resolved) =>
    trackedFiles.has(resolved.toLocaleLowerCase());

  // True when at least one tracked file lives underneath the resolved path.
  const isTrackedDirectory = (resolved) => {
    const lowered = resolved.toLocaleLowerCase();
    const prefix = lowered.endsWith("/") ? lowered : `${lowered}/`;
    return lowercaseGitFiles.some((s) => s.startsWith(prefix));
  };

  // Absolute targets are resolved from the repository root, all others
  // relative to the directory of the file containing the link.
  const resolveLink = (fromFile, target) =>
    isAbsolute(target)
      ? normalize(`./${target}`)
      : normalize(path.join(dirname(fromFile), target));

  let errors = 0;

  for (const parsedFile of parsedFiles) {
    const baseDir = dirname(parsedFile.filename);

    for (const img of parsedFile.images) {
      if (isExternalLink(img.src)) {
        continue;
      }
      const exists = spellingsOf(img.src).some((src) =>
        isTrackedFile(path.join(baseDir, src)),
      );
      if (!exists) {
        console.log(`error BROKEN-INTERNAL-IMAGE ${parsedFile.filename}:0 Broken internal image reference ${img.src}`);
        ++errors;
      }
    }

    for (const link of parsedFile.links) {
      if (link.target.startsWith("#")) {
        // Already checked by the linter
        continue;
      }
      if (isExternalLink(link.target)) {
        continue;
      }
      // Only the path part matters here; drop any fragment.
      const target = link.target.split("#")[0];
      const exists = spellingsOf(target).some((spelling) => {
        const resolved = resolveLink(parsedFile.filename, spelling);
        return isTrackedFile(resolved) || isTrackedDirectory(resolved);
      });
      if (!exists) {
        console.log(`error BROKEN-INTERNAL-LINK ${parsedFile.filename}:0 Link target does not exist: ${target}`);
        ++errors;
      }
    }
  }

  if (errors > 0)
    process.exit(1);
};
77+
// Run the checker; any unexpected failure is logged and turns into exit status 1.
main().then(undefined, (failure) => {
  console.error(failure);
  process.exit(1);
});

lint

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,31 @@ else
1212
fi
1313
' 0
1414

15-
npm exec -- markdownlint --ignore 'node_modules/' '**/*.md'
16-
npm exec -- prettier --check .
15+
rc=0
16+
17+
echo "validateLinks: ..."
18+
if npm exec -- node dist/validateLinks.js ; then
19+
echo validateLinks: PASS
20+
else
21+
echo validateLinks: FAIL
22+
rc=1
23+
fi
24+
25+
echo "markdownlint: ..."
26+
if npm exec -- markdownlint --ignore 'node_modules/' '**/*.md' ; then
27+
echo "markdownlint: PASS"
28+
else
29+
echo "markdownlint: FAIL"
30+
rc=1
31+
fi
32+
33+
echo "prettier: ..."
34+
if npm exec -- prettier --check . ; then
35+
echo "prettier: PASS"
36+
else
37+
echo "prettier: FAIL"
38+
rc=1
39+
fi
40+
41+
exit $rc
1742
fi

package-lock.json

Lines changed: 61 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,11 @@
22
"dependencies": {
33
"markdownlint-cli": "^0.44.0",
44
"prettier": "^3.5.3"
5+
},
6+
"type": "module",
7+
"devDependencies": {
8+
"@types/markdown-it": "^14.1.2",
9+
"@types/node": "^22.14.1",
10+
"typescript": "^5.8.3"
511
}
612
}

parse.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import mit from "markdown-it";
2+
import type { Token } from "markdown-it/index.js";
3+
4+
/** A link found in a Markdown document. */
export type ParsedLink = Readonly<{
  /** The link destination (`href`). */
  target: string;
  /** Concatenated `content` of the tokens between the link's open and close. */
  content: string;
}>;
8+
9+
/** An image found in a Markdown document. */
export type ParsedImage = Readonly<{
  /** The image source (`src`). */
  src: string;
  /** The `content` of the image token, used as the alternative text. */
  alt: string;
}>;
13+
14+
/** Everything `parse` extracts from one Markdown document. */
export type ParseResult = Readonly<{
  /** All links, in document order. */
  links: ReadonlyArray<ParsedLink>;
  /** All images, in document order. */
  images: ReadonlyArray<ParsedImage>;
}>;
18+
19+
/**
 * Parse Markdown source and collect every link and image it contains.
 *
 * The markdown-it token tree is walked depth-first. For each `link_open`
 * token the next `link_close` among its siblings is located and the
 * `content` of the tokens in between is joined to form the link text.
 *
 * Links without an `href` and images without a `src` are skipped, so the
 * returned `target` / `src` values are always real strings.
 *
 * @param content Markdown source text.
 * @returns The links and images found, each in document order.
 */
export const parse = (content: string): ParseResult => {
  const parser = mit();
  const tokens = parser.parse(content, {});

  const parsedLinks: ParsedLink[] = [];
  const parsedImages: ParsedImage[] = [];

  const scan = (siblings: Token[]): void => {
    siblings.forEach((token, index) => {
      if (token.type === "link_open") {
        // attrGet returns null when the attribute is absent; guard instead of asserting.
        const target = token.attrGet("href");
        const indexOfNextClose = siblings.findIndex(
          (t2, i2) => i2 > index && t2.type === "link_close",
        );

        if (target !== null && indexOfNextClose > index) {
          parsedLinks.push({
            target,
            content: siblings
              .slice(index + 1, indexOfNextClose)
              .map((t) => t.content)
              .join(""),
          });
        }
      }

      if (token.type === "image") {
        const src = token.attrGet("src");
        if (src !== null) {
          parsedImages.push({ src, alt: token.content });
        }
      }

      // Inline tokens carry their own child list; descend into it.
      if (token.children) scan(token.children);
    });
  };

  scan(tokens);

  return {
    links: parsedLinks,
    images: parsedImages,
  };
};

tsconfig.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"compilerOptions": {
3+
"target": "ESNext",
4+
"module": "NodeNext",
5+
"moduleResolution": "nodenext",
6+
"outDir": "./dist/",
7+
"esModuleInterop": true,
8+
"forceConsistentCasingInFileNames": true,
9+
"strict": true,
10+
"skipLibCheck": true
11+
}
12+
}

0 commit comments

Comments
 (0)