diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..178135c2 --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +/dist/ diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 00000000..288562fa --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +nodejs 23.11.0 diff --git a/dist/parse.js b/dist/parse.js new file mode 100644 index 00000000..bfeee3d8 --- /dev/null +++ b/dist/parse.js @@ -0,0 +1,35 @@ +import mit from "markdown-it"; +export const parse = (content) => { + const parser = mit(); + const tokens = parser.parse(content, {}); + const parsedLinks = []; + const parsedImages = []; + const scan = (tokens) => { + tokens.forEach((token, index) => { + if (token.type === "link_open") { + const indexOfNextClose = tokens.findIndex((t2, i2) => i2 > index && t2.type === "link_close"); + if (indexOfNextClose > index) { + parsedLinks.push({ + target: token.attrGet("href"), + content: tokens + .slice(index + 1, indexOfNextClose) + .map((t) => t.content) + .join(""), + }); + } + } + if (token.type === "image") + parsedImages.push({ + src: token.attrGet("src"), + alt: token.content, + }); + if (token.children) + scan(token.children); + }); + }; + scan(tokens); + return { + links: parsedLinks, + images: parsedImages, + }; +}; diff --git a/dist/validateLinks.js b/dist/validateLinks.js new file mode 100644 index 00000000..018c7ea7 --- /dev/null +++ b/dist/validateLinks.js @@ -0,0 +1,80 @@ +import { exec } from "node:child_process"; +import { readFile } from "node:fs/promises"; +import { parse } from "./parse.js"; +import path, { dirname, normalize } from "node:path/posix"; +import { isAbsolute } from "node:path"; +const findAllFilesInGit = async () => { + return await new Promise((resolve, reject) => { + exec("git ls-files -z", (error, stdout, stderr) => { + if (error) + reject(error); + if (stderr) + reject(new Error(`git ls-files outputted on stderr: ${stderr}`)); + else + resolve(stdout.split("\0").filter(Boolean)); + }); + }); +}; +const findMarkdownFiles = (files) => { + const ignorePattern = /^(README|LICENSE|contributing\/)/; + return files.filter((f) => f.toLocaleLowerCase().endsWith(".md") && !ignorePattern.test(f)); +}; +const scanForLinks = async (filenames) => { + return Promise.all(filenames.map(async (filename) => { + const content = await readFile(filename, "utf-8"); + return { filename, ...parse(content) }; + })); +}; +const externalLinkPattern = /^\w+:/; +const isExternalLink = (t) => externalLinkPattern.test(t); +const main = async () => { + const gitFiles = await findAllFilesInGit(); + // For now, we assume that there are no case clashes + const lowercaseGitFiles = gitFiles.map((s) => s.toLocaleLowerCase()); + const markdownFilenames = findMarkdownFiles(gitFiles); + const parsedFiles = await scanForLinks(markdownFilenames); + let errors = 0; + for (const parsedFile of parsedFiles) { + for (const img of parsedFile.images) { + if (!isExternalLink(img.src)) { + const resolved = path.join(dirname(parsedFile.filename), img.src); + const exists = lowercaseGitFiles.includes(resolved.toLocaleLowerCase()); + if (!exists) { + console.log(`error BROKEN-INTERNAL-IMAGE ${parsedFile.filename}:0 Broken internal image reference ${img.src}`); + ++errors; + } + } + } + for (const link of parsedFile.links) { + if (link.target.startsWith("#")) { + // Already checked by the linter + continue; + } + if (!isExternalLink(link.target)) { + const target = link.target.split("#")[0]; + let resolved; + if (isAbsolute(target)) { + resolved = normalize(`./${target}`); + } + else { + resolved = normalize(path.join(dirname(parsedFile.filename), target)); + } + const isFile = lowercaseGitFiles.includes(resolved.toLocaleLowerCase()); + const resolvedWithTrailingSlash = resolved.endsWith("/") + ? resolved.toLocaleLowerCase() + : `${resolved.toLocaleLowerCase()}/`; + const isDirectory = lowercaseGitFiles.some((s) => s.startsWith(resolvedWithTrailingSlash)); + if (!isFile && !isDirectory) { + console.log(`error BROKEN-INTERNAL-LINK ${parsedFile.filename}:0 Link target does not exist: ${target}`); + ++errors; + } + } + } + } + if (errors > 0) + process.exit(1); +}; +main().catch((error) => { + console.error(error); + process.exit(1); +}); diff --git a/lint b/lint index 6118e276..e0dae5d8 100755 --- a/lint +++ b/lint @@ -12,6 +12,31 @@ else fi ' 0 - npm exec -- markdownlint --ignore 'node_modules/' '**/*.md' - npm exec -- prettier --check . + rc=0 + + echo "validateLinks: ..." + if npm exec -- node dist/validateLinks.js ; then + echo validateLinks: PASS + else + echo validateLinks: FAIL + rc=1 + fi + + echo "markdownlint: ..." + if npm exec -- markdownlint --ignore 'node_modules/' '**/*.md' ; then + echo "markdownlint: PASS" + else + echo "markdownlint: FAIL" + rc=1 + fi + + echo "prettier: ..." + if npm exec -- prettier --check . ; then + echo "prettier: PASS" + else + echo "prettier: FAIL" + rc=1 + fi + + exit $rc fi diff --git a/package-lock.json b/package-lock.json index a8c8ca11..be259e5f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,6 +7,11 @@ "dependencies": { "markdownlint-cli": "^0.44.0", "prettier": "^3.5.3" + }, + "devDependencies": { + "@types/markdown-it": "^14.1.2", + "@types/node": "^22.14.1", + "typescript": "^5.8.3" } }, "node_modules/@isaacs/cliui": { @@ -51,12 +56,47 @@ "integrity": "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==", "license": "MIT" }, + "node_modules/@types/linkify-it": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@types/linkify-it/-/linkify-it-5.0.0.tgz", + "integrity": "sha512-sVDA58zAw4eWAffKOaQH5/5j3XeayukzDk+ewSsnv3p4yJEZHCCzMDiZM8e0OUrRvmpGZ85jf4yDHkHsgBNr9Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/markdown-it": { + "version": "14.1.2", + "resolved": "https://registry.npmjs.org/@types/markdown-it/-/markdown-it-14.1.2.tgz", + "integrity": "sha512-promo4eFwuiW+TfGxhi+0x3czqTYJkG8qB17ZUJiVF10Xm7NLVRSLUsfRTU/6h1e24VvRnXCx+hG7li58lkzog==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/linkify-it": "^5", + "@types/mdurl": "^2" + } + }, + "node_modules/@types/mdurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@types/mdurl/-/mdurl-2.0.0.tgz", + "integrity": "sha512-RGdgjQUZba5p6QEFAVx2OGb8rQDL/cPRG7GiedRzMcJ1tYnUANBncjbSB1NRGwbvjcPeikRABz2nshyPk1bhWg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/ms": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", "license": "MIT" }, + "node_modules/@types/node": { + "version": "22.14.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.14.1.tgz", + "integrity": "sha512-u0HuPQwe/dHrItgHHpmw3N2fYCR6x4ivMNbPHRkBVP4CvN+kiRrKHWk3i8tXiO/joPwXLMYvF9TTF0eqgHIuOw==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, "node_modules/@types/unist": { "version": "2.0.11", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", @@ -1321,12 +1361,33 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/typescript": { + "version": "5.8.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", + "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, "node_modules/uc.micro": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz", "integrity": "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==", "license": "MIT" }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 0a53eb70..63d700b5 100644 --- a/package.json +++ b/package.json @@ -2,5 +2,11 @@ "dependencies": { "markdownlint-cli": "^0.44.0", "prettier": "^3.5.3" + }, + "type": "module", + "devDependencies": { + "@types/markdown-it": "^14.1.2", + "@types/node": "^22.14.1", + "typescript": "^5.8.3" } } diff --git a/parse.ts b/parse.ts new file mode 100644 index 00000000..1857dc61 --- /dev/null +++ b/parse.ts @@ -0,0 +1,60 @@ +import mit from "markdown-it"; +import type { Token } from "markdown-it/index.js"; + +export type ParsedLink = { + readonly target: string; + readonly content: string; +}; + +export type ParsedImage = { + readonly src: string; + readonly alt: string; +}; + +export type ParseResult = { + readonly links: readonly ParsedLink[]; + readonly images: readonly ParsedImage[]; +}; + +export const parse = (content: string): ParseResult => { + const parser = mit(); + const tokens = parser.parse(content, {}); + + const parsedLinks: ParsedLink[] = []; + const parsedImages: ParsedImage[] = []; + + const scan = (tokens: Token[]) => { + tokens.forEach((token, index) => { + if (token.type === "link_open") { + const indexOfNextClose = tokens.findIndex( + (t2, i2) => i2 > index && t2.type === "link_close", + ); + + if (indexOfNextClose > index) { + parsedLinks.push({ + target: token.attrGet("href") as string, + content: tokens + .slice(index + 1, indexOfNextClose) + .map((t) => t.content) + .join(""), + }); + } + } + + if (token.type === "image") + parsedImages.push({ + src: token.attrGet("src") as string, + alt: token.content, + }); + + if (token.children) scan(token.children); + }); + }; + + scan(tokens); + + return { + links: parsedLinks, + images: parsedImages, + }; +}; diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 00000000..beb9c155 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,12 @@ +{ + "compilerOptions": { + "target": "ESNext", + "module": "NodeNext", + "moduleResolution": "nodenext", + "outDir": "./dist/", + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "strict": true, + "skipLibCheck": true + } +} diff --git a/validateLinks.ts b/validateLinks.ts new file mode 100644 index 00000000..2c4d3a53 --- /dev/null +++ b/validateLinks.ts @@ -0,0 +1,108 @@ +import { exec } from "node:child_process"; + +import { readFile } from "node:fs/promises"; +import { parse, type ParseResult } from "./parse.js"; +import path, { dirname, normalize } from "node:path/posix"; +import { isAbsolute } from "node:path"; + +type ParsedFile = ParseResult & { readonly filename: string }; + +const findAllFilesInGit = async (): Promise => { + return await new Promise((resolve, reject) => { + exec("git ls-files -z", (error, stdout, stderr) => { + if (error) reject(error); + if (stderr) + reject(new Error(`git ls-files outputted on stderr: ${stderr}`)); + else resolve(stdout.split("\0").filter(Boolean)); + }); + }); +}; + +const findMarkdownFiles = (files: string[]): string[] => { + const ignorePattern = /^(README|LICENSE|contributing\/)/; + return files.filter( + (f) => f.toLocaleLowerCase().endsWith(".md") && !ignorePattern.test(f), + ); +}; + +const scanForLinks = async (filenames: string[]): Promise => { + return Promise.all( + filenames.map(async (filename) => { + const content = await readFile(filename, "utf-8"); + return { filename, ...parse(content) }; + }), + ); +}; + +const externalLinkPattern = /^\w+:/; +const isExternalLink = (t: string) => externalLinkPattern.test(t); + +const main = async () => { + const gitFiles = await findAllFilesInGit(); + + // For now, we assume that there are no case clashes + const lowercaseGitFiles = gitFiles.map((s) => s.toLocaleLowerCase()); + + const markdownFilenames = findMarkdownFiles(gitFiles); + const parsedFiles = await scanForLinks(markdownFilenames); + + let errors = 0; + + for (const parsedFile of parsedFiles) { + for (const img of parsedFile.images) { + if (!isExternalLink(img.src)) { + const resolved = path.join(dirname(parsedFile.filename), img.src); + + const exists = lowercaseGitFiles.includes(resolved.toLocaleLowerCase()); + + if (!exists) { + console.log( + `error BROKEN-INTERNAL-IMAGE ${parsedFile.filename}:0 Broken internal image reference ${img.src}`, + ); + ++errors; + } + } + } + + for (const link of parsedFile.links) { + if (link.target.startsWith("#")) { + // Already checked by the linter + continue; + } + + if (!isExternalLink(link.target)) { + const target = link.target.split("#")[0]; + let resolved: string; + + if (isAbsolute(target)) { + resolved = normalize(`./${target}`); + } else { + resolved = normalize(path.join(dirname(parsedFile.filename), target)); + } + + const isFile = lowercaseGitFiles.includes(resolved.toLocaleLowerCase()); + + const resolvedWithTrailingSlash = resolved.endsWith("/") + ? resolved.toLocaleLowerCase() + : `${resolved.toLocaleLowerCase()}/`; + const isDirectory = lowercaseGitFiles.some((s) => + s.startsWith(resolvedWithTrailingSlash), + ); + + if (!isFile && !isDirectory) { + console.log( + `error BROKEN-INTERNAL-LINK ${parsedFile.filename}:0 Link target does not exist: ${target}`, + ); + ++errors; + } + } + } + } + + if (errors > 0) process.exit(1); +}; + +main().catch((error) => { + console.error(error); + process.exit(1); +});