Skip to content

Commit ca823ce

Browse files
Fil and mbostock authored
detect broken links (#1698)
* detect broken links

  closes #363
  closes #1683

* move links, anchors
* tidy link checking
* edits
* validateLinks
* tweak style

---------

Co-authored-by: Mike Bostock <[email protected]>
1 parent 8c61a38 commit ca823ce

File tree

6 files changed

+82
-4
lines changed

6 files changed

+82
-4
lines changed

src/build.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,9 +365,37 @@ export async function build(
365365
}
366366
effects.logger.log("");
367367

368+
// Check links. TODO Have this break the build, and move this check earlier?
369+
const [validLinks, brokenLinks] = validateLinks(outputs);
370+
if (brokenLinks.length) {
371+
effects.logger.warn(`${yellow("Warning: ")}${brokenLinks.length} broken link${brokenLinks.length === 1 ? "" : "s"} (${validLinks.length + brokenLinks.length} validated)`); // prettier-ignore
372+
for (const [path, link] of brokenLinks) effects.logger.log(`${faint("↳")} ${path} ${faint("→")} ${red(link)}`);
373+
} else if (validLinks.length) {
374+
effects.logger.log(`${green(`${validLinks.length}`)} link${validLinks.length === 1 ? "" : "s"} validated`);
375+
}
376+
368377
Telemetry.record({event: "build", step: "finish", pageCount});
369378
}
370379

380+
/** A link found on the page at `path`, pointing at the site-relative `target`. */
type Link = [path: string, target: string];

/**
 * Partitions every local link recorded in `outputs` into links whose target
 * exists and links whose target does not.
 *
 * A target exists if it names an output path (a key of `outputs`), optionally
 * followed by a fragment naming one of that output's anchors. Links are
 * canonicalized before lookup so that equivalent spellings are not reported as
 * broken:
 *
 * - a query string is ignored, since it does not select a different page or
 *   anchor (local links are recorded as pathname + search + hash);
 * - an empty fragment ("/page#") refers to the page itself;
 * - a fragment is compared in the same percent-encoded form used to register
 *   anchors, so "#a:b" and "#a%3Ab" both match an anchor named "a:b".
 *
 * The returned links carry the target exactly as it was recorded, not the
 * canonicalized form, so that reports show what the author wrote.
 *
 * @param outputs the built outputs, keyed by path; only `resolvers.anchors`
 * and `resolvers.localLinks` are read.
 * @returns a pair of `[valid, broken]` links, each in iteration order.
 */
function validateLinks(outputs: Map<string, {resolvers: Resolvers}>): [valid: Link[], broken: Link[]] {
  const validTargets = new Set<string>(outputs.keys()); // e.g., "/this/page"
  for (const [path, {resolvers}] of outputs) {
    for (const anchor of resolvers.anchors) {
      validTargets.add(`${path}#${encodeURIComponent(anchor)}`); // e.g., "/this/page#hash"
    }
  }

  // Reduces a recorded link to the form stored in validTargets.
  const canonicalize = (target: string): string => {
    const hashIndex = target.indexOf("#");
    const beforeHash = hashIndex < 0 ? target : target.slice(0, hashIndex);
    const fragment = hashIndex < 0 ? "" : target.slice(hashIndex + 1);
    const queryIndex = beforeHash.indexOf("?");
    const pathname = queryIndex < 0 ? beforeHash : beforeHash.slice(0, queryIndex);
    if (!fragment) return pathname;
    let anchor = fragment;
    try {
      anchor = decodeURIComponent(fragment);
    } catch {
      // Malformed percent-escape: compare the fragment as written.
    }
    return `${pathname}#${encodeURIComponent(anchor)}`;
  };

  const valid: Link[] = [];
  const broken: Link[] = [];
  for (const [path, {resolvers}] of outputs) {
    for (const target of resolvers.localLinks) {
      (validTargets.has(canonicalize(target)) ? valid : broken).push([path, target]);
    }
  }
  return [valid, broken];
}
398+
371399
function applyHash(path: string, hash: string): string {
372400
const ext = extname(path);
373401
let name = basename(path, ext);

src/html.ts

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import he from "he";
33
import hljs from "highlight.js";
44
import type {DOMWindow} from "jsdom";
55
import {JSDOM, VirtualConsole} from "jsdom";
6-
import {isAssetPath, relativePath, resolveLocalPath} from "./path.js";
6+
import {isAssetPath, parseRelativeUrl, relativePath, resolveLocalPath, resolvePath} from "./path.js";
77

88
const ASSET_ATTRIBUTES: readonly [selector: string, src: string][] = [
99
["a[href][download]", "href"],
@@ -41,6 +41,8 @@ export function parseHtml(html: string): DOMWindow {
4141

4242
interface Assets {
4343
files: Set<string>;
44+
anchors: Set<string>;
45+
localLinks: Set<string>;
4446
localImports: Set<string>;
4547
globalImports: Set<string>;
4648
staticImports: Set<string>;
@@ -49,6 +51,8 @@ interface Assets {
4951
export function findAssets(html: string, path: string): Assets {
5052
const {document} = parseHtml(html);
5153
const files = new Set<string>();
54+
const anchors = new Set<string>();
55+
const localLinks = new Set<string>();
5256
const localImports = new Set<string>();
5357
const globalImports = new Set<string>();
5458
const staticImports = new Set<string>();
@@ -99,7 +103,20 @@ export function findAssets(html: string, path: string): Assets {
99103
}
100104
}
101105

102-
return {files, localImports, globalImports, staticImports};
106+
for (const element of document.querySelectorAll<HTMLElement>("[id],[name]")) {
107+
if (isExternal(element)) continue;
108+
anchors.add(element.getAttribute("id") ?? element.getAttribute("name")!);
109+
}
110+
111+
for (const a of document.querySelectorAll<HTMLAnchorElement>("a[href]")) {
112+
if (isExternal(a) || a.hasAttribute("download")) continue;
113+
const href = a.getAttribute("href")!;
114+
if (/^\w+:/.test(href)) continue; // URL
115+
const {pathname, search, hash} = parseRelativeUrl(href);
116+
localLinks.add(resolvePath(path, pathname).replace(/\.html$/i, "").replace(/\/$/, "/index") + search + hash); // prettier-ignore
117+
}
118+
119+
return {files, localImports, globalImports, staticImports, localLinks, anchors};
103120
}
104121

105122
export function rewriteHtmlPaths(html: string, path: string): string {

src/path.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ export function resolvePath(root: string, source: string, target: string): strin
3232
/**
 * Resolves `target` as referenced from `source`, within `root`.
 *
 * When called with two arguments they are taken as (source, target) and the
 * root defaults to ".". An empty target resolves to the source itself; a
 * target starting with "/" is resolved from the root rather than from the
 * source's directory.
 *
 * @returns a normalized path starting with "/" when the result is inside the
 * root, or a path that is ".." or starts with "../" when the result is outside
 * the root.
 */
export function resolvePath(root: string, source: string, target?: string): string {
  if (target === undefined) (target = source), (source = root), (root = ".");
  const path = join(root, target === "" ? source : target.startsWith("/") ? "." : dirname(source), target);
  // A result of exactly ".." is outside the root just like "../…"; passing it
  // to join("/", …) would silently collapse it to "/".
  return path === ".." || path.startsWith("../") ? path : join("/", path);
}
3737

3838
/**

src/resolvers.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ export interface Resolvers {
1919
hash: string;
2020
assets: Set<string>; // like files, but not registered for FileAttachment
2121
files: Set<string>;
22+
anchors: Set<string>;
23+
localLinks: Set<string>;
2224
localImports: Set<string>;
2325
globalImports: Set<string>;
2426
staticImports: Set<string>;
@@ -88,6 +90,8 @@ export async function getResolvers(page: MarkdownPage, config: ResolversConfig):
8890
const assets = new Set<string>();
8991
const files = new Set<string>();
9092
const fileMethods = new Set<string>();
93+
const anchors = new Set<string>();
94+
const localLinks = new Set<string>();
9195
const localImports = new Set<string>();
9296
const globalImports = new Set<string>(defaultImports);
9397
const staticImports = new Set<string>(defaultImports);
@@ -98,6 +102,8 @@ export async function getResolvers(page: MarkdownPage, config: ResolversConfig):
98102
if (!html) continue;
99103
const info = findAssets(html, path);
100104
for (const f of info.files) assets.add(f);
105+
for (const a of info.anchors) anchors.add(a);
106+
for (const l of info.localLinks) localLinks.add(l);
101107
for (const i of info.localImports) localImports.add(i);
102108
for (const i of info.globalImports) globalImports.add(i);
103109
for (const i of info.staticImports) staticImports.add(i);
@@ -151,6 +157,8 @@ export async function getResolvers(page: MarkdownPage, config: ResolversConfig):
151157
path,
152158
hash: hash.digest("hex"),
153159
assets,
160+
anchors,
161+
localLinks,
154162
...(await resolveResolvers(
155163
{
156164
files,
@@ -172,6 +180,8 @@ export async function getModuleResolvers(path: string, config: Omit<ResolversCon
172180
path,
173181
hash: getModuleHash(root, path),
174182
assets: new Set(),
183+
anchors: new Set(),
184+
localLinks: new Set(),
175185
...(await resolveResolvers({localImports: [path], staticImports: [path]}, {path, ...config}))
176186
};
177187
}
@@ -193,7 +203,7 @@ async function resolveResolvers(
193203
stylesheets?: Iterable<string> | null;
194204
},
195205
{root, path, normalizePath, loaders}: ResolversConfig
196-
): Promise<Omit<Resolvers, "path" | "hash" | "assets">> {
206+
): Promise<Omit<Resolvers, "path" | "hash" | "assets" | "anchors" | "localLinks">> {
197207
const files = new Set<string>(initialFiles);
198208
const fileMethods = new Set<string>(initialFileMethods);
199209
const localImports = new Set<string>(initialLocalImports);

test/html-test.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,26 @@ describe("findAssets(html, path)", () => {
100100
const html = '<script src="test.js" type="other">';
101101
assert.deepStrictEqual(findAssets(html, "foo").files, new Set(["./test.js"]));
102102
});
103+
it("finds anchors by [id] or [name]", () => {
104+
const html = '<a id="id1">foo</a> <a name="id2">bar</a>';
105+
assert.deepStrictEqual(findAssets(html, "foo").anchors, new Set(["id1", "id2"]));
106+
});
107+
it("finds local links by a[href]", () => {
108+
const html = '<a href="#anchor">a</a> <a href="other#baz">b</a> <a href="?test">self</a>';
109+
assert.deepStrictEqual(findAssets(html, "foo").localLinks, new Set(["/foo#anchor", "/other#baz", "/foo?test"]));
110+
});
111+
it("finds relative links", () => {
112+
const html = '<a href="./test">a</a>';
113+
assert.deepStrictEqual(findAssets(html, "foo/bar").localLinks, new Set(["/foo/test"]));
114+
});
115+
it("finds links that go up", () => {
116+
const html = '<a href="../test">a</a>';
117+
assert.deepStrictEqual(findAssets(html, "foo/bar").localLinks, new Set(["/test"]));
118+
});
119+
it("finds links that go above the root", () => {
120+
const html = '<a href="../test">a</a>';
121+
assert.deepStrictEqual(findAssets(html, "foo").localLinks, new Set(["../test"]));
122+
});
103123
});
104124

105125
describe("rewriteHtml(html, resolve)", () => {

test/path-test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@ import {isPathImport, parseRelativeUrl, relativePath, resolveLocalPath, resolveP
44
describe("resolvePath(source, target)", () => {
55
it("returns the path to the specified target within the source root", () => {
66
assert.strictEqual(resolvePath("foo", "baz"), "/baz");
7+
assert.strictEqual(resolvePath("foo", "./"), "/");
78
assert.strictEqual(resolvePath("./foo", "./baz"), "/baz");
89
assert.strictEqual(resolvePath("/foo", "baz"), "/baz");
910
assert.strictEqual(resolvePath("/foo", "./baz"), "/baz");
1011
assert.strictEqual(resolvePath("foo/bar", "baz"), "/foo/baz");
12+
assert.strictEqual(resolvePath("foo/bar", "./"), "/foo/");
1113
assert.strictEqual(resolvePath("./foo/bar", "./baz"), "/foo/baz");
1214
assert.strictEqual(resolvePath("/foo/bar", "baz"), "/foo/baz");
1315
assert.strictEqual(resolvePath("/foo/bar", "./baz"), "/foo/baz");
16+
assert.strictEqual(resolvePath("/foo/bar", "../"), "/");
1417
});
1518
it("allows paths outside the root", () => {
1619
assert.strictEqual(resolvePath("foo", "../baz"), "../baz");

0 commit comments

Comments
 (0)