Skip to content

Commit 106c0f5

Browse files
Fil and mbostock
authored
normalize links (#1037)
* centralize md instance as part of the normalized configuration * move test * normalize links with .html if cleanUrls is false (defaults to true) * document cleanUrls * makeLinkNormalizer * simpler link normalization * server-side normalization * parse instead of regex * fix directory link normalization * docs; version badge * shorter --------- Co-authored-by: Mike Bostock <[email protected]>
1 parent a94ce04 commit 106c0f5

File tree

8 files changed

+240
-91
lines changed

8 files changed

+240
-91
lines changed

docs/config.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,10 @@ export default {
167167

168168
The base path when serving the site. Currently this only affects the custom 404 page, if any.
169169

170+
## cleanUrls <a href="https://github.com/observablehq/framework/pull/1037" target="_blank" class="observablehq-version-badge" data-version="prerelease" title="Added in #1037"></a>
171+
172+
Whether page links should be “clean”, _i.e._, formatted without a `.html` extension. Defaults to true. If true, a link to `config.html` will be formatted as `config`. Regardless of this setting, a link to an index page will drop the implied `index.html`; for example `foo/index.html` will be formatted as `foo/`.
173+
170174
## toc
171175

172176
The table of contents configuration.

src/markdown.ts

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/* eslint-disable import/no-named-as-default-member */
22
import {createHash} from "node:crypto";
3+
import {extname} from "node:path/posix";
34
import matter from "gray-matter";
45
import he from "he";
56
import MarkdownIt from "markdown-it";
@@ -275,13 +276,47 @@ function makeSoftbreakRenderer(baseRenderer: RenderRule): RenderRule {
275276
};
276277
}
277278

279+
/**
 * Splits a relative URL into its path, query string, and fragment.
 *
 * The fragment is everything from the first "#" onward; the query string is
 * everything from the first "?" that precedes the fragment. The delimiters
 * are kept in the returned parts, so `pathname + search + hash` always
 * reproduces the input. No decoding or validation is performed.
 *
 * @param url - The URL string to split.
 * @returns The three parts; `search` and `hash` are `""` when absent.
 */
export function parseRelativeUrl(url: string): {pathname: string; search: string; hash: string} {
  // Peel off the fragment first: a "?" that appears after "#" belongs to the
  // fragment, not to the query string.
  const hashIndex = url.indexOf("#");
  const hash = hashIndex < 0 ? "" : url.slice(hashIndex);
  const beforeHash = hashIndex < 0 ? url : url.slice(0, hashIndex);

  const searchIndex = beforeHash.indexOf("?");
  const search = searchIndex < 0 ? "" : beforeHash.slice(searchIndex);
  const pathname = searchIndex < 0 ? beforeHash : beforeHash.slice(0, searchIndex);

  return {pathname, search, hash};
}
290+
291+
/**
 * Wraps a link normalizer so that relative page links are rewritten to a
 * canonical form before being handed to `baseNormalize`.
 *
 * For relative links the path is canonicalized as follows:
 * - a path with no extension and no trailing slash is treated as a page and
 *   given an `.html` extension;
 * - `index.html` becomes `.` and a trailing `/index.html` becomes `/`,
 *   regardless of `clean`;
 * - otherwise, if `clean` is true, a trailing `.html` is dropped.
 *
 * The query string and fragment are carried through unchanged. Links with a
 * scheme (e.g. `https:`, `mailto:`) and protocol-relative links (`//host/…`)
 * are passed to `baseNormalize` untouched.
 *
 * @param baseNormalize - The underlying normalizer applied to every link
 *   after the rewrite above.
 * @param clean - Whether page links should omit the `.html` extension.
 * @returns A normalizer with the same signature as `baseNormalize`.
 */
export function makeLinkNormalizer(baseNormalize: (url: string) => string, clean: boolean): (url: string) => string {
  return (url) => {
    // Only clean relative links; ignore e.g. "https:" links, and likewise
    // protocol-relative ("//host/path") links, which point at another origin.
    if (!/^\w+:/.test(url) && !url.startsWith("//")) {
      const u = parseRelativeUrl(url);
      let {pathname} = u;
      // extname(".") and extname("..") are both "", so a trailing dot segment
      // would otherwise be mistaken for an extensionless page and turned into
      // "..html" / "...html"; dot segments name directories, so leave them be.
      const endsWithDotSegment = /(?:^|\/)\.\.?$/.test(pathname);
      if (pathname && !endsWithDotSegment && !pathname.endsWith("/") && !extname(pathname)) pathname += ".html";
      if (pathname === "index.html") pathname = ".";
      else if (pathname.endsWith("/index.html")) pathname = pathname.slice(0, -"index.html".length);
      else if (clean) pathname = pathname.replace(/\.html$/, "");
      url = pathname + u.search + u.hash;
    }
    return baseNormalize(url);
  };
}
306+
278307
export interface ParseOptions {
279308
md: MarkdownIt;
280309
path: string;
281310
style?: Config["style"];
282311
}
283312

284-
export function createMarkdownIt({markdownIt}: {markdownIt?: (md: MarkdownIt) => MarkdownIt} = {}): MarkdownIt {
313+
export function createMarkdownIt({
314+
markdownIt,
315+
cleanUrls = true
316+
}: {
317+
markdownIt?: (md: MarkdownIt) => MarkdownIt;
318+
cleanUrls?: boolean;
319+
} = {}): MarkdownIt {
285320
const md = MarkdownIt({html: true, linkify: true});
286321
md.linkify.set({fuzzyLink: false, fuzzyEmail: false});
287322
md.use(MarkdownItAnchor, {permalink: MarkdownItAnchor.permalink.headerLink({class: "observablehq-header-anchor"})});
@@ -290,6 +325,7 @@ export function createMarkdownIt({markdownIt}: {markdownIt?: (md: MarkdownIt) =>
290325
md.renderer.rules.placeholder = makePlaceholderRenderer();
291326
md.renderer.rules.fence = makeFenceRenderer(md.renderer.rules.fence!);
292327
md.renderer.rules.softbreak = makeSoftbreakRenderer(md.renderer.rules.softbreak!);
328+
md.normalizeLink = makeLinkNormalizer(md.normalizeLink, cleanUrls);
293329
return markdownIt === undefined ? md : markdownIt(md);
294330
}
295331

src/preview.ts

Lines changed: 17 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import {createHash} from "node:crypto";
22
import {watch} from "node:fs";
33
import type {FSWatcher, WatchEventType} from "node:fs";
4-
import {access, constants, readFile, stat} from "node:fs/promises";
4+
import {access, constants, readFile} from "node:fs/promises";
55
import {createServer} from "node:http";
66
import type {IncomingMessage, RequestListener, Server, ServerResponse} from "node:http";
7-
import {basename, dirname, extname, join, normalize} from "node:path/posix";
7+
import {basename, dirname, join, normalize} from "node:path/posix";
88
import {difference} from "d3-array";
99
import type {PatchItem} from "fast-array-diff";
1010
import {getPatch} from "fast-array-diff";
@@ -152,55 +152,28 @@ export class PreviewServer {
152152
throw new HttpError(`Not found: ${pathname}`, 404);
153153
} else {
154154
if ((pathname = normalize(pathname)).startsWith("..")) throw new Error("Invalid path: " + pathname);
155-
let path = join(root, pathname);
156-
157-
// If this path is for /index, redirect to the parent directory for a
158-
// tidy path. (This must be done before implicitly adding /index below!)
159-
// Respect precedence of dir/index.md over dir.md in choosing between
160-
// dir/ and dir!
161-
if (basename(path, ".html") === "index") {
162-
try {
163-
await stat(join(dirname(path), "index.md"));
164-
res.writeHead(302, {Location: join(dirname(pathname), "/") + url.search});
165-
res.end();
166-
return;
167-
} catch (error) {
168-
if (!isEnoent(error)) throw error;
169-
res.writeHead(302, {Location: dirname(pathname) + url.search});
170-
res.end();
171-
return;
172-
}
173-
}
174155

175-
// If this path resolves to a directory, then add an implicit /index to
176-
// the end of the path, assuming that the corresponding index.md exists.
177-
try {
178-
if ((await stat(path)).isDirectory() && (await stat(join(path, "index.md"))).isFile()) {
179-
if (!pathname.endsWith("/")) {
180-
res.writeHead(302, {Location: pathname + "/" + url.search});
181-
res.end();
182-
return;
183-
}
184-
pathname = join(pathname, "index");
185-
path = join(path, "index");
186-
}
187-
} catch (error) {
188-
if (!isEnoent(error)) throw error; // internal error
189-
}
190-
191-
// If this path ends with .html, then redirect to drop the .html. TODO:
192-
// Check for the existence of the .md file first.
193-
if (extname(path) === ".html") {
194-
res.writeHead(302, {Location: join(dirname(pathname), basename(pathname, ".html")) + url.search});
156+
// Normalize the pathname (e.g., dropping ".html").
157+
const normalizedPathname = config.md.normalizeLink(pathname);
158+
if (pathname !== normalizedPathname) {
159+
res.writeHead(302, {Location: normalizedPathname + url.search});
195160
res.end();
196161
return;
197162
}
198163

199-
// Otherwise, serve the corresponding Markdown file, if it exists.
164+
// If this path ends with a slash, then add an implicit /index to the
165+
// end of the path.
166+
let path = join(root, pathname);
167+
if (pathname.endsWith("/")) {
168+
pathname = join(pathname, "index");
169+
path = join(path, "index");
170+
}
171+
172+
// Lastly, serve the corresponding Markdown file, if it exists.
200173
// Anything else should 404; static files should be matched above.
201174
try {
202175
const options = {path: pathname, ...config, preview: true};
203-
const source = await readFile(path + ".md", "utf8");
176+
const source = await readFile(join(dirname(path), basename(path, ".html") + ".md"), "utf8");
204177
const parse = parseMarkdown(source, options);
205178
const html = await renderPage(parse, options);
206179
end(req, res, html, "text/html");
@@ -365,7 +338,7 @@ function handleWatch(socket: WebSocket, req: IncomingMessage, config: Config) {
365338
path = decodeURIComponent(initialPath);
366339
if (!(path = normalize(path)).startsWith("/")) throw new Error("Invalid path: " + initialPath);
367340
if (path.endsWith("/")) path += "index";
368-
path += ".md";
341+
path = join(dirname(path), basename(path, ".html") + ".md");
369342
const source = await readFile(join(root, path), "utf8");
370343
const page = parseMarkdown(source, {path, ...config});
371344
const resolvers = await getResolvers(page, {root, path});

src/render.ts

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@ type RenderInternalOptions =
2424
| {preview: true}; // preview
2525

2626
export async function renderPage(page: MarkdownPage, options: RenderOptions & RenderInternalOptions): Promise<string> {
27-
const {root, base, path, pages, title, preview, search, resolvers = await getResolvers(page, options)} = options;
28-
const sidebar = page.data?.sidebar !== undefined ? Boolean(page.data.sidebar) : options.sidebar;
29-
const toc = mergeToc(page.data?.toc, options.toc);
30-
const draft = Boolean(page.data?.draft);
27+
const {data} = page;
28+
const {root, md, base, path, pages, title, preview, search, resolvers = await getResolvers(page, options)} = options;
29+
const {normalizeLink} = md;
30+
const sidebar = data?.sidebar !== undefined ? Boolean(data.sidebar) : options.sidebar;
31+
const toc = mergeToc(data?.toc, options.toc);
32+
const draft = Boolean(data?.draft);
3133
const {files, resolveFile, resolveImport} = resolvers;
3234
return String(html`<!DOCTYPE html>
3335
<meta charset="utf-8">${path === "/404" ? html`\n<base href="${preview ? "/" : base}">` : ""}
@@ -55,29 +57,29 @@ if (location.pathname.endsWith("/")) {
5557
import ${preview || page.code.length ? `{${preview ? "open, " : ""}define} from ` : ""}${JSON.stringify(
5658
resolveImport("observablehq:client")
5759
)};${
58-
files.size || page.data?.sql
59-
? `\nimport {registerFile${page.data?.sql ? ", FileAttachment" : ""}} from ${JSON.stringify(
60+
files.size || data?.sql
61+
? `\nimport {registerFile${data?.sql ? ", FileAttachment" : ""}} from ${JSON.stringify(
6062
resolveImport("observablehq:stdlib")
6163
)};`
6264
: ""
65+
}${data?.sql ? `\nimport {registerTable} from ${JSON.stringify(resolveImport("npm:@observablehq/duckdb"))};` : ""}${
66+
files.size ? `\n${renderFiles(files, resolveFile)}` : ""
6367
}${
64-
page.data?.sql ? `\nimport {registerTable} from ${JSON.stringify(resolveImport("npm:@observablehq/duckdb"))};` : ""
65-
}${files.size ? `\n${renderFiles(files, resolveFile)}` : ""}${
66-
page.data?.sql
67-
? `\n${Object.entries<string>(page.data.sql)
68+
data?.sql
69+
? `\n${Object.entries<string>(data.sql)
6870
.map(([name, source]) => `registerTable(${JSON.stringify(name)}, FileAttachment(${JSON.stringify(source)}));`)
6971
.join("\n")}`
7072
: ""
7173
}
7274
${preview ? `\nopen({hash: ${JSON.stringify(resolvers.hash)}, eval: (body) => eval(body)});\n` : ""}${page.code
7375
.map(({node, id}) => `\n${transpileJavaScript(node, {id, resolveImport})}`)
7476
.join("")}`)}
75-
</script>${sidebar ? html`\n${await renderSidebar(title, pages, root, path, search)}` : ""}${
77+
</script>${sidebar ? html`\n${await renderSidebar(title, pages, root, path, search, normalizeLink)}` : ""}${
7678
toc.show ? html`\n${renderToc(findHeaders(page), toc.label)}` : ""
7779
}
78-
<div id="observablehq-center">${renderHeader(options, page.data)}
80+
<div id="observablehq-center">${renderHeader(options, data)}
7981
<main id="observablehq-main" class="observablehq${draft ? " observablehq--draft" : ""}">
80-
${html.unsafe(rewriteHtml(page.html, resolvers.resolveFile))}</main>${renderFooter(path, options, page.data)}
82+
${html.unsafe(rewriteHtml(page.html, resolvers.resolveFile))}</main>${renderFooter(path, options, data, normalizeLink)}
8183
</div>
8284
`);
8385
}
@@ -102,7 +104,8 @@ async function renderSidebar(
102104
pages: (Page | Section)[],
103105
root: string,
104106
path: string,
105-
search: boolean
107+
search: boolean,
108+
normalizeLink: (href: string) => string
106109
): Promise<Html> {
107110
return html`<input id="observablehq-sidebar-toggle" type="checkbox" title="Toggle sidebar">
108111
<label id="observablehq-sidebar-backdrop" for="observablehq-sidebar-toggle"></label>
@@ -111,7 +114,7 @@ async function renderSidebar(
111114
<label id="observablehq-sidebar-close" for="observablehq-sidebar-toggle"></label>
112115
<li class="observablehq-link${
113116
normalizePath(path) === "/index" ? " observablehq-link-active" : ""
114-
}"><a href="${relativePath(path, "/")}">${title}</a></li>
117+
}"><a href="${normalizeLink(relativePath(path, "/"))}">${title}</a></li>
115118
</ol>${
116119
search
117120
? html`\n <div id="observablehq-search"><input type="search" placeholder="Search"></div>
@@ -132,11 +135,15 @@ async function renderSidebar(
132135
: ""
133136
}>
134137
<summary>${p.name}</summary>
135-
<ol>${p.pages.map((p) => renderListItem(p, path))}
138+
<ol>${p.pages.map((p) => renderListItem(p, path, normalizeLink))}
136139
</ol>
137140
</details>`
138141
: "path" in p
139-
? html`${i > 0 && "pages" in pages[i - 1] ? html`\n </ol>\n <ol>` : ""}${renderListItem(p, path)}`
142+
? html`${i > 0 && "pages" in pages[i - 1] ? html`\n </ol>\n <ol>` : ""}${renderListItem(
143+
p,
144+
path,
145+
normalizeLink
146+
)}`
140147
: ""
141148
)}
142149
</ol>
@@ -175,14 +182,10 @@ function renderToc(headers: Header[], label: string): Html {
175182
</aside>`;
176183
}
177184

178-
function renderListItem(page: Page, path: string): Html {
185+
function renderListItem(page: Page, path: string, normalizeLink: (href: string) => string): Html {
179186
return html`\n <li class="observablehq-link${
180187
normalizePath(page.path) === path ? " observablehq-link-active" : ""
181-
}"><a href="${relativePath(path, prettyPath(page.path))}">${page.name}</a></li>`;
182-
}
183-
184-
function prettyPath(path: string): string {
185-
return path.replace(/\/index$/, "/") || "/";
188+
}"><a href="${normalizeLink(relativePath(path, page.path))}">${page.name}</a></li>`;
186189
}
187190

188191
function renderHead(
@@ -231,23 +234,26 @@ function renderHeader({header}: Pick<Config, "header">, data: MarkdownPage["data
231234
function renderFooter(
232235
path: string,
233236
options: Pick<Config, "pages" | "pager" | "title" | "footer">,
234-
data: MarkdownPage["data"]
237+
data: MarkdownPage["data"],
238+
normalizeLink: (href: string) => string
235239
): Html | null {
236240
let footer = options.footer;
237241
if (data?.footer !== undefined) footer = data?.footer;
238242
const link = options.pager ? findLink(path, options) : null;
239243
return link || footer
240-
? html`\n<footer id="observablehq-footer">${link ? renderPager(path, link) : ""}${
244+
? html`\n<footer id="observablehq-footer">${link ? renderPager(path, link, normalizeLink) : ""}${
241245
footer ? html`\n<div>${html.unsafe(footer)}</div>` : ""
242246
}
243247
</footer>`
244248
: null;
245249
}
246250

247-
function renderPager(path: string, {prev, next}: PageLink): Html {
248-
return html`\n<nav>${prev ? renderRel(path, prev, "prev") : ""}${next ? renderRel(path, next, "next") : ""}</nav>`;
251+
function renderPager(path: string, {prev, next}: PageLink, normalizeLink: (href: string) => string): Html {
252+
return html`\n<nav>${prev ? renderRel(path, prev, "prev", normalizeLink) : ""}${
253+
next ? renderRel(path, next, "next", normalizeLink) : ""
254+
}</nav>`;
249255
}
250256

251-
function renderRel(path: string, page: Page, rel: "prev" | "next"): Html {
252-
return html`<a rel="${rel}" href="${relativePath(path, prettyPath(page.path))}"><span>${page.name}</span></a>`;
257+
function renderRel(path: string, page: Page, rel: "prev" | "next", normalizeLink: (href: string) => string): Html {
258+
return html`<a rel="${rel}" href="${normalizeLink(relativePath(path, page.path))}"><span>${page.name}</span></a>`;
253259
}

src/search.ts

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import {readFile} from "node:fs/promises";
2-
import {basename, join} from "node:path/posix";
2+
import {basename, dirname, join} from "node:path/posix";
33
import he from "he";
44
import MiniSearch from "minisearch";
55
import type {Config} from "./config.js";
@@ -27,7 +27,7 @@ const indexOptions = {
2727
};
2828

2929
export async function searchIndex(config: Config, effects = defaultEffects): Promise<string> {
30-
const {root, pages, search} = config;
30+
const {root, pages, search, md} = config;
3131
if (!search) return "{}";
3232
if (indexCache.has(config) && indexCache.get(config).freshUntil > +new Date()) return indexCache.get(config).json;
3333

@@ -41,23 +41,20 @@ export async function searchIndex(config: Config, effects = defaultEffects): Pro
4141
// Index the pages
4242
const index = new MiniSearch(indexOptions);
4343
for await (const file of visitMarkdownFiles(root)) {
44-
const path = join(root, file);
45-
const source = await readFile(path, "utf8");
46-
const {html, title, data} = parseMarkdown(source, {...config, path: "/" + file.slice(0, -3)});
44+
const sourcePath = join(root, file);
45+
const source = await readFile(sourcePath, "utf8");
46+
const path = `/${join(dirname(file), basename(file, ".md"))}`;
47+
const {html, title, data} = parseMarkdown(source, {...config, path});
4748

4849
// Skip pages that opt-out of indexing, and skip unlisted pages unless
4950
// opted-in. We only log the first case.
50-
const listed = pagePaths.has(`/${file.slice(0, -3)}`);
51+
const listed = pagePaths.has(path);
5152
const indexed = data?.index === undefined ? listed : Boolean(data.index);
5253
if (!indexed) {
53-
if (listed) effects.logger.log(`${faint("index")} ${strikethrough(path)} ${faint("(skipped)")}`);
54+
if (listed) effects.logger.log(`${faint("index")} ${strikethrough(sourcePath)} ${faint("(skipped)")}`);
5455
continue;
5556
}
5657

57-
// This is the (top-level) serving path to the indexed page. There’s
58-
// implicitly a leading slash here.
59-
const id = file.slice(0, basename(file) === "index.md" ? -"index.md".length : -3);
60-
6158
// eslint-disable-next-line import/no-named-as-default-member
6259
const text = he
6360
.decode(
@@ -70,8 +67,8 @@ export async function searchIndex(config: Config, effects = defaultEffects): Pro
7067
.replaceAll(/[\u0300-\u036f]/g, "")
7168
.replace(/[^\p{L}\p{N}]/gu, " "); // keep letters & numbers
7269

73-
effects.logger.log(`${faint("index")} ${path}`);
74-
index.add({id, title, text, keywords: normalizeKeywords(data?.keywords)});
70+
effects.logger.log(`${faint("index")} ${sourcePath}`);
71+
index.add({id: md.normalizeLink(path).slice("/".length), title, text, keywords: normalizeKeywords(data?.keywords)});
7572
}
7673

7774
// Pass the serializable index options to the client.

0 commit comments

Comments
 (0)