From 03be76107ca875898361edfa75c0c9fae455b38b Mon Sep 17 00:00:00 2001 From: Kian Newman-Hazel Date: Wed, 22 Jan 2025 14:38:21 +0000 Subject: [PATCH] [Docs Site] Decode HTML entities and strip Markdown from page descriptions --- package-lock.json | 33 +++++++++++++++++ package.json | 2 ++ src/components/overrides/Head.astro | 2 +- .../docs/workers/languages/rust/index.mdx | 2 +- src/util/description.ts | 36 ++++++++++++------- 5 files changed, 61 insertions(+), 14 deletions(-) diff --git a/package-lock.json b/package-lock.json index b8eeeb6ebf16f1..dce2df925b16a8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -69,11 +69,13 @@ "rehype-external-links": "3.0.0", "rehype-mermaid": "3.0.0", "rehype-title-figure": "0.1.2", + "remark": "15.0.1", "sharp": "0.33.5", "solarflare-theme": "0.0.2", "starlight-image-zoom": "0.9.0", "starlight-links-validator": "0.14.1", "starlight-package-managers": "0.9.0", + "strip-markdown": "6.0.0", "svgo": "3.3.2", "tailwindcss": "3.4.17", "tippy.js": "6.3.7", @@ -17272,6 +17274,23 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/remark": { + "version": "15.0.1", + "resolved": "https://registry.npmjs.org/remark/-/remark-15.0.1.tgz", + "integrity": "sha512-Eht5w30ruCXgFmxVUSlNWQ9iiimq07URKeFS3hNc8cUWy1llX4KDWfyEDZRycMc+znsN9Ux5/tJ/BFdgdOwA3A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "remark-parse": "^11.0.0", + "remark-stringify": "^11.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/remark-directive": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/remark-directive/-/remark-directive-3.0.0.tgz", @@ -18620,6 +18639,20 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/strip-markdown": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/strip-markdown/-/strip-markdown-6.0.0.tgz", + "integrity": 
"sha512-mSa8FtUoX3ExJYDkjPUTC14xaBAn4Ik5GPQD45G5E2egAmeV3kHgVSTfIoSDggbF6Pk9stahVgqsLCNExv6jHw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/strnum": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", diff --git a/package.json b/package.json index 261f288afd7027..4a58ac67daceaf 100644 --- a/package.json +++ b/package.json @@ -88,11 +88,13 @@ "rehype-external-links": "3.0.0", "rehype-mermaid": "3.0.0", "rehype-title-figure": "0.1.2", + "remark": "15.0.1", "sharp": "0.33.5", "solarflare-theme": "0.0.2", "starlight-image-zoom": "0.9.0", "starlight-links-validator": "0.14.1", "starlight-package-managers": "0.9.0", + "strip-markdown": "6.0.0", "svgo": "3.3.2", "tailwindcss": "3.4.17", "tippy.js": "6.3.7", diff --git a/src/components/overrides/Head.astro b/src/components/overrides/Head.astro index a6fa9cd99f54c3..6afc5b959e000e 100644 --- a/src/components/overrides/Head.astro +++ b/src/components/overrides/Head.astro @@ -139,7 +139,7 @@ if (currentSection) { } } -Astro.props.entry.data.description ??= await getPageDescription( +Astro.props.entry.data.description = await getPageDescription( // @ts-expect-error TODO: improve types Astro.props.entry, Astro.locals, diff --git a/src/content/docs/workers/languages/rust/index.mdx b/src/content/docs/workers/languages/rust/index.mdx index db3b2022087319..8c87688d83be74 100644 --- a/src/content/docs/workers/languages/rust/index.mdx +++ b/src/content/docs/workers/languages/rust/index.mdx @@ -8,7 +8,7 @@ sidebar: head: - tag: title content: Cloudflare Workers — Rust language support -description: Write Workers in 100% Rust using the [`workers-rs`crate](https://github.com/cloudflare/workers-rs) +description: Write Workers in 100% Rust using the [`workers-rs` crate](https://github.com/cloudflare/workers-rs) --- Cloudflare Workers provides 
support for Rust via the [`workers-rs` crate](https://github.com/cloudflare/workers-rs), which makes [Runtime APIs](/workers/runtime-apis) and [bindings](/workers/runtime-apis/bindings/) to developer platform products, such as [Workers KV](/kv/concepts/how-kv-works/), [R2](/r2/), and [Queues](/queues/), available directly from your Rust code. diff --git a/src/util/description.ts b/src/util/description.ts index 517f73ce37a669..c694583bbb3aad 100644 --- a/src/util/description.ts +++ b/src/util/description.ts @@ -1,25 +1,37 @@ import type { CollectionEntry } from "astro:content"; import { parse } from "node-html-parser"; import { entryToString } from "./container"; -/* - 1. If there is a `description` property in the frontmatter, return that. - 2. If there is a `

<p>...</p>

` element in the HTML, return that. - 3. Return `undefined` to signal to consumers there is no suitable description. -*/ +import { remark } from "remark"; +import strip from "strip-markdown"; +import he from "he"; + +/** + * Generates a plain-text description for use in the `description` and `og:description` meta tags. + * + * 1. If there is a `description` property in the frontmatter, strip any Markdown tokens and return. + * 2. If there is a `

<p>...</p>

` element in the HTML, decode any HTML entities and return that. + * 3. Return `undefined` to signal to consumers there is no suitable description. + */ export async function getPageDescription( entry: CollectionEntry<"docs">, locals: any, ) { - if (entry.data.description) return entry.data.description; + let description = undefined; + + if (entry.data.description) { + const file = await remark().use(strip).process(entry.data.description); - const html = await entryToString(entry, locals); + description = file.toString(); + } else { + const html = await entryToString(entry, locals); - if (!html) return undefined; + if (!html) return undefined; - const dom = parse(html); - const description = dom.querySelector(":root > p"); + const dom = parse(html); + const paragraph = dom.querySelector(":root > p"); - if (description) return description.innerText; + if (paragraph) description = he.decode(paragraph.innerText); + } - return undefined; + return description?.replaceAll(" ↗", "").trim(); }