From 6a4db35ead16b3ddcf68124e5d4b5c3336d85c10 Mon Sep 17 00:00:00 2001 From: Kian Newman-Hazel Date: Fri, 25 Apr 2025 16:32:36 +0100 Subject: [PATCH 1/4] [Docs Site] Add frontmatter to index.md output --- worker/index.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/worker/index.ts b/worker/index.ts index a7f43348c4b4568..99ec0a9e52809c0 100644 --- a/worker/index.ts +++ b/worker/index.ts @@ -13,10 +13,8 @@ const redirectsEvaluator = generateRedirectsEvaluator(redirectsFileContents, { export default class extends WorkerEntrypoint { override async fetch(request: Request) { if (request.url.endsWith("/index.md")) { - const res = await this.env.ASSETS.fetch( - request.url.replace("index.md", ""), - request, - ); + const htmlUrl = request.url.replace("index.md", ""); + const res = await this.env.ASSETS.fetch(htmlUrl, request); if (res.status === 404) { return res; From 7b7ce74fbc0295b8f00e76349511e3c5e524a642 Mon Sep 17 00:00:00 2001 From: Kian Newman-Hazel Date: Mon, 28 Apr 2025 16:59:26 +0100 Subject: [PATCH 2/4] rebase + update test --- src/middleware/index.ts | 2 +- src/util/markdown.ts | 25 +++++++++++++++++++++++-- worker/index.ts | 2 +- worker/index.worker.test.ts | 10 +++++++++- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/middleware/index.ts b/src/middleware/index.ts index 3bd502155a0cfa7..8f48dff6f288225 100644 --- a/src/middleware/index.ts +++ b/src/middleware/index.ts @@ -18,7 +18,7 @@ export const onRequest = defineMiddleware(async (context, next) => { const htmlUrl = new URL(pathname.replace("index.md", ""), context.url); const html = await (await fetch(htmlUrl)).text(); - const markdown = await htmlToMarkdown(html); + const markdown = await htmlToMarkdown(html, context.url.toString()); if (!markdown) { return new Response("Not Found", { status: 404 }); diff --git a/src/util/markdown.ts b/src/util/markdown.ts index b4a6abfa536e3f6..92d38ec87867680 100644 --- a/src/util/markdown.ts +++ b/src/util/markdown.ts @@ -10,8 +10,10 @@ import remarkStringify from "remark-stringify"; export async function htmlToMarkdown( html: string, + url: string, ): Promise { - const content = parse(html).querySelector(".sl-markdown-content"); + const dom = parse(html); + const content = dom.querySelector(".sl-markdown-content"); if (!content) { return; @@ -26,5 +28,24 @@ export async function htmlToMarkdown( remarkStringify, ]); - return markdown; + const title = dom.querySelector("title")?.textContent; + const description = dom.querySelector("meta[name='description']")?.attributes + .content; + const lastUpdated = dom.querySelector(".meta time")?.attributes.datetime; + + const withFrontmatter = [ + "---", + `title: ${title}`, + description ? `description: ${description}` : [], + lastUpdated ? `lastUpdated: ${lastUpdated}` : [], + `source_url:`, + ` html: ${url}`, + ` md: ${url.replace("index.md", "")}`, + "---\n", + markdown, + ] + .flat() + .join("\n"); + + return withFrontmatter; } diff --git a/worker/index.ts b/worker/index.ts index 99ec0a9e52809c0..22e024715ec7d82 100644 --- a/worker/index.ts +++ b/worker/index.ts @@ -26,7 +26,7 @@ export default class extends WorkerEntrypoint { ) { const html = await res.text(); - const markdown = await htmlToMarkdown(html); + const markdown = await htmlToMarkdown(html, request.url); if (!markdown) { return new Response("Not Found", { status: 404 }); diff --git a/worker/index.worker.test.ts b/worker/index.worker.test.ts index e91868d570a2769..13c81ecd6a68006 100644 --- a/worker/index.worker.test.ts +++ b/worker/index.worker.test.ts @@ -267,7 +267,15 @@ describe("Cloudflare Docs", () => { const text = await response.text(); expect(text).toMatchInlineSnapshot(` - "The HTML generated by this file is used as a test fixture for our Markdown generation. + "--- + title: Markdown · Cloudflare Style Guide + description: The HTML generated by this file is used as a test fixture for our Markdown generation. + source_url: + html: http://fakehost/style-guide/fixtures/markdown/index.md + md: http://fakehost/style-guide/fixtures/markdown/ + --- + + The HTML generated by this file is used as a test fixture for our Markdown generation. * mdx From 5ebe335b6cdbbf0d0d76abf286811677bdb836a0 Mon Sep 17 00:00:00 2001 From: Kian Newman-Hazel Date: Tue, 29 Apr 2025 12:14:29 +0100 Subject: [PATCH 3/4] fix title and desc --- src/components/overrides/Head.astro | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/components/overrides/Head.astro b/src/components/overrides/Head.astro index 9f321e4cc095cff..19d955c7c87605c 100644 --- a/src/components/overrides/Head.astro +++ b/src/components/overrides/Head.astro @@ -7,6 +7,7 @@ import { getEntry } from "astro:content"; import { getOgImage } from "~/util/og"; import type { CollectionEntry } from "astro:content"; +const DEFAULT_TITLE_DELIMITER = "|"; const NOINDEX_PRODUCTS = ["email-security", "style-guide", "security"]; const currentSection = Astro.url.pathname.split("/")[1].replaceAll(".", ""); @@ -31,7 +32,9 @@ if (currentSection) { let title: string; if (titleIdx !== -1) { - const existingTitle = head[titleIdx].content; + const existingTitle = head[titleIdx].content?.split( + ` ${DEFAULT_TITLE_DELIMITER} `, + )[0]; title = `${existingTitle} · ${product.data.meta.title}`; head[titleIdx] = { @@ -80,6 +83,26 @@ if (shouldNoIndex) { }); } +if ( + frontmatter.description && + head.findIndex( + ({ tag, attrs }) => tag === "meta" && attrs?.name === "description", + ) === -1 +) { + const existingOpenGraphTag = head.findIndex( + ({ tag, attrs }) => tag === "meta" && attrs?.property === "og:description", + ); + + if (existingOpenGraphTag !== -1) { + head[existingOpenGraphTag].attrs!.content = frontmatter.description; + } + + metaTags.push({ + name: "description", + content: frontmatter.description as string, + }); +} + if (frontmatter.pcx_content_type) { ["pcx_content_type", "algolia_content_type"].map((name) => { metaTags.push({ From 5e2c091da99d3806e4f8603622b6e98e53566a2d Mon Sep 17 00:00:00 2001 From: Kian Newman-Hazel Date: Tue, 29 Apr 2025 13:41:08 +0100 Subject: [PATCH 4/4] hardcode lastUpdated --- src/content/docs/style-guide/fixtures/markdown.mdx | 1 + worker/index.worker.test.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/src/content/docs/style-guide/fixtures/markdown.mdx b/src/content/docs/style-guide/fixtures/markdown.mdx index 0befccb7f1e18be..0e5f02a61700e76 100644 --- a/src/content/docs/style-guide/fixtures/markdown.mdx +++ b/src/content/docs/style-guide/fixtures/markdown.mdx @@ -1,6 +1,7 @@ --- title: Markdown noindex: true +lastUpdated: 2025-01-01T00:00:00Z sidebar: hidden: true --- diff --git a/worker/index.worker.test.ts b/worker/index.worker.test.ts index 13c81ecd6a68006..2f09b7906a600d8 100644 --- a/worker/index.worker.test.ts +++ b/worker/index.worker.test.ts @@ -270,6 +270,7 @@ describe("Cloudflare Docs", () => { "--- title: Markdown · Cloudflare Style Guide description: The HTML generated by this file is used as a test fixture for our Markdown generation. + lastUpdated: 2025-01-01T00:00:00.000Z source_url: html: http://fakehost/style-guide/fixtures/markdown/index.md md: http://fakehost/style-guide/fixtures/markdown/