Publish a vendored Markdown archive of the docs.

* .github/workflows/publish-production.yml: after the Workers deploy step,
  run bin/generate-index-md.ts, zip distmd/ and upload markdown.zip to the
  vendored-markdown R2 bucket.
* .gitignore: ignore the generated distmd/ directory.
* bin/generate-index-md.ts (new): convert every dist/**/*.html page that
  carries a <link rel="alternate" type="text/markdown"> into a .md file at
  the same relative path under distmd/.
* src/components/overrides/Head.astro: build the Markdown alternate href from
  Astro.site so it is absolute instead of path-only.
* src/pages/llms.txt.ts: mention the archive and link each entry to its
  index.md.
* src/util/markdown.ts: source_url.html is now the page URL and source_url.md
  the index.md URL (the two values were swapped).
* worker/index.ts: serve /markdown.zip from the VENDORED_MARKDOWN R2 binding
  and answer 404 when the object is missing.
* worker/worker-configuration.d.ts, wrangler.toml: declare the R2 binding.

NOTE(review): this patch was re-flowed from a copy whose line breaks and
indentation had been collapsed. The indentation of context lines, and the
leading spaces inside the source_url template strings, are reconstructed and
not verified against the tree; apply with `git apply --ignore-whitespace` or
regenerate the patch from the branch. The post-image blob ids on the `index`
lines of bin/generate-index-md.ts and worker/index.ts predate the review
changes made to those two files.

diff --git a/.github/workflows/publish-production.yml b/.github/workflows/publish-production.yml
index 8b54b3cf5014f47..f8f957bf553bf9a 100644
--- a/.github/workflows/publish-production.yml
+++ b/.github/workflows/publish-production.yml
@@ -32,6 +32,13 @@ jobs:
         name: Deploy to Cloudflare Workers
         env:
           CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
+      - name: Build vendored Markdown archive
+        env:
+          CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
+        run: |
+          npx tsx bin/generate-index-md.ts
+          cd distmd && zip -r markdown.zip .
+          npx wrangler r2 object put vendored-markdown/markdown.zip --file=markdown.zip --remote
       - uses: actions/cache/save@v4
         if: always()
         with:
diff --git a/.gitignore b/.gitignore
index 33301b4013d5966..e3de49811f983d3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # build output
 dist/
+distmd/
 
 # generated types
 .astro/
diff --git a/bin/generate-index-md.ts b/bin/generate-index-md.ts
new file mode 100644
index 000000000000000..276a4e9b337fe70
--- /dev/null
+++ b/bin/generate-index-md.ts
@@ -0,0 +1,38 @@
+import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
+import { dirname } from "node:path";
+
+import glob from "fast-glob";
+import { parse } from "node-html-parser";
+import { htmlToMarkdown } from "~/util/markdown";
+
+// Mirror every built HTML page that advertises a Markdown alternate into
+// distmd/ as a .md file (the publish workflow zips that directory).
+const files = await glob("dist/**/*.html");
+
+for (const file of files) {
+	const html = readFileSync(file, "utf-8");
+	const dom = parse(html);
+
+	// Pages opt in via <link rel="alternate" type="text/markdown" href="...">.
+	const url = dom
+		.querySelector("link[rel='alternate'][type='text/markdown']")
+		?.getAttribute("href");
+
+	if (!url) {
+		continue;
+	}
+
+	const markdown = await htmlToMarkdown(html, url);
+
+	if (!markdown) {
+		continue;
+	}
+
+	// Anchor both rewrites so only the leading "dist/" and the trailing
+	// ".html" change, even when a directory name contains either substring.
+	const path = file.replace(/^dist\//, "distmd/").replace(/\.html$/, ".md");
+
+	mkdirSync(dirname(path), { recursive: true });
+
+	writeFileSync(path, markdown);
+}
diff --git a/src/components/overrides/Head.astro b/src/components/overrides/Head.astro
index 8cc1ddff61e004e..175a8e4f25d1568 100644
--- a/src/components/overrides/Head.astro
+++ b/src/components/overrides/Head.astro
@@ -182,7 +182,7 @@ head.push({
 	attrs: {
 		rel: "alternate",
 		type: "text/markdown",
-		href: Astro.url.pathname + "index.md",
+		href: Astro.site + Astro.url.pathname.slice(1) + "index.md",
 	},
 });
 
diff --git a/src/pages/llms.txt.ts b/src/pages/llms.txt.ts
index 30727ebfcda9c55..baca0af973d2a97 100644
--- a/src/pages/llms.txt.ts
+++ b/src/pages/llms.txt.ts
@@ -31,6 +31,9 @@ export const GET: APIRoute = async () => {
 		Easily build and deploy full-stack applications everywhere,
 		thanks to integrated compute, storage, and networking.
 
+		> [!TIP]
+		> An archive of Markdown files is available at https://developers.cloudflare.com/markdown.zip
+
 		${grouped
 			.map(([product, entries]) => {
 				return dedent(`
@@ -38,7 +41,7 @@ export const GET: APIRoute = async () => {
 
 					${entries
 						?.map((e) => {
-							const line = `- [${e.data.title}](https://developers.cloudflare.com/${e.id}/)`;
+							const line = `- [${e.data.title}](https://developers.cloudflare.com/${e.id}/index.md)`;
 
 							const description = e.data.description;
 
diff --git a/src/util/markdown.ts b/src/util/markdown.ts
index 92d38ec87867680..2605c2e9944b8d0 100644
--- a/src/util/markdown.ts
+++ b/src/util/markdown.ts
@@ -39,8 +39,8 @@ export async function htmlToMarkdown(
 			description ? `description: ${description}` : [],
 			lastUpdated ? `lastUpdated: ${lastUpdated}` : [],
 			`source_url:`,
-			`  html: ${url}`,
-			`  md: ${url.replace("index.md", "")}`,
+			`  html: ${url.replace("index.md", "")}`,
+			`  md: ${url}`,
 			"---\n",
 			markdown,
 		]
diff --git a/worker/index.ts b/worker/index.ts
index 22e024715ec7d82..35edf875b2008c5 100644
--- a/worker/index.ts
+++ b/worker/index.ts
@@ -12,6 +12,23 @@ const redirectsEvaluator = generateRedirectsEvaluator(redirectsFileContents, {
 
 export default class extends WorkerEntrypoint {
 	override async fetch(request: Request) {
+		// Serve the vendored Markdown archive straight from R2.
+		if (request.url.endsWith("/markdown.zip")) {
+			const res = await this.env.VENDORED_MARKDOWN.get("markdown.zip");
+
+			// R2's get() resolves to null when the object does not exist; report
+			// that as a 404 rather than a 200 with an empty "zip" body.
+			if (!res) {
+				return new Response("Not Found", { status: 404 });
+			}
+
+			return new Response(res.body, {
+				headers: {
+					"Content-Type": "application/zip",
+				},
+			});
+		}
+
 		if (request.url.endsWith("/index.md")) {
 			const htmlUrl = request.url.replace("index.md", "");
 			const res = await this.env.ASSETS.fetch(htmlUrl, request);
diff --git a/worker/index.worker.test.ts b/worker/index.worker.test.ts
index b1354d03ff4c6e5..931d1d0e6f4b684 100644
--- a/worker/index.worker.test.ts
+++ b/worker/index.worker.test.ts
@@ -273,8 +273,8 @@ describe("Cloudflare Docs", () => {
 				description: The HTML generated by this file is used as a test fixture for our Markdown generation.
 				lastUpdated: 2025-01-01T00:00:00.000Z
 				source_url:
-				  html: http://fakehost/style-guide/fixtures/markdown/index.md
-				  md: http://fakehost/style-guide/fixtures/markdown/
+				  html: http://fakehost/style-guide/fixtures/markdown/
+				  md: http://fakehost/style-guide/fixtures/markdown/index.md
 				---
 
 				The HTML generated by this file is used as a test fixture for our Markdown generation.
@@ -327,7 +327,9 @@ describe("Cloudflare Docs", () => {
 				"link[rel='alternate'][type='text/markdown']",
 			)?.attributes.href;
 
-			expect(markdown).toBe("/workers/index.md");
+			expect(markdown).toBe(
+				"https://developers.cloudflare.com/workers/index.md",
+			);
 		});
 
 		it("og:image tag", () => {
diff --git a/worker/worker-configuration.d.ts b/worker/worker-configuration.d.ts
index abe023a32ed16cc..232ebd7f9e4cf94 100644
--- a/worker/worker-configuration.d.ts
+++ b/worker/worker-configuration.d.ts
@@ -2,6 +2,7 @@
 
 interface Env {
 	ASSETS: Fetcher;
+	VENDORED_MARKDOWN: R2Bucket;
 }
 declare module "*/__redirects" {
 	const value: string;
diff --git a/wrangler.toml b/wrangler.toml
index 8f06f88cc4437e8..5717264318f9e3b 100644
--- a/wrangler.toml
+++ b/wrangler.toml
@@ -16,4 +16,8 @@ rules = [
 directory = "./dist"
 binding = "ASSETS"
 not_found_handling = "404-page"
-run_worker_first = true
\ No newline at end of file
+run_worker_first = true
+
+[[r2_buckets]]
+binding = "VENDORED_MARKDOWN"
+bucket_name = "vendored-markdown"
\ No newline at end of file