Skip to content

Commit c711346

Browse files
KianNH and thomasgauvin
authored and committed
[Docs Site] Use vendored Markdown for llms-full.txt (#23686)
* [Docs Site] Use vendored Markdown for llms-full.txt
* remove tests as llms-full is now a post-build thing
* unused import
1 parent 833a6a4 commit c711346

File tree

8 files changed

+63
-182
lines changed

8 files changed

+63
-182
lines changed

.github/workflows/publish-production.yml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,11 @@ jobs:
4141
cd distmd && zip -r markdown.zip .
4242
npx wrangler r2 object put vendored-markdown/markdown.zip --file=markdown.zip --remote
4343
rm markdown.zip
44+
45+
cd distllms
46+
for file in $(find . -type f); do
47+
npx wrangler r2 object put vendored-markdown/$file --file=$file --remote
48+
done
4449
- name: Upload vendored Markdown files to ZT DevDocs bucket
4550
env:
4651
AWS_ACCESS_KEY_ID: ${{ secrets.ZT_DEVDOCS_ACCESS_KEY_ID }}

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# build output
22
dist/
33
distmd/
4+
distllms/
45
# generated types
56
.astro/
67

bin/generate-index-md.ts

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,23 @@
1-
import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
1+
import {
2+
readFileSync,
3+
writeFileSync,
4+
mkdirSync,
5+
appendFileSync,
6+
} from "node:fs";
27

38
import glob from "fast-glob";
49
import { parse } from "node-html-parser";
510
import { htmlToMarkdown } from "~/util/markdown";
611

12+
import YAML from "yaml";
13+
714
const files = await glob("dist/**/*.html");
815

916
for (const file of files) {
17+
if (file === "dist/index.html" || file === "dist/404.html") {
18+
continue;
19+
}
20+
1021
const html = readFileSync(file, "utf-8");
1122
const dom = parse(html);
1223

@@ -24,9 +35,42 @@ for (const file of files) {
2435
continue;
2536
}
2637

38+
const product = file.split("/")[1];
2739
const path = file.replace("dist/", "distmd/").replace(".html", ".md");
2840

2941
mkdirSync(path.split("/").slice(0, -1).join("/"), { recursive: true });
30-
3142
writeFileSync(path, markdown);
43+
44+
const llmsFullContent = ["<page>", markdown, "</page>\n\n"].join("\n");
45+
46+
mkdirSync(`distllms/${product}`, { recursive: true });
47+
appendFileSync("distllms/llms-full.txt", llmsFullContent);
48+
appendFileSync(`distllms/${product}/llms-full.txt`, llmsFullContent);
49+
50+
try {
51+
const path = await glob(`src/content/products/${product}.*`).then((arr) =>
52+
arr.at(0),
53+
);
54+
55+
if (!path) {
56+
continue;
57+
}
58+
59+
const yaml = YAML.parse(readFileSync(path, "utf-8"));
60+
const group = yaml.product?.group?.replaceAll(" ", "-").toLowerCase();
61+
62+
if (!group) {
63+
continue;
64+
}
65+
66+
mkdirSync(`distllms/${group}`, { recursive: true });
67+
appendFileSync(`distllms/${group}/llms-full.txt`, llmsFullContent);
68+
} catch (error) {
69+
if (error instanceof Error) {
70+
console.error(
71+
`Failed to find a product group for ${product}:`,
72+
error.message,
73+
);
74+
}
75+
}
3276
}

src/pages/[area]/llms-full.txt.ts

Lines changed: 0 additions & 62 deletions
This file was deleted.

src/pages/[product]/llms-full.txt.ts

Lines changed: 0 additions & 50 deletions
This file was deleted.

src/pages/llms-full.txt.ts

Lines changed: 0 additions & 33 deletions
This file was deleted.

worker/index.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@ export default class extends WorkerEntrypoint<Env> {
2222
});
2323
}
2424

25+
if (request.url.endsWith("/llms-full.txt")) {
26+
const { pathname } = new URL(request.url);
27+
const res = await this.env.VENDORED_MARKDOWN.get(pathname.slice(1));
28+
29+
return new Response(res?.body, {
30+
headers: {
31+
"Content-Type": "text/markdown; charset=utf-8",
32+
},
33+
});
34+
}
35+
2536
if (request.url.endsWith("/index.md")) {
2637
const htmlUrl = request.url.replace("index.md", "");
2738
const res = await this.env.ASSETS.fetch(htmlUrl, request);

worker/index.worker.test.ts

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -204,41 +204,6 @@ describe("Cloudflare Docs", () => {
204204
const text = await response.text();
205205
expect(text).toContain("# Cloudflare Developer Documentation");
206206
});
207-
208-
it("llms-full.txt", async () => {
209-
const request = new Request("http://fakehost/llms-full.txt");
210-
const response = await SELF.fetch(request);
211-
212-
expect(response.status).toBe(200);
213-
214-
const text = await response.text();
215-
expect(text).toContain("URL: https://developers.cloudflare.com/");
216-
expect(text).toContain('from "~/components"');
217-
});
218-
219-
it("product-specific llms-full.txt", async () => {
220-
const request = new Request("http://fakehost/workers/llms-full.txt");
221-
const response = await SELF.fetch(request);
222-
223-
expect(response.status).toBe(200);
224-
225-
const text = await response.text();
226-
expect(text).toContain("URL: https://developers.cloudflare.com/");
227-
expect(text).toContain('from "~/components"');
228-
});
229-
230-
it("area-specific llms-full.txt", async () => {
231-
const request = new Request(
232-
"http://fakehost/developer-platform/llms-full.txt",
233-
);
234-
const response = await SELF.fetch(request);
235-
236-
expect(response.status).toBe(200);
237-
238-
const text = await response.text();
239-
expect(text).toContain("URL: https://developers.cloudflare.com/");
240-
expect(text).toContain('from "~/components"');
241-
});
242207
});
243208

244209
describe("index.md handling", () => {

0 commit comments

Comments
 (0)