Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
1e18609
wip
khromov Dec 9, 2024
d665b86
Update +server.ts
khromov Dec 9, 2024
d6041ce
Update +server.ts
khromov Dec 9, 2024
b74efca
wip
khromov Dec 10, 2024
437464f
wip
khromov Dec 10, 2024
15ed068
Update +server.ts
khromov Dec 10, 2024
98a9f90
Update +server.ts
khromov Dec 10, 2024
16046c0
wip
khromov Dec 10, 2024
c2534f9
cleanup
khromov Dec 10, 2024
0d29584
wip
khromov Dec 10, 2024
6d05a5a
refactor
khromov Dec 10, 2024
464630f
cleanupo
khromov Dec 10, 2024
4238887
Update +server.ts
khromov Dec 10, 2024
d494334
Update content.ts
khromov Dec 10, 2024
4315f7a
wip
khromov Dec 10, 2024
7bb3ebd
Create +server.ts
khromov Dec 10, 2024
8612605
minimize llms.txt
khromov Dec 10, 2024
8b2544c
Filter llms.txt
khromov Dec 10, 2024
02dbf8d
clean up
khromov Dec 10, 2024
3470a0b
Merge branch 'main' into llms-txt
khromov Dec 11, 2024
7d89403
package
khromov Dec 11, 2024
918b627
chore: naming
khromov Dec 11, 2024
b327991
Update +server.ts
khromov Dec 11, 2024
e51cf30
Dynamic path names
khromov Dec 11, 2024
ea0c646
clean up
khromov Dec 11, 2024
08f1aea
fix
khromov Dec 11, 2024
13e1cc5
Merge branch 'main' into llms-txt
dummdidumm Dec 12, 2024
c1e57a2
under_score
dummdidumm Dec 12, 2024
190ff05
code style
dummdidumm Dec 12, 2024
72a1fd1
use real document titles, filter out empty files
dummdidumm Dec 12, 2024
aaee7c6
move llms.txt to llms-small.txt
khromov Dec 12, 2024
a7f0f8d
llms.txt index
khromov Dec 12, 2024
c5fac95
Update +server.ts
khromov Dec 12, 2024
29b4726
Update +server.ts
khromov Dec 12, 2024
9eeaca6
Fix index
khromov Dec 12, 2024
9c8469c
fix
khromov Dec 12, 2024
e9d7e70
revert VERCEL_URL usage
khromov Dec 12, 2024
855a197
Update apps/svelte.dev/src/lib/server/content.ts
khromov Dec 12, 2024
f0c91cc
move llm stuff into its own module
Rich-Harris Dec 13, 2024
086c48f
revert whitespace changes
Rich-Harris Dec 13, 2024
d7f9180
snake_case
Rich-Harris Dec 13, 2024
d541799
tweak
Rich-Harris Dec 13, 2024
8137d82
snake_case etc
Rich-Harris Dec 13, 2024
edc5d43
make ignores work
Rich-Harris Dec 13, 2024
bdf0381
simplify
Rich-Harris Dec 13, 2024
e7bd8d9
unused
Rich-Harris Dec 13, 2024
f5df782
reduce indirection
Rich-Harris Dec 13, 2024
aa3a4e6
more
Rich-Harris Dec 13, 2024
84ed768
move template into separate .md file
Rich-Harris Dec 13, 2024
b2233b8
add a section to /docs
Rich-Harris Dec 13, 2024
c717f6c
Merge branch 'main' into llms-txt
Rich-Harris Dec 13, 2024
a521cc9
advent of svelte
Rich-Harris Dec 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/svelte.dev/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
"lightningcss": "^1.25.1",
"magic-string": "^0.30.11",
"marked": "^14.1.2",
"minimatch": "^10.0.1",
"prettier": "^3.3.2",
"prettier-plugin-svelte": "^3.2.4",
"satori": "^0.10.13",
Expand Down
209 changes: 209 additions & 0 deletions apps/svelte.dev/src/lib/server/content.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { dev } from '$app/environment';
import { read } from '$app/server';
import type { Document } from '@sveltejs/site-kit';
import { create_index } from '@sveltejs/site-kit/server/content';
import { minimatch } from 'minimatch';

const documents = import.meta.glob<string>('../../../content/**/*.md', {
eager: true,
Expand All @@ -14,6 +16,12 @@ const assets = import.meta.glob<string>('../../../content/**/+assets/**', {
import: 'default'
});

/**
 * Markdown files under `content/`, keyed by their glob-relative path
 * (for example `../../../content/docs/...`). The glob is eager and uses the
 * `?raw` query, so each value is typed as the file's text.
 */
export const documentsContent = import.meta.glob<string>('../../../content/**/*.md', {
eager: true,
query: '?raw',
import: 'default'
});

// https://github.com/vitejs/vite/issues/17453
// Content index built by site-kit from the globbed documents and assets, rooted at
// `../../../content`. `read` is handed to `create_index` — presumably so it can load
// asset contents (confirm in @sveltejs/site-kit).
export const index = await create_index(documents, assets, '../../../content', read);

Expand Down Expand Up @@ -123,3 +131,204 @@ function create_docs() {
// Result of `create_docs()`, computed once when this module is loaded.
export const docs = create_docs();

// Children of the `examples` node of the content index.
export const examples = index.examples.children;

/**
 * Ranks a content path by the documentation section it belongs to, so that the
 * combined output lists Svelte first, then SvelteKit, then the CLI.
 *
 * @param path - Content path to classify.
 * @returns 0 for `/docs/svelte/`, 1 for `/docs/kit/`, 2 for `/docs/cli/`, and 3
 * for any path that contains none of them. The first matching section wins.
 */
function getSectionPriority(path: string): number {
	const orderedSections = ['/docs/svelte/', '/docs/kit/', '/docs/cli/'];
	const rank = orderedSections.findIndex((segment) => path.includes(segment));

	// Unknown sections sort after every known one.
	return rank === -1 ? orderedSections.length : rank;
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to ensure ordering in the combined docs.


/**
 * Returns a sorted copy of `paths`, ordered for the combined documentation output.
 *
 * Ordering rules, in sequence:
 * 1. Section priority as reported by `getSectionPriority`.
 * 2. Directory path (everything before the last `/`), compared with `localeCompare`.
 * 3. Within one directory, `index.md` comes first, e.g.
 *    `docs/svelte/01-introduction/index.md` sorts before
 *    `docs/svelte/01-introduction/01-overview.md`; remaining files are compared
 *    by their full path.
 *
 * The input array is left untouched; `Array.prototype.sort` sorts in place, so the
 * sort is applied to a copy.
 *
 * @param paths - Content paths to order.
 * @returns A new array containing the same paths in sorted order.
 */
export function sortPaths(paths: string[]): string[] {
	// Split once on the last slash; a path without any slash has an empty directory.
	const splitPath = (path: string) => {
		const cut = path.lastIndexOf('/');
		return { dir: path.slice(0, Math.max(cut, 0)), file: path.slice(cut + 1) };
	};

	return [...paths].sort((a, b) => {
		// First compare by section priority
		const priorityDelta = getSectionPriority(a) - getSectionPriority(b);
		if (priorityDelta !== 0) return priorityDelta;

		const left = splitPath(a);
		const right = splitPath(b);

		// Different directories: order by directory path
		if (left.dir !== right.dir) return left.dir.localeCompare(right.dir);

		// Same directory: the file named exactly `index.md` leads. Comparing the
		// file name (not a suffix) keeps names like `foo-index.md` out of this rule.
		const leftIsIndex = left.file === 'index.md';
		const rightIsIndex = right.file === 'index.md';
		if (leftIsIndex !== rightIsIndex) return leftIsIndex ? -1 : 1;

		return a.localeCompare(b);
	});
}

/** Package identifiers whose documentation lives under `/docs/<name>/`. */
export const packages = ['svelte', 'kit', 'cli'] as const;
/** Union of the identifiers listed in `packages`. */
export type Package = (typeof packages)[number];

/** Display name for each package, interpolated into the generated titles and headings. */
const DOCUMENTATION_NAMES: Record<Package, string> = {
svelte: 'Svelte',
kit: 'SvelteKit',
cli: 'Svelte CLI'
};

/**
 * Builds the one-sentence description used to introduce a package's documentation.
 *
 * @param type - Package whose display name is looked up in `DOCUMENTATION_NAMES`.
 * @returns The sentence `This is the developer documentation for <name>.`
 */
export function getDocumentationTitle(type: Package): string {
	const productName = DOCUMENTATION_NAMES[type];
	return 'This is the developer documentation for ' + productName + '.';
}

/**
 * Builds the level-1 markdown heading that opens a package's section in the
 * combined output.
 *
 * @param type - Package whose display name is looked up in `DOCUMENTATION_NAMES`.
 * @returns The heading `# Start of <name> documentation`.
 */
export function getDocumentationStartTitle(type: Package): string {
	const productName = DOCUMENTATION_NAMES[type];
	return '# Start of ' + productName + ' documentation';
}

/**
 * Picks the documents that belong to one package.
 *
 * A document is kept when its lower-cased path contains `/docs/<type>/`. The
 * input object is not modified.
 *
 * @param allDocs - Map of content path to document text.
 * @param type - Package to keep.
 * @returns A new map holding only the matching entries, in their original order.
 */
export function filterDocsByPackage(
	allDocs: Record<string, string>,
	type: Package
): Record<string, string> {
	const needle = `/docs/${type}/`;
	const matching = Object.entries(allDocs).filter(([path]) =>
		path.toLowerCase().includes(needle)
	);

	return Object.fromEntries(matching);
}

/** Switches for the individual transformations applied by `minimizeContent`. */
interface MinimizeOptions {
/** Strip `> [!LEGACY]` quote blocks. */
removeLegacy: boolean;
/** Strip `> [!NOTE]` quote blocks. */
removeNoteBlocks: boolean;
/** Strip `> [!DETAILS]` quote blocks. */
removeDetailsBlocks: boolean;
/** Point markdown links that target `/playground…` at `/REMOVED`, keeping the link text. */
removePlaygroundLinks: boolean;
/** Drop lines that consist only of `<!-- prettier-ignore -->`. */
removePrettierIgnore: boolean;
/** Collapse every run of whitespace, newlines included, into a single space. */
normalizeWhitespace: boolean;
}

/**
 * Baseline settings with every transformation switched off. `minimizeContent`
 * spreads the caller's options over this object.
 */
const defaultOptions: MinimizeOptions = {
removeLegacy: false,
removeNoteBlocks: false,
removeDetailsBlocks: false,
removePlaygroundLinks: false,
removePrettierIgnore: false,
normalizeWhitespace: false
};

/**
 * Removes every markdown blockquote that opens with a `> [!TYPE]` marker.
 *
 * A block starts at a line whose trimmed text begins with `> [!<blockType>]`
 * (optionally followed by more text). The marker line, the directly following
 * lines that start with `>`, and the blank lines right after them are all
 * dropped. A blockquote that starts after that blank gap is a separate quote
 * and is kept.
 *
 * The previous implementation tried to skip lines by reassigning the `index`
 * argument of a `reduce` callback. That has no effect on the iteration, so only
 * the marker line itself was removed.
 *
 * @param content - Markdown source.
 * @param blockType - Marker name without brackets, e.g. `NOTE` or `LEGACY`.
 * @returns The content with the matching blocks removed.
 */
function removeQuoteBlocks(content: string, blockType: string): string {
	const marker = `> [!${blockType}]`;
	const lines = content.split('\n');
	const kept: string[] = [];

	let i = 0;
	while (i < lines.length) {
		const line = lines[i];

		if (!line.trim().startsWith(marker)) {
			kept.push(line);
			i++;
			continue;
		}

		// Skip the marker line itself
		i++;

		// Skip the remaining lines of this blockquote
		while (i < lines.length && lines[i].trimStart().startsWith('>')) i++;

		// Skip the blank lines that separated the block from what follows
		while (i < lines.length && lines[i].trim() === '') i++;
	}

	return kept.join('\n');
}

function minimizeContent(content: string, options?: Partial<MinimizeOptions>): string {
// Merge with defaults, but only for properties that are defined
const settings: MinimizeOptions = options ? { ...defaultOptions, ...options } : defaultOptions;

let minimized = content;

if (settings.removeLegacy) {
minimized = removeQuoteBlocks(minimized, 'LEGACY');
}

if (settings.removeNoteBlocks) {
minimized = removeQuoteBlocks(minimized, 'NOTE');
}

if (settings.removeDetailsBlocks) {
minimized = removeQuoteBlocks(minimized, 'DETAILS');
}

if (settings.removePlaygroundLinks) {
// Point playground links at /REMOVED but keep the original link text
minimized = minimized.replace(/\[([^\]]+)\]\(\/playground[^)]+\)/g, '[$1](/REMOVED)');
}

if (settings.removePrettierIgnore) {
minimized = minimized
.split('\n')
.filter((line) => line.trim() !== '<!-- prettier-ignore -->')
.join('\n');
}

if (settings.normalizeWhitespace) {
minimized = minimized.replace(/\s+/g, ' ');
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this whitespace normalization messes up the markdown, as all newlines are removed. What was the reason to add this, in which way do llms benefit from this? Or is this just about removing as much extra content as possible? (in which case, I'm not sure if this helps / is doable, because of the markdown rules)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If these LLMs count tokens and whitespace isn't counted, then I am not sure we need this at all. Shouldn't we rather retain the more readable format, for easier human consumption and possible Markdown rendering to HTML?

Copy link
Contributor Author

@khromov khromov Dec 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👋 Spaces are counted as tokens, so the reason to remove whitespace is purely to reduce the token count. That's why we apply the whitespace transformation to /llms.txt but not to /llms-full.txt in this PR.

As for raw numbers, I ran the current llms.txt through OpenAI tokenizer to test the effect:

Without normalizeWhitespace:
Tokens 197,260
Characters 797,617

With normalizeWhitespace:
Tokens 188,595
Characters 781,754

The reduction for token count is ~4.4%.

As for the invalid markdown, since LLMs don't really parse Markdown, it does not need to have a detrimental effect on performance. As for the performance, there is no real way to measure without creating a "SvelteBench" benchmark that compares minified vs unminified and test it for all major LLMs.

From my anecdotal evidence of having used the content.json file from the v4 site for 6+ months for LLM coding (which is half HTML encoded 😅 ) I haven't found a huge performance difference vs the text version of the docs.

What does make a difference is whether you can fit the file into context or not. If you are using a full context approach (as shown in my recent YouTube video) then it is absolutely required for the text file to fit inside the LLM context, otherwise you cannot use it at all. That's why I've made a compromise with a smaller llms.txt (which I think we should slim down further with time, but this sets a starting point) and a larger llms-full.txt for people who have huge context or use other approaches like RAG to search the docs, where the size does not matter as much.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to https://llmstxt.org/ the main llms.txt would provide only a short overview along with links to more details (in our case, we could link to the different packages). It would also remove the need for llms-full.txt. Why do we need both then?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👋 I mentioned this in another comment, I will change it to have an index.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👋 Spaces are counted as tokens, so the reason to remove whitespace is purely to reduce the token count.

If whitespace counts as tokens, it means it makes an actual difference in interpretation. Removing newlines in particular could end up turning a list into a single line that's "parsed" differently.

Is there a way to compare both variants? Would keeping only the newlines be a reasonable compromise?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

minimized = minimized.trim();

return minimized;
}

/**
 * Decides whether a file takes part in the generated output.
 *
 * @param filename - Path tested against each pattern.
 * @param ignore - minimatch patterns; a single match excludes the file.
 * @returns `false` when any pattern matches, `true` otherwise. In dev mode the
 * exclusion is logged to the console.
 */
function shouldIncludeFile(filename: string, ignore: string[] = []): boolean {
	for (const pattern of ignore) {
		if (!minimatch(filename, pattern)) continue;

		if (dev) console.log(`❌ Ignored by pattern: ${filename}`);
		return false;
	}

	return true;
}

/** Options accepted by `generateContent`. */
interface GenerateContentOptions {
/** Text placed at the very top of the output, followed by a blank line. */
prefix?: string;
/** minimatch patterns; documents whose path matches any of them are left out. */
ignore?: string[];
/** When provided, every document is passed through `minimizeContent` with these settings. */
minimize?: Partial<MinimizeOptions>;
/** Restrict the output to paths containing `/docs/<package>/`; no section headings are emitted. */
package?: Package;
}

/**
 * Concatenates documents into a single text body.
 *
 * Paths are ordered with `sortPaths`, filtered through the `ignore` patterns, and
 * each surviving document is emitted as a `## <path>` heading (with the
 * `../../../content/` prefix removed) followed by its text.
 *
 * - With `options.package`, only paths containing `/docs/<package>/` are emitted.
 * - Without it, only paths belonging to one of `packages` are emitted, and a
 *   `# Start of … documentation` heading is inserted whenever the package changes.
 *
 * @param docs - Map of content path to document text.
 * @param options - See `GenerateContentOptions`.
 * @returns The assembled text.
 */
export function generateContent(
	docs: Record<string, string>,
	options: GenerateContentOptions = {}
): string {
	const { prefix, ignore = [], minimize: minimizeOptions, package: pkg } = options;

	const chunks: string[] = [];
	if (prefix) chunks.push(`${prefix}\n\n`);

	// Heading of the section currently being written (combined mode only)
	let lastHeading = '';

	for (const path of sortPaths(Object.keys(docs))) {
		if (!shouldIncludeFile(path, ignore)) continue;

		if (pkg) {
			// Single-package mode: drop everything outside that package
			if (!path.includes(`/docs/${pkg}/`)) continue;
		} else {
			// Combined mode: the path must belong to one of the known packages
			const owner = packages.find((p) => path.includes(`/docs/${p}/`));
			if (!owner) continue;

			const heading = getDocumentationStartTitle(owner);
			if (heading !== lastHeading) {
				// Separate consecutive sections with an extra blank line
				if (lastHeading) chunks.push('\n');
				chunks.push(`${heading}\n\n`);
				lastHeading = heading;
			}
		}

		const body = minimizeOptions ? minimizeContent(docs[path], minimizeOptions) : docs[path];
		chunks.push(`## ${path.replace('../../../content/', '')}\n\n${body}\n`);
	}

	return chunks.join('');
}
42 changes: 42 additions & 0 deletions apps/svelte.dev/src/routes/docs/[...path]/llms.txt/+server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import type { RequestHandler } from './$types';
import type { EntryGenerator } from './$types';
import { error } from '@sveltejs/kit';
import {
documentsContent,
filterDocsByPackage,
generateContent,
getDocumentationTitle,
packages,
type Package
} from '$lib/server/content';

// Prerender this endpoint.
export const prerender = true;

// One entry per known package, so the `[...path]` parameter takes the values
// `svelte`, `kit` and `cli`.
export const entries: EntryGenerator = () => {
return packages.map((type) => ({ path: type }));
};

/**
 * Serves the documentation of a single package as plain text.
 *
 * Responds with 404 when `params.path` is not one of `packages`, or when no
 * document belongs to that package.
 *
 * The route parameter is looked up in `packages` instead of being cast with
 * `as Package`. The resulting value has the `Package` type, so it can be passed
 * to `filterDocsByPackage` and `getDocumentationTitle` without assertions.
 */
export const GET: RequestHandler = async ({ params }) => {
	const packageType = packages.find((candidate) => candidate === params.path);

	if (!packageType) {
		error(404, 'Not Found');
	}

	const filteredDocs = filterDocsByPackage(documentsContent, packageType);

	if (Object.keys(filteredDocs).length === 0) {
		error(404, 'No documentation found for this package');
	}

	const PREFIX = `<SYSTEM>${getDocumentationTitle(packageType)}</SYSTEM>`;
	const content = `${PREFIX}\n\n${generateContent(filteredDocs)}`;

	return new Response(content, {
		status: 200,
		headers: {
			'Content-Type': 'text/plain; charset=utf-8',
			'Cache-Control': 'public, max-age=3600'
		}
	});
};
19 changes: 19 additions & 0 deletions apps/svelte.dev/src/routes/llms-full.txt/+server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import type { RequestHandler } from './$types';
import { documentsContent, generateContent } from '$lib/server/content';

// Preamble placed at the top of the response body, wrapped in <SYSTEM> tags.
// NOTE(review): the generated body covers every entry of `packages`, which also
// lists `cli` — confirm whether the wording should mention the Svelte CLI docs.
const PREFIX =
'<SYSTEM>This is the full developer documentation for Svelte and SvelteKit.</SYSTEM>';

/**
 * Serves the complete documentation as plain text: the preamble, a blank line,
 * then every document produced by `generateContent` with no ignores and no
 * minimization.
 */
export const GET: RequestHandler = async () => {
	const body = [PREFIX, generateContent(documentsContent)].join('\n\n');

	const headers = {
		'Content-Type': 'text/plain; charset=utf-8',
		'Cache-Control': 'public, max-age=3600'
	};

	return new Response(body, { status: 200, headers });
};

export const prerender = true;
50 changes: 50 additions & 0 deletions apps/svelte.dev/src/routes/llms.txt/+server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import type { RequestHandler } from './$types';
import { documentsContent, generateContent } from '$lib/server/content';

// Preamble placed at the top of the response body, wrapped in <SYSTEM> tags.
const PREFIX =
'<SYSTEM>This is the abridged developer documentation for Svelte and SvelteKit.</SYSTEM>';

/**
 * Serves the abridged documentation as plain text.
 *
 * The preset is trimmed so that most use cases stay covered while less
 * important material is removed. Author's note from review: at the time of
 * writing the trimmed output was 514KB, versus 843KB for llms-full.txt, where
 * nothing is omitted.
 */
export const GET: RequestHandler = async () => {
	// Patterns are matched against the `../../../content/...` keys of `documentsContent`.
	const ignore = [
		// Svelte ignores
		'../../../content/docs/svelte/07-misc/04-custom-elements.md',
		'../../../content/docs/svelte/07-misc/06-v4-migration-guide.md',
		'../../../content/docs/svelte/07-misc/07-v5-migration-guide.md',
		'../../../content/docs/svelte/07-misc/99-faq.md',
		'../../../content/docs/svelte/07-misc/xx-reactivity-indepth.md',
		'../../../content/docs/svelte/98-reference/21-svelte-legacy.md',
		'../../../content/docs/svelte/99-legacy/**/*.md',
		'../../../content/docs/svelte/98-reference/30-runtime-errors.md',
		'../../../content/docs/svelte/98-reference/30-runtime-warnings.md',
		'../../../content/docs/svelte/98-reference/30-compiler-errors.md',
		'../../../content/docs/svelte/98-reference/30-compiler-warnings.md',
		// NOTE(review): confirm this pattern takes effect for keys that begin with
		// `../` — minimatch's `**` may refuse to match `..` path segments.
		'**/xx-*.md',
		// SvelteKit ignores
		'../../../content/docs/kit/25-build-and-deploy/*adapter-*.md',
		'../../../content/docs/kit/25-build-and-deploy/99-writing-adapters.md',
		'../../../content/docs/kit/30-advanced/70-packaging.md',
		'../../../content/docs/kit/40-best-practices/05-performance.md',
		'../../../content/docs/kit/60-appendix/**/*.md'
	];

	// Every minimization step is enabled for the abridged variant
	const minimize = {
		removeLegacy: true,
		removeNoteBlocks: true,
		removeDetailsBlocks: true,
		removePlaygroundLinks: true,
		removePrettierIgnore: true,
		normalizeWhitespace: true
	};

	const body = `${PREFIX}\n\n${generateContent(documentsContent, { ignore, minimize })}`;

	return new Response(body, {
		status: 200,
		headers: {
			'Content-Type': 'text/plain; charset=utf-8',
			'Cache-Control': 'public, max-age=3600'
		}
	});
};

export const prerender = true;
11 changes: 11 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.