Skip to content

Commit 1a08176

Browse files
authored
feat: create llms.txt generator (#254)
* feat: create llms.txt generator * refactor: improvements * fix: typo * refactor: remove paragraphToString util * refactor: some improvements * fix: doc api url path * fix: doc api url path * refactor: some changes * feat: add llm_description prop * test: add llm_description prop * refacotr: remove template replace * feat(linter): create llm description rule * refactor(linter): remove for-of loop * refactor(llms-txt): remove docs url suffix * fix: base url const
1 parent 2b200f1 commit 1a08176

File tree

13 files changed

+192
-16
lines changed

13 files changed

+192
-16
lines changed

src/constants.mjs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,6 @@ export const DOC_NODE_VERSION = process.version;
66
// This is the Node.js CHANGELOG to be consumed to generate a list of all major Node.js versions
77
export const DOC_NODE_CHANGELOG_URL =
88
'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md';
9+
10+
// The base URL for the Node.js website
11+
export const BASE_URL = 'https://nodejs.org/';

src/generators/index.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import addonVerify from './addon-verify/index.mjs';
1010
import apiLinks from './api-links/index.mjs';
1111
import oramaDb from './orama-db/index.mjs';
1212
import astJs from './ast-js/index.mjs';
13+
import llmsTxt from './llms-txt/index.mjs';
1314

1415
export const publicGenerators = {
1516
'json-simple': jsonSimple,
@@ -21,6 +22,7 @@ export const publicGenerators = {
2122
'addon-verify': addonVerify,
2223
'api-links': apiLinks,
2324
'orama-db': oramaDb,
25+
'llms-txt': llmsTxt,
2426
};
2527

2628
export const allGenerators = {

src/generators/llms-txt/index.mjs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import { readFile, writeFile } from 'node:fs/promises';
2+
import { join } from 'node:path';
3+
4+
import { buildApiDocLink } from './utils/buildApiDocLink.mjs';
5+
6+
/**
 * Generator that produces a llms.txt file, giving LLMs a concise index of the
 * API documentation to consult at inference time
 *
 * @typedef {Array<ApiDocMetadataEntry>} Input
 *
 * @type {GeneratorMetadata<Input, string>}
 */
export default {
  name: 'llms-txt',

  version: '1.0.0',

  description:
    'Generates a llms.txt file to provide information to LLMs at inference time',

  dependsOn: 'ast',

  /**
   * Builds the llms.txt content by appending one markdown list item per
   * top-level API doc entry to the bundled template. When an output
   * directory is given, the content is also written to `llms.txt` inside it.
   *
   * @param {Input} entries
   * @param {Partial<GeneratorOptions>} options
   * @returns {Promise<string>} The generated llms.txt content
   */
  async generate(entries, { output }) {
    const templatePath = join(import.meta.dirname, 'template.txt');
    const header = await readFile(templatePath, 'utf-8');

    // Only top-level headings (depth 1) get a line in the index
    const listItems = entries.flatMap(entry =>
      entry.heading.depth === 1 ? [`- ${buildApiDocLink(entry)}`] : []
    );

    const contents = `${header}${listItems.join('\n')}`;

    if (output) {
      await writeFile(join(output, 'llms.txt'), contents);
    }

    return contents;
  },
};
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Node.js Documentation
2+
3+
> Node.js is an open-source, cross-platform JavaScript runtime environment that executes JavaScript code outside a web browser. Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient for building scalable network applications.
4+
5+
Below are the sections of the API documentation. Pay particular attention to the links that point to guidance on, or an introduction to, the structure of this documentation.
6+
7+
## API Documentation
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { BASE_URL } from '../../../constants.mjs';
2+
import { transformNodeToString } from '../../../utils/unist.mjs';
3+
4+
/**
5+
* Retrieves the description of a given API doc entry. It first checks whether
6+
* the entry has a llm_description property. If not, it extracts the first
7+
* paragraph from the entry's content.
8+
*
9+
* @param {ApiDocMetadataEntry} entry
10+
* @returns {string}
11+
*/
12+
const getEntryDescription = entry => {
13+
if (entry.llm_description) {
14+
return entry.llm_description;
15+
}
16+
17+
const descriptionNode = entry.content.children.find(
18+
child => child.type === 'paragraph'
19+
);
20+
21+
if (!descriptionNode) {
22+
return '';
23+
}
24+
25+
return (
26+
transformNodeToString(descriptionNode)
27+
// Remove newlines and extra spaces
28+
.replace(/[\r\n]+/g, '')
29+
);
30+
};
31+
32+
/**
33+
* Builds a markdown link for an API doc entry
34+
*
35+
* @param {ApiDocMetadataEntry} entry
36+
* @returns {string}
37+
*/
38+
export const buildApiDocLink = entry => {
39+
const title = entry.heading.data.name;
40+
41+
const path = entry.api_doc_source.replace(/^doc\//, '/docs/latest/');
42+
const url = new URL(path, BASE_URL);
43+
44+
const link = `[${title}](${url})`;
45+
46+
const description = getEntryDescription(entry);
47+
48+
return `${link}: ${description}`;
49+
};

src/linter/constants.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,6 @@ export const LINT_MESSAGES = {
55
missingChangeVersion: 'Missing version field in the API doc entry',
66
invalidChangeVersion: 'Invalid version number: {{version}}',
77
duplicateStabilityNode: 'Duplicate stability node',
8+
missingLlmDescription:
9+
'Missing llm_description field or paragraph node in the API doc entry',
810
};

src/linter/rules/index.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import { duplicateStabilityNodes } from './duplicate-stability-nodes.mjs';
44
import { invalidChangeVersion } from './invalid-change-version.mjs';
55
import { missingIntroducedIn } from './missing-introduced-in.mjs';
6+
import { missingLlmDescription } from './missing-llm-description.mjs';
67

78
/**
89
* @type {Record<string, import('../types').LintRule>}
@@ -11,4 +12,5 @@ export default {
1112
'duplicate-stability-nodes': duplicateStabilityNodes,
1213
'invalid-change-version': invalidChangeVersion,
1314
'missing-introduced-in': missingIntroducedIn,
15+
'missing-llm-description': missingLlmDescription,
1416
};
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import { LINT_MESSAGES } from '../constants.mjs';
2+
3+
/**
4+
* Checks if a top-level entry is missing a llm_description field or a paragraph
5+
* node.
6+
*
7+
* @param {ApiDocMetadataEntry[]} entries
8+
* @returns {Array<import('../types.d.ts').LintIssue>}
9+
*/
10+
export const missingLlmDescription = entries => {
11+
return entries
12+
.filter(entry => {
13+
// Only process top-level headings
14+
if (entry.heading.depth !== 1) {
15+
return false;
16+
}
17+
18+
// Skip entries that have an llm_description property
19+
if (entry.llm_description !== undefined) {
20+
return false;
21+
}
22+
23+
const hasParagraph = entry.content.children.some(
24+
node => node.type === 'paragraph'
25+
);
26+
27+
// Skip entries that contain a paragraph that can be used as a fallback.
28+
if (hasParagraph) {
29+
return false;
30+
}
31+
32+
return true;
33+
})
34+
.map(entry => mapToMissingEntryWarning(entry));
35+
};
36+
37+
/**
38+
* Maps a entry to a warning for missing llm description.
39+
*
40+
* @param {ApiDocMetadataEntry} entry
41+
* @returns {import('../types.d.ts').LintIssue}
42+
*/
43+
const mapToMissingEntryWarning = entry => ({
44+
level: 'warn',
45+
message: LINT_MESSAGES.missingLlmDescription,
46+
location: { path: entry.api_doc_source },
47+
});

src/metadata.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ const createMetadata = slugger => {
131131
updates = [],
132132
changes = [],
133133
tags = [],
134+
llm_description,
134135
} = internalMetadata.properties;
135136

136137
// Also add the slug to the heading data as it is used to build the heading
@@ -157,6 +158,7 @@ const createMetadata = slugger => {
157158
content: section,
158159
tags,
159160
introduced_in,
161+
llm_description,
160162
yaml_position: internalMetadata.yaml_position,
161163
};
162164
},

src/test/metadata.test.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ describe('createMetadata', () => {
7272
heading,
7373
n_api_version: undefined,
7474
introduced_in: undefined,
75+
llm_description: undefined,
7576
removed_in: undefined,
7677
slug: 'test-heading',
7778
source_link: 'test.com',

0 commit comments

Comments
 (0)