Skip to content

Commit b1db58a

Browse files
committed
feat: Indent llms.txt file
1 parent 56bf9c2 commit b1db58a

File tree

3 files changed

+131
-2
lines changed

3 files changed

+131
-2
lines changed

package-lock.json

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@
4141
"lint:code": "eslint .",
4242
"lint:code:fix": "eslint . --fix",
4343
"postinstall": "patch-package",
44-
"postbuild": "node ./scripts/joinLlmsFiles.mjs"
44+
"postbuild": "node ./scripts/joinLlmsFiles.mjs && node ./scripts/indentLlmsFile.mjs"
4545
},
4646
"devDependencies": {
4747
"@apify/eslint-config": "^1.0.0",

scripts/indentLlmsFile.mjs

Lines changed: 124 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,124 @@
1+
import fs from 'node:fs/promises';
2+
import path from 'node:path';
3+
4+
// Absolute path of the `build` directory (resolved against the current working
// directory) and of the llms.txt file inside it.
const BUILD_DIR = path.resolve('build');
const LLMS_FILE = path.join(BUILD_DIR, 'llms.txt');

// Number of spaces that make up one indentation step.
const INDENT_LEVEL = 2;

// URL path fragments whose links are indented by one step.
const INDENTED_PATHS = [
    '/api/v2/',
    '/academy/',
    '/platform/',
    '/legal/',
];

// Main API pages; their links are left unindented.
const MAIN_API_PAGES = [
    '/api.md',
    '/api/v2.md',
];
14+
15+
/**
 * Returns how many path segments of a URL are not `.md` files.
 *
 * The first occurrence of `https://docs.apify.com` is removed, the remainder
 * is split on `/`, and every non-empty segment that does not end with `.md`
 * is counted.
 *
 * @param {string} url - URL (or path) to measure.
 * @returns {number} Count of non-empty, non-`.md` segments.
 */
function getUrlDepth(url) {
    const pathPart = url.replace('https://docs.apify.com', '');

    let depth = 0;
    for (const part of pathPart.split('/')) {
        if (part && !part.endsWith('.md')) {
            depth += 1;
        }
    }

    return depth;
}
24+
25+
/**
 * Determines the indentation (in spaces) for a documentation link based on its URL.
 *
 * Rules, in order:
 *  1. A URL whose path is exactly one of MAIN_API_PAGES gets no indentation.
 *  2. A URL containing one of INDENTED_PATHS gets one indentation step.
 *  3. Otherwise the indentation grows with the URL depth, capped at three steps.
 *
 * @param {string} url - Link target, normally starting with `https://docs.apify.com`.
 * @returns {number} Number of spaces to indent the link with.
 */
function getLinkIndentation(url) {
    // Compare the path exactly instead of using a substring test: with
    // `url.includes('/api.md')` any nested page whose file name is `api.md`
    // (e.g. `/platform/integrations/api.md`) was mistaken for a main API page.
    // The query string and fragment are dropped so they cannot defeat the match.
    const pagePath = url.replace('https://docs.apify.com', '').split(/[?#]/)[0];
    if (MAIN_API_PAGES.includes(pagePath)) {
        return 0;
    }

    if (INDENTED_PATHS.some((item) => url.includes(item))) {
        return INDENT_LEVEL;
    }

    // Default based on URL depth, never deeper than three steps.
    const depth = getUrlDepth(url);
    return Math.min(depth * INDENT_LEVEL, INDENT_LEVEL * 3);
}
41+
42+
/**
 * Picks the indentation (in spaces) for a single, already trimmed line.
 *
 * - `# ` and `## ` headings: no indentation.
 * - `### ` headings: one step; `#### ` headings: two steps.
 * - List links to `https://docs.apify.com/`: delegated to getLinkIndentation.
 * - Anything else: the leading-whitespace width of the previous entry in
 *   `allLines`, or one step when there is no previous line.
 *
 * @param {string} line - Trimmed line content.
 * @param {number} lineIndex - Position of the line within `allLines`.
 * @param {string[]} allLines - Lines used to look up the previous line's indentation.
 * @returns {number} Number of spaces to indent the line with.
 */
function getIndentationLevel(line, lineIndex, allLines) {
    // One to four hashes followed by a space mark a heading.
    const heading = line.match(/^(#{1,4}) /);
    if (heading) {
        // Levels 1 and 2 stay flush left; each deeper level adds one step.
        const extraLevels = Math.max(0, heading[1].length - 2);
        return extraLevels * INDENT_LEVEL;
    }

    const isDocsLink = line.startsWith('- [') && line.includes('](https://docs.apify.com/');
    if (isDocsLink) {
        const found = line.match(/\]\((https:\/\/docs\.apify\.com\/[^)]+)\)/);
        // Fall back to a single step when the URL cannot be extracted.
        return found ? getLinkIndentation(found[1]) : INDENT_LEVEL;
    }

    if (lineIndex <= 0) {
        return INDENT_LEVEL;
    }

    // Other content inherits the leading-whitespace width of the previous line.
    const leading = allLines[lineIndex - 1].match(/^(\s*)/);
    return leading ? leading[1].length : INDENT_LEVEL;
}
74+
75+
/**
 * Applies hierarchical indentation to content based on URL structure and content type.
 *
 * Every line is trimmed and re-indented with the width returned by
 * getIndentationLevel; blank (whitespace-only) lines are emitted as empty lines.
 *
 * @param {string} content - Full text of the file, lines separated by `\n`.
 * @returns {string} The re-indented text, lines joined with `\n`.
 */
function indentContent(content) {
    const indentedLines = [];

    for (const [index, rawLine] of content.split('\n').entries()) {
        const trimmedLine = rawLine.trim();

        // Blank lines carry no indentation.
        if (!trimmedLine) {
            indentedLines.push('');
            continue;
        }

        // Hand over the lines produced so far rather than the raw input, so that
        // "same indent as the previous line" refers to the indentation computed by
        // this run. Reading the raw input made the result depend on whatever
        // whitespace the file already had, so running the script twice gave
        // different output. Exactly one entry is pushed per input line, so
        // indentedLines[index - 1] is always the previous output line.
        const indent = getIndentationLevel(trimmedLine, index, indentedLines);
        indentedLines.push(' '.repeat(indent) + trimmedLine);
    }

    return indentedLines.join('\n');
}
99+
100+
/**
 * Main function to indent the LLMs file.
 * Reads LLMS_FILE, applies indentation, and writes the result back in place.
 *
 * A missing file (`ENOENT`) is not an error: a message is logged and the
 * function returns. Any other failure is logged and terminates the process
 * with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function indentLlmsFile() {
    try {
        // No separate fs.access() check: readFile itself rejects with ENOENT when
        // the file is missing, and checking first only opens a race window
        // between the check and the read.
        const content = await fs.readFile(LLMS_FILE, 'utf8');
        const indentedContent = indentContent(content);
        await fs.writeFile(LLMS_FILE, indentedContent, 'utf8');
        console.log('Successfully indented llms.txt file');
    } catch (error) {
        if (error.code === 'ENOENT') {
            console.log('llms.txt file not found, skipping indentation');
            return;
        }

        console.error('Error indenting llms.txt file:', error);
        process.exit(1);
    }
}
120+
121+
// Script entry point: run the indentation and, should the returned promise
// reject, log the failure and exit with code 1.
const exitWithFailure = (err) => {
    console.error('Failed to indent LLMs files:', err);
    process.exit(1);
};

indentLlmsFile().catch(exitWithFailure);

0 commit comments

Comments
 (0)