Skip to content

Commit dacb4f8

Browse files
committed
feat: improve the indentation
1 parent 560fb79 commit dacb4f8

File tree

1 file changed

+60
-24
lines changed

1 file changed

+60
-24
lines changed

scripts/indentLlmsFile.mjs

Lines changed: 60 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,47 +6,81 @@ const LLMS_FILE = path.join(BUILD_DIR, 'llms.txt');
66

77
const INDENT_LEVEL = 2;
88

9-
// Paths that should be indented at the first level
10-
const INDENTED_PATHS = ['/api/v2/', '/academy/', '/platform/', '/legal/'];
11-
12-
// Main API pages that should have no indentation
13-
const MAIN_API_PAGES = ['/api.md', '/api/v2.md'];
9+
const MAIN_SECTIONS = ['/api.md', '/api/v2.md'];
1410

1511
const BASE_URL = process.env.APIFY_DOCS_ABSOLUTE_URL || 'https://docs.apify.com';
16-
console.log('debug: BASE_URL', BASE_URL);
12+
13+
/**
14+
* Extracts the pathname from an absolute URL, dropping the origin, query string, and hash fragment
15+
*/
16+
function extractPathFromUrl(url) {
17+
const urlObj = new URL(url);
18+
return urlObj.pathname;
19+
}
20+
1721
/**
18-
* Calculates the depth of a URL by counting non-file path segments.
22+
* Calculates the hierarchical depth of a URL path.
23+
* This counts directory levels, not including the filename.
1924
*/
20-
function getUrlDepth(url) {
21-
const baseUrl = url.replace(BASE_URL, '');
22-
const urlSegments = baseUrl.split('/').filter((segment) => segment && segment !== '');
23-
const nonFileSegments = urlSegments.filter((segment) => !segment.endsWith('.md'));
25+
function getUrlHierarchyDepth(url) {
26+
const urlPath = extractPathFromUrl(url);
27+
const segments = urlPath.split('/').filter((segment) => segment && segment !== '');
28+
29+
// Drop segments ending in .md (the page file) so only directory levels are counted
30+
const nonFileSegments = segments.filter((segment) => !segment.endsWith('.md'));
31+
2432
return nonFileSegments.length;
2533
}
2634

2735
/**
28-
* Determines the indentation level for a documentation link based on its URL.
36+
* Determines if a URL is a main section page (level 0)
37+
*/
38+
function isMainSectionPage(url) {
39+
const urlPath = extractPathFromUrl(url);
40+
const segments = urlPath.split('/').filter((segment) => segment && segment !== '');
41+
42+
// A path with exactly one non-empty segment (e.g. a top-level .md file) is a main page;
43+
// deeper paths qualify only through the MAIN_SECTIONS check further down
44+
if (segments.length === 1) {
45+
return true;
46+
}
47+
48+
// Exact pathname match against MAIN_SECTIONS (covers nested main pages such as /api/v2.md)
49+
if (MAIN_SECTIONS.includes(urlPath)) {
50+
return true;
51+
}
52+
53+
return false;
54+
}
55+
56+
/**
57+
* Determines the indentation level for a documentation link based on its URL hierarchy.
2958
*/
3059
function getLinkIndentation(url) {
31-
if (MAIN_API_PAGES.some((page) => url.includes(page))) {
60+
// Main section pages get no indentation
61+
if (isMainSectionPage(url)) {
3262
return 0;
3363
}
3464

35-
if (INDENTED_PATHS.some((item) => url.includes(item))) {
36-
return INDENT_LEVEL;
37-
}
65+
// Number of directory levels in the URL path (segments ending in .md are not counted)
66+
const depth = getUrlHierarchyDepth(url);
3867

39-
// Default based on URL depth
40-
const depth = getUrlDepth(url);
41-
return Math.min(depth * INDENT_LEVEL, INDENT_LEVEL * 3);
68+
// Each directory level adds INDENT_LEVEL to the indentation, so the first level
69+
// below a main section gets one level and every deeper level gets one more
70+
return Math.min(depth * INDENT_LEVEL, INDENT_LEVEL * 4); // Cap at 4 levels
4271
}
4372

4473
/**
45-
* Determines the indentation level for a line based on its content type.
74+
* Determines the indentation level for a line based on its content type and URL.
4675
*/
4776
function getIndentationLevel(line, lineIndex, allLines) {
48-
if (line.startsWith('# ') || line.startsWith('## ')) {
49-
return 0; // Main title or section title - no indent
77+
// Handle markdown headers
78+
if (line.startsWith('# ')) {
79+
return 0; // Main title - no indent
80+
}
81+
82+
if (line.startsWith('## ')) {
83+
return 0; // Section title - no indent
5084
}
5185

5286
if (line.startsWith('### ')) {
@@ -57,6 +91,7 @@ function getIndentationLevel(line, lineIndex, allLines) {
5791
return INDENT_LEVEL * 2; // Sub-subsection title - 2 level indent
5892
}
5993

94+
// Handle markdown links with URLs
6095
if (line.startsWith('- [') && line.includes(`](${BASE_URL}/`)) {
6196
const urlMatch = line.match(new RegExp(`\\]\\((${BASE_URL.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}/[^)]+)\\)`));
6297
if (!urlMatch) {
@@ -65,9 +100,10 @@ function getIndentationLevel(line, lineIndex, allLines) {
65100
return getLinkIndentation(urlMatch[1]);
66101
}
67102

103+
// For other content, use the same indentation as the previous line
68104
if (lineIndex > 0) {
69-
// Other content - use same indent as previous line
70-
const prevIndentMatch = allLines[lineIndex - 1].match(/^(\s*)/);
105+
const prevLine = allLines[lineIndex - 1];
106+
const prevIndentMatch = prevLine.match(/^(\s*)/);
71107
return prevIndentMatch ? prevIndentMatch[1].length : INDENT_LEVEL;
72108
}
73109

0 commit comments

Comments
 (0)