@@ -6,47 +6,81 @@ const LLMS_FILE = path.join(BUILD_DIR, 'llms.txt');
66
// Indentation width added per hierarchy level; elsewhere in this file it is
// compared against the length of a line's leading whitespace.
const INDENT_LEVEL = 2;

// Exact URL paths that are always treated as main section pages (no indentation).
const MAIN_SECTIONS = ['/api.md', '/api/v2.md'];

// Absolute base URL of the docs site, overridable via APIFY_DOCS_ABSOLUTE_URL.
// Trailing slashes are stripped because link detection in this file builds its
// patterns as `${BASE_URL}/...`; a base ending in '/' would produce '//' and
// silently match no links at all.
const configuredBaseUrl = (process.env.APIFY_DOCS_ABSOLUTE_URL || '').replace(/\/+$/, '');
const BASE_URL = configuredBaseUrl || 'https://docs.apify.com';
12+
/**
 * Extracts the pathname from a URL, dropping the origin, query string and hash.
 *
 * Inputs without a scheme (e.g. '/api/v2.md' or 'api/v2.md') are resolved
 * against a placeholder origin instead of throwing, so both absolute URLs and
 * site-relative paths are accepted.
 *
 * @param {string} url - Absolute URL or site-relative path.
 * @returns {string} The pathname component, e.g. '/api/v2.md'.
 */
function extractPathFromUrl(url) {
    // The base argument is only consulted when `url` has no origin of its own,
    // and the origin never appears in `pathname`, so a fixed placeholder suffices.
    return new URL(url, 'https://docs.apify.com').pathname;
}
20+
/**
 * Counts the directory levels in a URL's path.
 * Path segments ending in `.md` are treated as files and left out of the count.
 */
function getUrlHierarchyDepth(url) {
    let depth = 0;

    for (const part of extractPathFromUrl(url).split('/')) {
        // Empty parts come from leading, trailing or doubled slashes.
        if (part !== '' && !part.endsWith('.md')) {
            depth += 1;
        }
    }

    return depth;
}
2634
/**
 * Tells whether a URL points at a main section page (indentation level 0).
 * A page qualifies when its path has exactly one non-empty segment, or when
 * the path is explicitly listed in MAIN_SECTIONS.
 */
function isMainSectionPage(url) {
    const pathname = extractPathFromUrl(url);
    const segmentCount = pathname.split('/').filter(Boolean).length;

    return segmentCount === 1 || MAIN_SECTIONS.includes(pathname);
}
55+
/**
 * Computes the indentation for a documentation link from its URL hierarchy.
 * Main section pages are not indented; every directory level below adds
 * INDENT_LEVEL, capped at four levels.
 */
function getLinkIndentation(url) {
    if (isMainSectionPage(url)) {
        return 0;
    }

    const maxIndent = INDENT_LEVEL * 4;
    const indent = getUrlHierarchyDepth(url) * INDENT_LEVEL;

    // Deeply nested pages are clamped so they never exceed the maximum indent.
    return indent > maxIndent ? maxIndent : indent;
}
4372
4473/**
45- * Determines the indentation level for a line based on its content type.
74+ * Determines the indentation level for a line based on its content type and URL .
4675 */
4776function getIndentationLevel ( line , lineIndex , allLines ) {
48- if ( line . startsWith ( '# ' ) || line . startsWith ( '## ' ) ) {
49- return 0 ; // Main title or section title - no indent
77+ // Handle markdown headers
78+ if ( line . startsWith ( '# ' ) ) {
79+ return 0 ; // Main title - no indent
80+ }
81+
82+ if ( line . startsWith ( '## ' ) ) {
83+ return 0 ; // Section title - no indent
5084 }
5185
5286 if ( line . startsWith ( '### ' ) ) {
@@ -57,6 +91,7 @@ function getIndentationLevel(line, lineIndex, allLines) {
5791 return INDENT_LEVEL * 2 ; // Sub-subsection title - 2 level indent
5892 }
5993
94+ // Handle markdown links with URLs
6095 if ( line . startsWith ( '- [' ) && line . includes ( `](${ BASE_URL } /` ) ) {
6196 const urlMatch = line . match ( new RegExp ( `\\]\\((${ BASE_URL . replace ( / [ . * + ? ^ $ { } ( ) | [ \] \\ ] / g, '\\$&' ) } /[^)]+)\\)` ) ) ;
6297 if ( ! urlMatch ) {
@@ -65,9 +100,10 @@ function getIndentationLevel(line, lineIndex, allLines) {
65100 return getLinkIndentation ( urlMatch [ 1 ] ) ;
66101 }
67102
103+ // For other content, use the same indentation as the previous line
68104 if ( lineIndex > 0 ) {
69- // Other content - use same indent as previous line
70- const prevIndentMatch = allLines [ lineIndex - 1 ] . match ( / ^ ( \s * ) / ) ;
105+ const prevLine = allLines [ lineIndex - 1 ] ;
106+ const prevIndentMatch = prevLine . match ( / ^ ( \s * ) / ) ;
71107 return prevIndentMatch ? prevIndentMatch [ 1 ] . length : INDENT_LEVEL ;
72108 }
73109
0 commit comments