1
+ #!/usr/bin/env python3
2
+
1
3
"""
2
4
This script can be used to automatically generate a table of contents (JSON file) from the markdown files in a directory,
3
5
or multiple directories.
4
6
"""
5
7
6
- #!/usr/bin/env python3
7
-
8
8
import json
9
9
import os
10
10
import argparse
11
11
import sys
12
+ from collections import defaultdict
13
+ import yaml
12
14
13
15
def parse_args () -> argparse .Namespace :
14
16
parser = argparse .ArgumentParser (
@@ -20,43 +22,82 @@ def parse_args() -> argparse.Namespace:
20
22
action = "store_true" ,
21
23
help = "Generates a single TOC for all files in all sub-directories of provided directory. By default, generates TOC per folder." ,
22
24
)
25
+ parser .add_argument (
26
+ "--out" ,
27
+ default = None ,
28
+ help = "Path to output the resulting table of contents file to (by default it is output to the provided directory - file is named according to --dir)"
29
+ )
23
30
parser .add_argument (
24
31
"--dir" ,
25
32
help = "Path to a folder containing markdown (.md, .mdx) documents containing YAML with title, description, slug."
26
33
)
34
+ parser .add_argument ('--ignore' , metavar = 'S' , type = str , nargs = '+' ,
35
+ help = 'Directory names to ignore. E.g --ignore _snippets images' )
27
36
return parser .parse_args ()
28
37
29
38
def extract_title_description_slug(filename):
    """Return the YAML frontmatter of a markdown file as a dict.

    The frontmatter is the text between the first two ``---`` marker lines
    found in the file. It is parsed with ``yaml.safe_load``.

    Args:
        filename: Path of the markdown file to read.

    Returns:
        A dict holding every key found in the frontmatter. The dict is empty
        when the file has no complete frontmatter block, when the block is
        empty, or when it does not parse to a mapping.

    Side effects:
        Prints a warning listing which of ``title``, ``description`` and
        ``slug`` are absent or empty. Prints an error and exits the process
        with status 1 when the file cannot be read or its frontmatter is not
        valid YAML.
    """
    required_fields = ("title", "description", "slug")
    marker = '---\n'
    frontmatter_data = {}

    try:
        with open(filename, "r", encoding="utf-8") as f:
            content = f.read()
    except OSError as e:
        print(f"Ran into a problem reading frontmatter: {e}")
        sys.exit(1)

    # find the first frontmatter tag
    frontmatter_start = content.find(marker)
    if frontmatter_start != -1:
        body_start = frontmatter_start + len(marker)
        # find the second frontmatter tag
        frontmatter_end = content.find(marker, body_start)
        if frontmatter_end != -1:
            frontmatter_str = content[body_start:frontmatter_end]
            try:
                loaded = yaml.safe_load(frontmatter_str)
            except yaml.YAMLError as e:
                print(f"Ran into a problem reading frontmatter: {filename}: {e}")
                sys.exit(1)
            # safe_load yields None for an empty block and may yield a list
            # or scalar for malformed frontmatter; only a mapping is usable.
            if isinstance(loaded, dict):
                frontmatter_data = loaded

    missing_fields = [
        field for field in required_fields if not frontmatter_data.get(field)
    ]
    if missing_fields:
        print(f"Warning: {filename} is missing some fields:")
        for field in missing_fields:
            print(f"- {field}")

    return frontmatter_data
69
def walk_dirs(root_dir, ignore_dirs=None):
    """Yield every directory path under ``root_dir``, including itself.

    Args:
        root_dir: Directory at which the walk starts.
        ignore_dirs: Optional iterable of directory-name prefixes. Any
            sub-directory whose name starts with one of them is skipped
            together with everything below it. ``None`` or an empty
            iterable disables filtering.

    Yields:
        The path of each visited directory, as produced by ``os.walk``.
    """
    # str.startswith accepts a tuple, and a name equal to a prefix also
    # starts with it, so a single prefix test covers exact matches too.
    prefixes = tuple(ignore_dirs) if ignore_dirs else ()
    for root, dirs, _files in os.walk(root_dir):
        if prefixes:
            # Modify the 'dirs' list in-place so os.walk does not descend
            # into ignored directories.
            dirs[:] = [d for d in dirs if not d.startswith(prefixes)]
        yield root
48
75
49
def write_to_file(json_items, directory, output=None):
    """Serialize ``json_items`` as indented JSON and write it to disk.

    Args:
        json_items: JSON-serializable object to write.
        directory: Path of the file to write when ``output`` is None.
            When ``output`` is given, this path is flattened into a file
            name by replacing every "/" with "_".
        output: Optional directory that receives the flattened file.

    Side effects:
        Creates missing parent directories, writes the file followed by a
        trailing newline, and prints the written path. An ``OSError`` is
        reported on stdout and not re-raised.
    """
    if output is not None:
        # output to the given path the toc.json file
        # If dir='docs/en/interfaces/formats' the file is called docs_en_interfaces_formats_toc.json
        output_path = os.path.join(output, directory.replace("/", "_"))
    else:
        output_path = directory

    try:
        parent = os.path.dirname(output_path)
        # A bare file name has no parent part, and os.makedirs('') raises.
        if parent:
            os.makedirs(parent, exist_ok=True)  # Create directories if they don't exist
        with open(output_path, "w") as f:
            json.dump(json_items, f, indent=4)
            f.write('\n')
        print(f"Wrote {output_path}")
    except IsADirectoryError as e:
        # Raised when output_path itself names an existing directory.
        print(f"Directory already exists: {e}")
    except OSError as e:
        print(f"An error occurred creating directory: {e}")
95
def write_file(json_items, args, directory):
    """Write ``json_items`` as the ``toc.json`` of ``directory``.

    Args:
        json_items: JSON-serializable table-of-contents entries.
        args: Parsed command-line namespace; only ``args.out`` is read.
        directory: Directory the table of contents belongs to.

    When ``args.out`` is None the file is written to
    ``<directory>/toc.json``. Otherwise ``write_to_file`` places it inside
    ``args.out`` under a name derived from that path.
    """
    # args.out is None unless --out was given, which is also the default of
    # write_to_file's `output` parameter, so one call covers both cases.
    write_to_file(json_items, directory + "/toc.json", args.out)
60
101
61
102
def main ():
62
103
@@ -66,13 +107,15 @@ def main():
66
107
if root_dir is None :
67
108
print ("Please provide a directory with argument --dir" )
68
109
sys .exit (1 )
69
-
70
- if args .single_toc :
110
+ if os .path .lexists (root_dir ) is False :
111
+ print ("Path provided does not exist" )
112
+ sys .exit (1 )
113
+ if args .single_toc is True :
71
114
json_items = [] # single list for all directories
72
115
73
- for directory in walk_dirs (root_dir ): # Walk directories
116
+ for directory in walk_dirs (root_dir , args . ignore ): # Walk directories
74
117
75
- if not args .single_toc :
118
+ if args .single_toc is False :
76
119
json_items = [] # new list for each directory
77
120
78
121
for filename in os .listdir (directory ): # for each directory
@@ -85,19 +128,24 @@ def main():
85
128
result = extract_title_description_slug (full_path )
86
129
if result is not None :
87
130
json_items .append (result )
88
-
89
- if not args .single_toc :
90
- json_array = sorted (json_items , key = lambda x : x .get ("title" ))
91
-
131
+ if args .single_toc is False :
132
+ # don't write toc.json for empty folders
133
+ if len (json_items ) != 0 :
134
+ json_items = sorted (json_items , key = lambda x : x .get ("title" ))
135
+ # output to the specified directory if arg --out is provided
136
+ write_file (json_items , args , directory )
137
+ else :
138
+ print ("Ran into an issue trying to extract YAML: empty result" )
139
+
140
+ if args .single_toc is True :
92
141
# don't write toc.json for empty folders
93
142
if len (json_items ) != 0 :
94
- write_to_file (json_items , directory + "/toc.json" )
95
-
96
- if args .single_toc :
97
- json_array = sorted (json_items , key = lambda x : x .get ("title" ))
98
- # don't write toc.json for empty folders
99
- if len (json_items ) != 0 :
100
- write_to_file (json_items , root_dir + "/toc.json" )
143
+ json_array = sorted (json_items , key = lambda x : x .get ("title" ))
144
+ # output to the specified directory if arg --out is provided
145
+ write_file (json_items , args , directory )
146
+ sys .exit (0 )
147
+ else :
148
+ sys .exit (1 )
101
149
102
150
if __name__ == "__main__" :
103
151
main ()
0 commit comments