kb_file_path = './data/knowledge_base.json'

- def clean_tmp(dir_path):
-     """ Clears out all contents of the specified directory except for prebuild.sh """
-     for item in os.listdir(dir_path):
-         item_path = os.path.join(dir_path, item)
-         if item != "prebuild.sh":  # Keep prebuild.sh
-             if os.path.isdir(item_path):
-                 shutil.rmtree(item_path)
-             else:
-                 os.remove(item_path)
-
def clone_repository(repo_url, local_dir):
    """ Clone or pull the repository based on its existence. """
    if not os.path.exists(local_dir):
@@ -30,7 +20,6 @@ def clone_repository(repo_url, local_dir):
def setup_repositories():
    tmp_dir = ".tmp"
    os.makedirs(tmp_dir, exist_ok=True)
-     clean_tmp(tmp_dir)  # Clean the temporary directory before setting up

    # Define repositories and their URLs
    repos = {
@@ -39,65 +28,34 @@ def setup_repositories():
        "samples": "https://github.com/DefangLabs/samples.git"
    }

-     # Change to the temporary directory
-     original_dir = os.getcwd()
-     os.chdir(tmp_dir)
-
    # Clone each repository
    for repo_name, repo_url in repos.items():
-         clone_repository(repo_url, repo_name)
-
-     # Return to the original directory
-     os.chdir(original_dir)
+         clone_repository(repo_url, os.path.join(tmp_dir, repo_name))
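The loop above now hands clone_repository the full ./.tmp/<repo_name> path instead of chdir-ing into the temporary directory. The helper's body is mostly collapsed in this diff, so the following is only a sketch of the clone-or-pull pattern its docstring describes, assuming plain git CLI calls rather than the PR's exact code:

    import os
    import subprocess

    def clone_repository(repo_url, local_dir):
        """ Clone or pull the repository based on its existence (sketch). """
        if not os.path.exists(local_dir):
            # Fresh checkout: clone straight into the joined .tmp/<repo_name> path
            subprocess.run(["git", "clone", "--depth", "1", repo_url, local_dir], check=True)
        else:
            # Existing checkout: update it in place without changing the working directory
            subprocess.run(["git", "-C", local_dir, "pull"], check=True)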

def run_prebuild_script():
-     """ Run the 'prebuild.sh' script located in the .tmp directory. """
-     os.chdir(".tmp")
-     script_path = os.path.join("./", "prebuild.sh")  # Ensure the path is correct
-     if os.path.exists(script_path):
-         print("Running prebuild.sh...")
-         try:
-             subprocess.run(["bash", script_path], check=True)
-         except subprocess.CalledProcessError as e:
-             print(f"Error running prebuild.sh: {e}")
-     else:
-         print("prebuild.sh not found.")
-
- def cleanup():
-     """ Clean up unneeded files, preserving only 'docs' and 'blog' directories """
-     os.chdir("./defang-docs")
-     for item in os.listdir('.'):
-         if item not in ['docs', 'blog']:  # Check if the item is not one of the directories to keep
-             item_path = os.path.join('.', item)  # Construct the full path
-             if os.path.isdir(item_path):
-                 shutil.rmtree(item_path)  # Remove the directory and all its contents
-             else:
-                 os.remove(item_path)  # Remove the file
-     print("Cleanup completed successfully.")
+     """ Run the defang-docs repo prebuild script """
+
+     subprocess.run(
+         ["npm", "-C", ".tmp/defang-docs", "install"],
+         check=True,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE
+     )
+
+     subprocess.run(
+         ["npm", "-C", ".tmp/defang-docs", "run", "prebuild"],
+         check=True,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE
+     )
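Both npm calls above pipe stdout and stderr, so when check=True raises CalledProcessError the captured output is attached to the exception but never printed. A minimal sketch (not part of this change; _npm is a hypothetical helper) of how the captured stderr could be surfaced before re-raising:

    import subprocess

    def _npm(*args):
        """ Hypothetical wrapper around the same npm -C invocation used above. """
        try:
            subprocess.run(
                ["npm", "-C", ".tmp/defang-docs", *args],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
        except subprocess.CalledProcessError as e:
            # With check=True and PIPE, the exception carries the captured byte streams
            print(e.stderr.decode(errors="replace"))
            raise

    # Usage: _npm("install"); _npm("run", "prebuild")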

def parse_markdown():
    """ Parse markdown files in the current directory into JSON """
-     reset_knowledge_base()  # Reset the JSON database file
    recursive_parse_directory('./.tmp/defang-docs')  # Parse markdown files in the current directory
    print("Markdown parsing completed successfully.")

- def reset_knowledge_base():
-     """ Resets or initializes the knowledge base JSON file. """
-     with open(kb_file_path, 'w') as output_file:
-         json.dump([], output_file)
-
- def parse_markdown_file_to_json(file_path):
+ def parse_markdown_file_to_json(json_output, current_id, file_path):
    """ Parses individual markdown file and adds its content to JSON """
-     try:
-         # Load existing content if the file exists
-         with open(kb_file_path, 'r') as existing_file:
-             json_output = json.load(existing_file)
-             current_id = len(json_output) + 1  # Start ID from the next available number
-     except (FileNotFoundError, json.JSONDecodeError):
-         # If the file doesn't exist or is empty, start fresh
-         json_output = []
-         current_id = 1
-
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

@@ -148,28 +106,17 @@ def parse_markdown_file_to_json(file_path):
            "text": text,
            "path": adjust_knowledge_base_entry_path(file_path)  # Adjust path format
        })
-         current_id += 1
-
-     # Write the augmented JSON output to ./data/knowledge_base.json
-     with open(kb_file_path, 'w', encoding='utf-8') as output_file:
-         json.dump(json_output, output_file, indent=2, ensure_ascii=False)

def adjust_knowledge_base_entry_path(file_path):
    """ Adjusts the file path format for storage. """
-     return re.sub(r'\/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", ""))
+     return re.sub(r'\/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', normalize_docs_path(file_path))

- def parse_cli_markdown(file_path):
-     """ Parses CLI-specific markdown files """
-     try:
-         # Load existing content if the file exists
-         with open(kb_file_path, 'r') as existing_file:
-             json_output = json.load(existing_file)
-             current_id = len(json_output) + 1  # Start ID from the next available number
-     except (FileNotFoundError, json.JSONDecodeError):
-         # If the file doesn't exist or is empty, start fresh
-         json_output = []
-         current_id = 1
+ def normalize_docs_path(path):
+     """ Normalizes the file path to ensure consistent formatting. """
+     return path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", "")
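For reference, this is the path rewriting the two helpers now perform, shown on a hypothetical blog post path (the file name is made up for illustration):

    # normalize_docs_path('./.tmp/defang-docs/blog/2024-01-15-release-notes.md')
    #   -> '/blog/2024-01-15-release-notes'
    # adjust_knowledge_base_entry_path('./.tmp/defang-docs/blog/2024-01-15-release-notes.md')
    #   -> '/blog/2024/01/15/release-notes'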

+ def parse_cli_markdown(json_output, current_id, file_path):
+     """ Parses CLI-specific markdown files """
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

@@ -190,32 +137,32 @@ def parse_cli_markdown(file_path):
            "id": current_id,
            "about": about,
            "text": text,
-             "path": file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", "")
+             "path": normalize_docs_path(file_path)
        })
-         current_id += 1
-
-     # Write the augmented JSON output to data/knowledge_base.json
-     with open(kb_file_path, 'w', encoding='utf-8') as output_file:
-         json.dump(json_output, output_file, indent=2, ensure_ascii=False)

def recursive_parse_directory(root_dir):
    """ Recursively parses all markdown files in the directory. """
-     for dirpath, dirnames, filenames in os.walk(root_dir):
+     paths = []
+     for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
-             if filename.lower().endswith('.md') or filename.lower().endswith('.mdx'):
-                 file_path = os.path.join(dirpath, filename)
-                 if 'cli' in dirpath.lower() or 'cli' in filename.lower():
-                     parse_cli_markdown(file_path)
-                 else:
-                     parse_markdown_file_to_json(file_path)
+             lower_filename = filename.lower()
+             if lower_filename.endswith('.md') or lower_filename.endswith('.mdx'):
+                 paths.append(os.path.join(dirpath, filename))
+
+     with open(kb_file_path, 'r') as kb_file:
+         kb_data = json.load(kb_file)
+
+     for id, file_path in enumerate(paths, start=1):
+         if 'cli' in file_path.lower():  # Check the full path; dirpath/filename here would be stale os.walk loop variables
+             parse_cli_markdown(kb_data, id, file_path)
+         else:
+             parse_markdown_file_to_json(kb_data, id, file_path)
+
+     with open(kb_file_path, 'w') as kb_file:
+         json.dump(kb_data, kb_file, indent=2)
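Two behavioral notes on the block above, now that reset_knowledge_base is gone: the 'r' open assumes ./data/knowledge_base.json already exists and contains valid JSON, and ids restart at 1, so any entries already stored in the file would collide with the new ones. A defensive sketch (not part of this change) that restores the removed fallback and continues numbering from the existing length:

    # Sketch only: tolerate a missing or empty knowledge base and keep ids unique,
    # mirroring what the removed reset/load helpers used to do.
    try:
        with open(kb_file_path, 'r') as kb_file:
            kb_data = json.load(kb_file)
    except (FileNotFoundError, json.JSONDecodeError):
        kb_data = []

    for entry_id, file_path in enumerate(paths, start=len(kb_data) + 1):
        ...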

if __name__ == "__main__":
    setup_repositories()
    run_prebuild_script()
-     cleanup()
-     os.chdir('../../')
-     print(os.listdir('.'))
    parse_markdown()  # Start parsing logic after all setups
-     print(os.listdir('.'))
-     clean_tmp('./.tmp')
    print("All processes completed successfully.")