@@ -86,16 +86,8 @@ def reset_knowledge_base():
86
86
with open (kb_file_path , 'w' ) as output_file :
87
87
json .dump ([], output_file )
88
88
89
- def parse_markdown_file_to_json (current_id , file_path ):
89
+ def parse_markdown_file_to_json (json_output , current_id , file_path ):
90
90
""" Parses individual markdown file and adds its content to JSON """
91
- try :
92
- # Load existing content if the file exists
93
- with open (kb_file_path , 'r' ) as existing_file :
94
- json_output = json .load (existing_file )
95
- except (FileNotFoundError , json .JSONDecodeError ):
96
- # If the file doesn't exist or is empty, start fresh
97
- json_output = []
98
-
99
91
with open (file_path , 'r' , encoding = 'utf-8' ) as file :
100
92
lines = file .readlines ()
101
93
@@ -147,24 +139,12 @@ def parse_markdown_file_to_json(current_id, file_path):
147
139
"path" : adjust_knowledge_base_entry_path (file_path ) # Adjust path format
148
140
})
149
141
150
- # Write the augmented JSON output to ./data/knowledge_base.json
151
- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
152
- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
153
-
154
142
def adjust_knowledge_base_entry_path(file_path):
    """Convert a checked-out docs file path into the path stored in the knowledge base.

    Three transformations are applied, in order:

    1. The local checkout prefix ``./.tmp/defang-docs`` is removed.
    2. A trailing ``.md`` / ``.mdx`` extension is removed. The match is
       case-insensitive and anchored to the end of the path, so a directory
       or file name that merely contains ``.md`` is left untouched.
    3. A ``/YYYY-MM-DD-`` date prefix on a path segment is rewritten to
       ``/YYYY/MM/DD/``.

    Args:
        file_path: Path of a markdown file, as a string.

    Returns:
        The adjusted path string.
    """
    trimmed = file_path.replace("./.tmp/defang-docs", "")

    # Strip only the final extension. Check ".mdx" before ".md" for clarity;
    # the comparison is done on a lower-cased copy so "README.MD" is handled
    # the same way the directory scan accepts it.
    lowered = trimmed.lower()
    for extension in (".mdx", ".md"):
        if lowered.endswith(extension):
            trimmed = trimmed[:-len(extension)]
            break

    # "/2024-01-15-title" -> "/2024/01/15/title"
    return re.sub(r'/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', trimmed)
157
145
158
- def parse_cli_markdown (current_id , file_path ):
146
+ def parse_cli_markdown (json_output , current_id , file_path ):
159
147
""" Parses CLI-specific markdown files """
160
- try :
161
- # Load existing content if the file exists
162
- with open (kb_file_path , 'r' ) as existing_file :
163
- json_output = json .load (existing_file )
164
- except (FileNotFoundError , json .JSONDecodeError ):
165
- # If the file doesn't exist or is empty, start fresh
166
- json_output = []
167
-
168
148
with open (file_path , 'r' , encoding = 'utf-8' ) as file :
169
149
lines = file .readlines ()
170
150
@@ -188,10 +168,6 @@ def parse_cli_markdown(current_id, file_path):
188
168
"path" : file_path .replace ("./.tmp/defang-docs" , "" ).replace (".mdx" , "" ).replace (".md" , "" )
189
169
})
190
170
191
- # Write the augmented JSON output to data/knowledge_base.json
192
- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
193
- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
194
-
195
171
def recursive_parse_directory (root_dir ):
196
172
""" Recursively parses all markdown files in the directory. """
197
173
paths = []
@@ -200,11 +176,18 @@ def recursive_parse_directory(root_dir):
200
176
lower_filename = filename .lower ()
201
177
if lower_filename .endswith ('.md' ) or lower_filename .endswith ('.mdx' ):
202
178
paths .append (os .path .join (dirpath , filename ))
179
+
180
+ with open (kb_file_path , 'r' ) as kb_file :
181
+ kb_data = json .load (kb_file )
182
+
203
183
for id , file_path in enumerate (paths , start = 1 ):
204
184
if 'cli' in dirpath .lower () or 'cli' in filename .lower ():
205
- parse_cli_markdown (id , file_path )
185
+ parse_cli_markdown (kb_data , id , file_path )
206
186
else :
207
- parse_markdown_file_to_json (id , file_path )
187
+ parse_markdown_file_to_json (kb_data , id , file_path )
188
+
189
+ with open (kb_file_path , 'w' ) as kb_file :
190
+ json .dump (kb_data , kb_file , indent = 2 )
208
191
209
192
if __name__ == "__main__" :
210
193
setup_repositories ()
0 commit comments