@@ -85,16 +85,8 @@ def reset_knowledge_base():
85
85
with open (kb_file_path , 'w' ) as output_file :
86
86
json .dump ([], output_file )
87
87
88
- def parse_markdown_file_to_json (current_id , file_path ):
88
+ def parse_markdown_file_to_json (json_output , current_id , file_path ):
89
89
""" Parses individual markdown file and adds its content to JSON """
90
- try :
91
- # Load existing content if the file exists
92
- with open (kb_file_path , 'r' ) as existing_file :
93
- json_output = json .load (existing_file )
94
- except (FileNotFoundError , json .JSONDecodeError ):
95
- # If the file doesn't exist or is empty, start fresh
96
- json_output = []
97
-
98
90
with open (file_path , 'r' , encoding = 'utf-8' ) as file :
99
91
lines = file .readlines ()
100
92
@@ -146,24 +138,12 @@ def parse_markdown_file_to_json(current_id, file_path):
146
138
"path" : adjust_knowledge_base_entry_path (file_path ) # Adjust path format
147
139
})
148
140
149
- # Write the augmented JSON output to ./data/knowledge_base.json
150
- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
151
- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
152
-
153
141
def adjust_knowledge_base_entry_path(file_path):
    """
    Adjusts the file path format for storage.

    Steps, in order:
      1. Remove the "./.tmp/defang-docs" checkout location from the path.
      2. Strip a trailing ".mdx" or ".md" extension.
      3. Turn a "/YYYY-MM-DD-" filename prefix into "/YYYY/MM/DD/" path
         segments.

    Args:
        file_path: Path of a markdown file as a string.

    Returns:
        The adjusted path string.
    """
    path = file_path.replace("./.tmp/defang-docs", "")

    # Strip the extension only when it is the actual suffix. A blanket
    # str.replace would also delete ".md"/".mdx" appearing inside directory
    # or file names (e.g. "using.mdx-files/guide.md").
    for extension in (".mdx", ".md"):
        if path.endswith(extension):
            path = path[:-len(extension)]
            break

    return re.sub(r'/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', path)
156
144
157
- def parse_cli_markdown (current_id , file_path ):
145
+ def parse_cli_markdown (json_output , current_id , file_path ):
158
146
""" Parses CLI-specific markdown files """
159
- try :
160
- # Load existing content if the file exists
161
- with open (kb_file_path , 'r' ) as existing_file :
162
- json_output = json .load (existing_file )
163
- except (FileNotFoundError , json .JSONDecodeError ):
164
- # If the file doesn't exist or is empty, start fresh
165
- json_output = []
166
-
167
147
with open (file_path , 'r' , encoding = 'utf-8' ) as file :
168
148
lines = file .readlines ()
169
149
@@ -187,10 +167,6 @@ def parse_cli_markdown(current_id, file_path):
187
167
"path" : file_path .replace ("./.tmp/defang-docs" , "" ).replace (".mdx" , "" ).replace (".md" , "" )
188
168
})
189
169
190
- # Write the augmented JSON output to data/knowledge_base.json
191
- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
192
- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
193
-
194
170
def recursive_parse_directory (root_dir ):
195
171
""" Recursively parses all markdown files in the directory. """
196
172
paths = []
@@ -199,11 +175,18 @@ def recursive_parse_directory(root_dir):
199
175
lower_filename = filename .lower ()
200
176
if lower_filename .endswith ('.md' ) or lower_filename .endswith ('.mdx' ):
201
177
paths .append (os .path .join (dirpath , filename ))
178
+
179
+ with open (kb_file_path , 'r' ) as kb_file :
180
+ kb_data = json .load (kb_file )
181
+
202
182
for id , file_path in enumerate (paths , start = 1 ):
203
183
if 'cli' in dirpath .lower () or 'cli' in filename .lower ():
204
- parse_cli_markdown (id , file_path )
184
+ parse_cli_markdown (kb_data , id , file_path )
205
185
else :
206
- parse_markdown_file_to_json (id , file_path )
186
+ parse_markdown_file_to_json (kb_data , id , file_path )
187
+
188
+ with open (kb_file_path , 'w' ) as kb_file :
189
+ json .dump (kb_data , kb_file , indent = 2 )
207
190
208
191
if __name__ == "__main__" :
209
192
setup_repositories ()
0 commit comments