Skip to content

Commit 482901f

Browse files
Avoid O(n^2) I/O: read and write the output file once instead of once per input file
1 parent d035130 commit 482901f

File tree

1 file changed

+11
-28
lines changed

1 file changed

+11
-28
lines changed

app/get_knowledge_base.py

Lines changed: 11 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -86,16 +86,8 @@ def reset_knowledge_base():
8686
with open(kb_file_path, 'w') as output_file:
8787
json.dump([], output_file)
8888

89-
def parse_markdown_file_to_json(current_id, file_path):
89+
def parse_markdown_file_to_json(json_output, current_id, file_path):
9090
""" Parses individual markdown file and adds its content to JSON """
91-
try:
92-
# Load existing content if the file exists
93-
with open(kb_file_path, 'r') as existing_file:
94-
json_output = json.load(existing_file)
95-
except (FileNotFoundError, json.JSONDecodeError):
96-
# If the file doesn't exist or is empty, start fresh
97-
json_output = []
98-
9991
with open(file_path, 'r', encoding='utf-8') as file:
10092
lines = file.readlines()
10193

@@ -147,24 +139,12 @@ def parse_markdown_file_to_json(current_id, file_path):
147139
"path": adjust_knowledge_base_entry_path(file_path) # Adjust path format
148140
})
149141

150-
# Write the augmented JSON output to ./data/knowledge_base.json
151-
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
152-
json.dump(json_output, output_file, indent=2, ensure_ascii=False)
153-
154142
def adjust_knowledge_base_entry_path(file_path):
155143
""" Adjusts the file path format for storage. """
156144
return re.sub(r'\/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", ""))
157145

158-
def parse_cli_markdown(current_id, file_path):
146+
def parse_cli_markdown(json_output, current_id, file_path):
159147
""" Parses CLI-specific markdown files """
160-
try:
161-
# Load existing content if the file exists
162-
with open(kb_file_path, 'r') as existing_file:
163-
json_output = json.load(existing_file)
164-
except (FileNotFoundError, json.JSONDecodeError):
165-
# If the file doesn't exist or is empty, start fresh
166-
json_output = []
167-
168148
with open(file_path, 'r', encoding='utf-8') as file:
169149
lines = file.readlines()
170150

@@ -188,10 +168,6 @@ def parse_cli_markdown(current_id, file_path):
188168
"path": file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", "")
189169
})
190170

191-
# Write the augmented JSON output to data/knowledge_base.json
192-
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
193-
json.dump(json_output, output_file, indent=2, ensure_ascii=False)
194-
195171
def recursive_parse_directory(root_dir):
196172
""" Recursively parses all markdown files in the directory. """
197173
paths = []
@@ -200,11 +176,18 @@ def recursive_parse_directory(root_dir):
200176
lower_filename = filename.lower()
201177
if lower_filename.endswith('.md') or lower_filename.endswith('.mdx'):
202178
paths.append(os.path.join(dirpath, filename))
179+
180+
with open(kb_file_path, 'r') as kb_file:
181+
kb_data = json.load(kb_file)
182+
203183
for id, file_path in enumerate(paths, start=1):
204184
if 'cli' in dirpath.lower() or 'cli' in filename.lower():
205-
parse_cli_markdown(id, file_path)
185+
parse_cli_markdown(kb_data, id, file_path)
206186
else:
207-
parse_markdown_file_to_json(id, file_path)
187+
parse_markdown_file_to_json(kb_data, id, file_path)
188+
189+
with open(kb_file_path, 'w') as kb_file:
190+
json.dump(kb_data, kb_file, indent=2)
208191

209192
if __name__ == "__main__":
210193
setup_repositories()

0 commit comments

Comments
 (0)