Skip to content

Commit f7d57ca

Browse files
avoid O(n^2) reading and writing outfile for each infile
1 parent 2015b44 commit f7d57ca

File tree

1 file changed

+11
-28
lines changed

1 file changed

+11
-28
lines changed

app/get_knowledge_base.py

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,8 @@ def reset_knowledge_base():
8585
with open(kb_file_path, 'w') as output_file:
8686
json.dump([], output_file)
8787

88-
def parse_markdown_file_to_json(current_id, file_path):
88+
def parse_markdown_file_to_json(json_output, current_id, file_path):
8989
""" Parses individual markdown file and adds its content to JSON """
90-
try:
91-
# Load existing content if the file exists
92-
with open(kb_file_path, 'r') as existing_file:
93-
json_output = json.load(existing_file)
94-
except (FileNotFoundError, json.JSONDecodeError):
95-
# If the file doesn't exist or is empty, start fresh
96-
json_output = []
97-
9890
with open(file_path, 'r', encoding='utf-8') as file:
9991
lines = file.readlines()
10092

@@ -146,24 +138,12 @@ def parse_markdown_file_to_json(current_id, file_path):
146138
"path": adjust_knowledge_base_entry_path(file_path) # Adjust path format
147139
})
148140

149-
# Write the augmented JSON output to ./data/knowledge_base.json
150-
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
151-
json.dump(json_output, output_file, indent=2, ensure_ascii=False)
152-
153141
def adjust_knowledge_base_entry_path(file_path):
    """Adjust a markdown file path to the format stored in the knowledge base.

    Three transformations are applied, in order:

    1. Every occurrence of the "./.tmp/defang-docs" checkout prefix is removed.
    2. A trailing ".md" or ".mdx" extension is removed. The match is
       case-insensitive and anchored to the end of the path, so "README.MD"
       loses its extension while a directory such as "v1.md-notes" is left
       untouched.
    3. Each "/YYYY-MM-DD-" date prefix is rewritten as "/YYYY/MM/DD/".

    Args:
        file_path: Path of the markdown file as a string.

    Returns:
        The adjusted path string.
    """
    without_prefix = file_path.replace("./.tmp/defang-docs", "")
    # Strip only a real trailing extension; a plain str.replace would also
    # remove ".md" from the middle of the path and would miss upper-case
    # extensions.
    without_extension = re.sub(
        r'\.mdx?\Z', '', without_prefix, flags=re.IGNORECASE
    )
    return re.sub(
        r'/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', without_extension
    )
156144

157-
def parse_cli_markdown(current_id, file_path):
145+
def parse_cli_markdown(json_output, current_id, file_path):
158146
""" Parses CLI-specific markdown files """
159-
try:
160-
# Load existing content if the file exists
161-
with open(kb_file_path, 'r') as existing_file:
162-
json_output = json.load(existing_file)
163-
except (FileNotFoundError, json.JSONDecodeError):
164-
# If the file doesn't exist or is empty, start fresh
165-
json_output = []
166-
167147
with open(file_path, 'r', encoding='utf-8') as file:
168148
lines = file.readlines()
169149

@@ -187,10 +167,6 @@ def parse_cli_markdown(current_id, file_path):
187167
"path": file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", "")
188168
})
189169

190-
# Write the augmented JSON output to data/knowledge_base.json
191-
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
192-
json.dump(json_output, output_file, indent=2, ensure_ascii=False)
193-
194170
def recursive_parse_directory(root_dir):
195171
""" Recursively parses all markdown files in the directory. """
196172
paths = []
@@ -199,11 +175,18 @@ def recursive_parse_directory(root_dir):
199175
lower_filename = filename.lower()
200176
if lower_filename.endswith('.md') or lower_filename.endswith('.mdx'):
201177
paths.append(os.path.join(dirpath, filename))
178+
179+
with open(kb_file_path, 'r') as kb_file:
180+
kb_data = json.load(kb_file)
181+
202182
for id, file_path in enumerate(paths, start=1):
203183
if 'cli' in dirpath.lower() or 'cli' in filename.lower():
204-
parse_cli_markdown(id, file_path)
184+
parse_cli_markdown(kb_data, id, file_path)
205185
else:
206-
parse_markdown_file_to_json(id, file_path)
186+
parse_markdown_file_to_json(kb_data, id, file_path)
187+
188+
with open(kb_file_path, 'w') as kb_file:
189+
json.dump(kb_data, kb_file, indent=2)
207190

208191
if __name__ == "__main__":
209192
setup_repositories()

0 commit comments

Comments
 (0)