Skip to content

Commit d035130

Browse files
Avoid O(n^2) list counting when assigning each entry ID
1 parent e0f19bc commit d035130

File tree

1 file changed

+5
-11
lines changed

1 file changed

+5
-11
lines changed

app/get_knowledge_base.py

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -86,17 +86,15 @@ def reset_knowledge_base():
8686
with open(kb_file_path, 'w') as output_file:
8787
json.dump([], output_file)
8888

89-
def parse_markdown_file_to_json(file_path):
89+
def parse_markdown_file_to_json(current_id, file_path):
9090
""" Parses individual markdown file and adds its content to JSON """
9191
try:
9292
# Load existing content if the file exists
9393
with open(kb_file_path, 'r') as existing_file:
9494
json_output = json.load(existing_file)
95-
current_id = len(json_output) + 1 # Start ID from the next available number
9695
except (FileNotFoundError, json.JSONDecodeError):
9796
# If the file doesn't exist or is empty, start fresh
9897
json_output = []
99-
current_id = 1
10098

10199
with open(file_path, 'r', encoding='utf-8') as file:
102100
lines = file.readlines()
@@ -148,7 +146,6 @@ def parse_markdown_file_to_json(file_path):
148146
"text": text,
149147
"path": adjust_knowledge_base_entry_path(file_path) # Adjust path format
150148
})
151-
current_id += 1
152149

153150
# Write the augmented JSON output to ./data/knowledge_base.json
154151
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
@@ -158,17 +155,15 @@ def adjust_knowledge_base_entry_path(file_path):
158155
""" Adjusts the file path format for storage. """
159156
return re.sub(r'\/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", ""))
160157

161-
def parse_cli_markdown(file_path):
158+
def parse_cli_markdown(current_id, file_path):
162159
""" Parses CLI-specific markdown files """
163160
try:
164161
# Load existing content if the file exists
165162
with open(kb_file_path, 'r') as existing_file:
166163
json_output = json.load(existing_file)
167-
current_id = len(json_output) + 1 # Start ID from the next available number
168164
except (FileNotFoundError, json.JSONDecodeError):
169165
# If the file doesn't exist or is empty, start fresh
170166
json_output = []
171-
current_id = 1
172167

173168
with open(file_path, 'r', encoding='utf-8') as file:
174169
lines = file.readlines()
@@ -192,7 +187,6 @@ def parse_cli_markdown(file_path):
192187
"text": text,
193188
"path": file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", "")
194189
})
195-
current_id += 1
196190

197191
# Write the augmented JSON output to data/knowledge_base.json
198192
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
@@ -206,11 +200,11 @@ def recursive_parse_directory(root_dir):
206200
lower_filename = filename.lower()
207201
if lower_filename.endswith('.md') or lower_filename.endswith('.mdx'):
208202
paths.append(os.path.join(dirpath, filename))
209-
for file_path in paths:
203+
for id, file_path in enumerate(paths, start=1):
210204
if 'cli' in dirpath.lower() or 'cli' in filename.lower():
211-
parse_cli_markdown(file_path)
205+
parse_cli_markdown(id, file_path)
212206
else:
213-
parse_markdown_file_to_json(file_path)
207+
parse_markdown_file_to_json(id, file_path)
214208

215209
if __name__ == "__main__":
216210
setup_repositories()

0 commit comments

Comments
 (0)