Skip to content

Commit 2015b44

Browse files
Avoid O(n^2) list counting when computing each entry ID
1 parent 5bc4c39 commit 2015b44

File tree

1 file changed

+5
-11
lines changed

1 file changed

+5
-11
lines changed

app/get_knowledge_base.py

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -85,17 +85,15 @@ def reset_knowledge_base():
8585
with open(kb_file_path, 'w') as output_file:
8686
json.dump([], output_file)
8787

88-
def parse_markdown_file_to_json(file_path):
88+
def parse_markdown_file_to_json(current_id, file_path):
8989
""" Parses individual markdown file and adds its content to JSON """
9090
try:
9191
# Load existing content if the file exists
9292
with open(kb_file_path, 'r') as existing_file:
9393
json_output = json.load(existing_file)
94-
current_id = len(json_output) + 1 # Start ID from the next available number
9594
except (FileNotFoundError, json.JSONDecodeError):
9695
# If the file doesn't exist or is empty, start fresh
9796
json_output = []
98-
current_id = 1
9997

10098
with open(file_path, 'r', encoding='utf-8') as file:
10199
lines = file.readlines()
@@ -147,7 +145,6 @@ def parse_markdown_file_to_json(file_path):
147145
"text": text,
148146
"path": adjust_knowledge_base_entry_path(file_path) # Adjust path format
149147
})
150-
current_id += 1
151148

152149
# Write the augmented JSON output to ./data/knowledge_base.json
153150
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
@@ -157,17 +154,15 @@ def adjust_knowledge_base_entry_path(file_path):
157154
""" Adjusts the file path format for storage. """
158155
return re.sub(r'\/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", ""))
159156

160-
def parse_cli_markdown(file_path):
157+
def parse_cli_markdown(current_id, file_path):
161158
""" Parses CLI-specific markdown files """
162159
try:
163160
# Load existing content if the file exists
164161
with open(kb_file_path, 'r') as existing_file:
165162
json_output = json.load(existing_file)
166-
current_id = len(json_output) + 1 # Start ID from the next available number
167163
except (FileNotFoundError, json.JSONDecodeError):
168164
# If the file doesn't exist or is empty, start fresh
169165
json_output = []
170-
current_id = 1
171166

172167
with open(file_path, 'r', encoding='utf-8') as file:
173168
lines = file.readlines()
@@ -191,7 +186,6 @@ def parse_cli_markdown(file_path):
191186
"text": text,
192187
"path": file_path.replace("./.tmp/defang-docs", "").replace(".mdx", "").replace(".md", "")
193188
})
194-
current_id += 1
195189

196190
# Write the augmented JSON output to data/knowledge_base.json
197191
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
@@ -205,11 +199,11 @@ def recursive_parse_directory(root_dir):
205199
lower_filename = filename.lower()
206200
if lower_filename.endswith('.md') or lower_filename.endswith('.mdx'):
207201
paths.append(os.path.join(dirpath, filename))
208-
for file_path in paths:
202+
for id, file_path in enumerate(paths, start=1):
209203
if 'cli' in dirpath.lower() or 'cli' in filename.lower():
210-
parse_cli_markdown(file_path)
204+
parse_cli_markdown(id, file_path)
211205
else:
212-
parse_markdown_file_to_json(file_path)
206+
parse_markdown_file_to_json(id, file_path)
213207

214208
if __name__ == "__main__":
215209
setup_repositories()

0 commit comments

Comments
 (0)