7
7
8
8
kb_file_path = './data/knowledge_base.json'
9
9
10
def clean_tmp(dir_path):
    """Clear out all contents of the specified directory except prebuild.sh.

    Args:
        dir_path: Directory whose entries are removed. The directory itself
            is kept.

    Raises:
        OSError: If ``dir_path`` cannot be listed or an entry cannot be
            removed.
    """
    for item in os.listdir(dir_path):
        if item == "prebuild.sh":  # Keep prebuild.sh
            continue
        item_path = os.path.join(dir_path, item)
        # os.path.isdir() follows symbolic links but shutil.rmtree() refuses
        # to operate on one, so a linked directory is unlinked rather than
        # recursed into (which also leaves the link target untouched).
        if os.path.isdir(item_path) and not os.path.islink(item_path):
            shutil.rmtree(item_path)
        else:
            os.remove(item_path)
19
-
20
10
def clone_repository (repo_url , local_dir ):
21
11
""" Clone or pull the repository based on its existence. """
22
12
if not os .path .exists (local_dir ):
@@ -30,73 +20,42 @@ def clone_repository(repo_url, local_dir):
30
20
def setup_repositories():
    """Clone or update every source repository under the ``.tmp`` directory.

    Creates ``.tmp`` when it does not exist and calls ``clone_repository``
    once per entry in the repository table, targeting ``.tmp/<repo name>``.
    """
    tmp_dir = ".tmp"
    os.makedirs(tmp_dir, exist_ok=True)

    # Define repositories and their URLs
    repos = {
        "defang-docs": "https://github.com/DefangLabs/defang-docs.git",
        "defang": "https://github.com/DefangLabs/defang.git",
        "samples": "https://github.com/DefangLabs/samples.git",
    }

    # Clone each repository into an explicit path so the process working
    # directory never has to change.
    for repo_name, repo_url in repos.items():
        clone_repository(repo_url, os.path.join(tmp_dir, repo_name))
51
34
52
35
def run_prebuild_script():
    """Run the defang-docs repo prebuild script.

    Runs ``npm install`` followed by ``npm run prebuild`` against
    ``.tmp/defang-docs`` (selected with npm's ``-C`` option), capturing the
    output of both commands.

    Raises:
        subprocess.CalledProcessError: If either npm command exits with a
            non-zero status. The captured stderr is printed before the
            exception propagates.
        OSError: If the ``npm`` executable cannot be started.
    """
    docs_dir = ".tmp/defang-docs"

    for npm_args in (["install"], ["run", "prebuild"]):
        try:
            subprocess.run(
                ["npm", "-C", docs_dir, *npm_args],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except subprocess.CalledProcessError as e:
            # The output is captured rather than streamed, so without this
            # the reason for the failure would be lost.
            if e.stderr:
                print(e.stderr.decode("utf-8", errors="replace"))
            raise
76
51
77
52
def parse_markdown():
    """Parse the markdown files under ./.tmp/defang-docs into JSON."""
    # The walk starts at the cloned docs repository, not the current directory.
    recursive_parse_directory('./.tmp/defang-docs')
    print("Markdown parsing completed successfully.")
82
56
83
def reset_knowledge_base():
    """Reset or initialize the knowledge base JSON file to an empty list.

    Raises:
        OSError: If the file or its parent directory cannot be created.
    """
    # Opening for writing fails when the parent directory is missing, so
    # create it first.
    kb_dir = os.path.dirname(kb_file_path)
    if kb_dir:
        os.makedirs(kb_dir, exist_ok=True)
    with open(kb_file_path, 'w', encoding='utf-8') as output_file:
        json.dump([], output_file)
87
-
88
- def parse_markdown_file_to_json (file_path ):
57
+ def parse_markdown_file_to_json (json_output , current_id , file_path ):
89
58
""" Parses individual markdown file and adds its content to JSON """
90
- try :
91
- # Load existing content if the file exists
92
- with open (kb_file_path , 'r' ) as existing_file :
93
- json_output = json .load (existing_file )
94
- current_id = len (json_output ) + 1 # Start ID from the next available number
95
- except (FileNotFoundError , json .JSONDecodeError ):
96
- # If the file doesn't exist or is empty, start fresh
97
- json_output = []
98
- current_id = 1
99
-
100
59
with open (file_path , 'r' , encoding = 'utf-8' ) as file :
101
60
lines = file .readlines ()
102
61
@@ -147,28 +106,17 @@ def parse_markdown_file_to_json(file_path):
147
106
"text" : text ,
148
107
"path" : adjust_knowledge_base_entry_path (file_path ) # Adjust path format
149
108
})
150
- current_id += 1
151
-
152
- # Write the augmented JSON output to ./data/knowledge_base.json
153
- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
154
- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
155
109
156
110
def adjust_knowledge_base_entry_path(file_path):
    """Adjust the file path format for storage.

    The path is first passed through ``normalize_docs_path``; any
    ``/YYYY-MM-DD-`` date prefix in the result is then rewritten as
    ``/YYYY/MM/DD/``.

    Args:
        file_path: Path of the markdown file being parsed.

    Returns:
        The adjusted path string.
    """
    return re.sub(
        r'\/(\d{4})-(\d{2})-(\d{2})-',
        r'/\1/\2/\3/',
        normalize_docs_path(file_path),
    )
159
113
160
- def parse_cli_markdown (file_path ):
161
- """ Parses CLI-specific markdown files """
162
- try :
163
- # Load existing content if the file exists
164
- with open (kb_file_path , 'r' ) as existing_file :
165
- json_output = json .load (existing_file )
166
- current_id = len (json_output ) + 1 # Start ID from the next available number
167
- except (FileNotFoundError , json .JSONDecodeError ):
168
- # If the file doesn't exist or is empty, start fresh
169
- json_output = []
170
- current_id = 1
114
def normalize_docs_path(path):
    """Normalize the file path to ensure consistent formatting.

    Strips a leading ``./.tmp/defang-docs`` and a trailing ``.md`` or
    ``.mdx`` extension. Only the start and the end of the path are touched,
    so a directory or file name that merely contains ``.md`` is preserved.
    The extension is matched case-insensitively.

    Args:
        path: File path to normalize.

    Returns:
        The path without the docs-root prefix and markdown extension.
    """
    docs_root = "./.tmp/defang-docs"
    if path.startswith(docs_root):
        path = path[len(docs_root):]

    lowered = path.lower()
    for extension in (".mdx", ".md"):
        if lowered.endswith(extension):
            return path[:-len(extension)]
    return path
171
117
118
+ def parse_cli_markdown (json_output , current_id , file_path ):
119
+ """ Parses CLI-specific markdown files """
172
120
with open (file_path , 'r' , encoding = 'utf-8' ) as file :
173
121
lines = file .readlines ()
174
122
@@ -189,32 +137,32 @@ def parse_cli_markdown(file_path):
189
137
"id" : current_id ,
190
138
"about" : about ,
191
139
"text" : text ,
192
- "path" : file_path . replace ( "./.tmp/defang-docs" , "" ). replace ( ".mdx" , "" ). replace ( ".md" , "" )
140
+ "path" : normalize_docs_path ( file_path )
193
141
})
194
- current_id += 1
195
-
196
- # Write the augmented JSON output to data/knowledge_base.json
197
- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
198
- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
199
142
200
143
def recursive_parse_directory(root_dir):
    """Recursively parse all markdown files in the directory.

    Collects every ``.md``/``.mdx`` file below ``root_dir`` (extension
    matched case-insensitively), loads the knowledge base JSON, hands each
    file to the CLI or the general markdown parser together with the loaded
    data and a 1-based id, and writes the knowledge base back to disk once.

    Args:
        root_dir: Directory to walk.

    Raises:
        OSError: If the knowledge base file cannot be written.
    """
    paths = []
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.lower().endswith(('.md', '.mdx')):
                paths.append(os.path.join(dirpath, filename))

    try:
        with open(kb_file_path, 'r', encoding='utf-8') as kb_file:
            kb_data = json.load(kb_file)
    except (FileNotFoundError, json.JSONDecodeError):
        # If the file doesn't exist or is empty, start fresh
        kb_data = []

    for entry_id, file_path in enumerate(paths, start=1):
        # Classify each file by its own path. The walk-loop variables only
        # hold the last directory and file visited once the walk is done.
        if 'cli' in file_path.lower():
            parse_cli_markdown(kb_data, entry_id, file_path)
        else:
            parse_markdown_file_to_json(kb_data, entry_id, file_path)

    with open(kb_file_path, 'w', encoding='utf-8') as kb_file:
        json.dump(kb_data, kb_file, indent=2)
210
163
211
164
if __name__ == "__main__":
    # Fetch the repositories, build the docs, then parse the result.
    setup_repositories()
    run_prebuild_script()
    parse_markdown()  # Start parsing logic after all setups
    print("All processes completed successfully.")
0 commit comments