77
88kb_file_path = './data/knowledge_base.json'
99
10- def clean_tmp (dir_path ):
11- """ Clears out all contents of the specified directory except for prebuild.sh """
12- for item in os .listdir (dir_path ):
13- item_path = os .path .join (dir_path , item )
14- if item != "prebuild.sh" : # Keep prebuild.sh
15- if os .path .isdir (item_path ):
16- shutil .rmtree (item_path )
17- else :
18- os .remove (item_path )
19-
2010def clone_repository (repo_url , local_dir ):
2111 """ Clone or pull the repository based on its existence. """
2212 if not os .path .exists (local_dir ):
@@ -30,73 +20,42 @@ def clone_repository(repo_url, local_dir):
def setup_repositories():
    """ Clone (or update) the Defang repositories into the local '.tmp' directory.

    The '.tmp' directory is created when missing. Each repository is handed to
    clone_repository() with a target path of '.tmp/<repo_name>'.
    """
    tmp_dir = ".tmp"
    os.makedirs(tmp_dir, exist_ok=True)

    # Define repositories and their URLs
    repos = {
        "defang-docs": "https://github.com/DefangLabs/defang-docs.git",
        "defang": "https://github.com/DefangLabs/defang.git",
        "samples": "https://github.com/DefangLabs/samples.git",
    }

    # Clone each repository. The destination is built explicitly rather than
    # by changing the working directory, so the process cwd is never modified.
    for repo_name, repo_url in repos.items():
        clone_repository(repo_url, os.path.join(tmp_dir, repo_name))
5134
def run_prebuild_script():
    """ Run the defang-docs repo prebuild script

    Installs the npm dependencies of '.tmp/defang-docs' and then runs that
    package's 'prebuild' npm script.

    Raises:
        subprocess.CalledProcessError: if either npm command exits non-zero.
    """
    docs_dir = ".tmp/defang-docs"
    npm_commands = (
        ["npm", "-C", docs_dir, "install"],
        ["npm", "-C", docs_dir, "run", "prebuild"],
    )

    for command in npm_commands:
        # Output is captured instead of streamed to the console; on failure it
        # is available on the raised CalledProcessError (stdout / stderr).
        subprocess.run(
            command,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
7651
def parse_markdown():
    """ Parse the markdown files under './.tmp/defang-docs' into the JSON knowledge base """
    recursive_parse_directory('./.tmp/defang-docs')  # Walk the cloned docs repository
    print("Markdown parsing completed successfully.")
8256
83- def reset_knowledge_base ():
84- """ Resets or initializes the knowledge base JSON file. """
85- with open (kb_file_path , 'w' ) as output_file :
86- json .dump ([], output_file )
87-
88- def parse_markdown_file_to_json (file_path ):
57+ def parse_markdown_file_to_json (json_output , current_id , file_path ):
8958 """ Parses individual markdown file and adds its content to JSON """
90- try :
91- # Load existing content if the file exists
92- with open (kb_file_path , 'r' ) as existing_file :
93- json_output = json .load (existing_file )
94- current_id = len (json_output ) + 1 # Start ID from the next available number
95- except (FileNotFoundError , json .JSONDecodeError ):
96- # If the file doesn't exist or is empty, start fresh
97- json_output = []
98- current_id = 1
99-
10059 with open (file_path , 'r' , encoding = 'utf-8' ) as file :
10160 lines = file .readlines ()
10261
@@ -147,28 +106,17 @@ def parse_markdown_file_to_json(file_path):
147106 "text" : text ,
148107 "path" : adjust_knowledge_base_entry_path (file_path ) # Adjust path format
149108 })
150- current_id += 1
151-
152- # Write the augmented JSON output to ./data/knowledge_base.json
153- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
154- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
155109
def adjust_knowledge_base_entry_path(file_path):
    """ Adjusts the file path format for storage.

    The path is first passed through normalize_docs_path(); then every
    '/YYYY-MM-DD-' segment prefix is rewritten to '/YYYY/MM/DD/'.
    """
    return re.sub(r'\/(\d{4})-(\d{2})-(\d{2})-', r'/\1/\2/\3/', normalize_docs_path(file_path))
159113
160- def parse_cli_markdown (file_path ):
161- """ Parses CLI-specific markdown files """
162- try :
163- # Load existing content if the file exists
164- with open (kb_file_path , 'r' ) as existing_file :
165- json_output = json .load (existing_file )
166- current_id = len (json_output ) + 1 # Start ID from the next available number
167- except (FileNotFoundError , json .JSONDecodeError ):
168- # If the file doesn't exist or is empty, start fresh
169- json_output = []
170- current_id = 1
def normalize_docs_path(path):
    """ Normalizes the file path to ensure consistent formatting.

    Removes the './.tmp/defang-docs' checkout prefix and a trailing '.mdx' or
    '.md' extension. Only the final extension is stripped, so a '.md' that
    appears in the middle of the path (e.g. in a directory name) is kept.
    """
    path = path.replace("./.tmp/defang-docs", "")
    # Check '.mdx' before '.md' for readability; only one suffix is removed.
    for extension in (".mdx", ".md"):
        if path.endswith(extension):
            return path[:-len(extension)]
    return path
171117
118+ def parse_cli_markdown (json_output , current_id , file_path ):
119+ """ Parses CLI-specific markdown files """
172120 with open (file_path , 'r' , encoding = 'utf-8' ) as file :
173121 lines = file .readlines ()
174122
@@ -189,32 +137,32 @@ def parse_cli_markdown(file_path):
189137 "id" : current_id ,
190138 "about" : about ,
191139 "text" : text ,
192- "path" : file_path . replace ( "./.tmp/defang-docs" , "" ). replace ( ".mdx" , "" ). replace ( ".md" , "" )
140+ "path" : normalize_docs_path ( file_path )
193141 })
194- current_id += 1
195-
196- # Write the augmented JSON output to data/knowledge_base.json
197- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
198- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
199142
def recursive_parse_directory(root_dir):
    """ Recursively parses all markdown files in the directory.

    Collects every '.md' / '.mdx' file (case-insensitive) below root_dir,
    loads the knowledge base JSON from kb_file_path, lets the per-file parsers
    add to the loaded data, and writes the result back to kb_file_path.

    Files whose path contains 'cli' (case-insensitive) are handled by
    parse_cli_markdown(); all others by parse_markdown_file_to_json().
    """
    paths = []
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.lower().endswith(('.md', '.mdx')):
                paths.append(os.path.join(dirpath, filename))

    with open(kb_file_path, 'r', encoding='utf-8') as kb_file:
        kb_data = json.load(kb_file)

    # NOTE(review): ids restart at 1 on every run and are assigned per file;
    # confirm this cannot collide with entries already present in kb_data.
    for entry_id, file_path in enumerate(paths, start=1):
        # Classify by this file's own path. The walk-loop variables must not be
        # used here: after the walk they only hold the last visited entry.
        if 'cli' in file_path.lower():
            parse_cli_markdown(kb_data, entry_id, file_path)
        else:
            parse_markdown_file_to_json(kb_data, entry_id, file_path)

    with open(kb_file_path, 'w', encoding='utf-8') as kb_file:
        json.dump(kb_data, kb_file, indent=2)
210163
if __name__ == "__main__":
    # Pipeline: fetch the repositories, build the docs, then parse them.
    setup_repositories()
    run_prebuild_script()
    parse_markdown()  # Start parsing logic after all setups
    print("All processes completed successfully.")
0 commit comments