 
 kb_file_path = './data/knowledge_base.json'
 
-def clean_tmp(dir_path):
-    """Clears out all contents of the specified directory except for prebuild.sh"""
-    for item in os.listdir(dir_path):
-        item_path = os.path.join(dir_path, item)
-        if item != "prebuild.sh":  # Keep prebuild.sh
-            if os.path.isdir(item_path):
-                shutil.rmtree(item_path)
-            else:
-                os.remove(item_path)
-
 def clone_repository(repo_url, local_dir):
     """Clone or pull the repository based on its existence."""
     if not os.path.exists(local_dir):
@@ -30,7 +20,6 @@ def clone_repository(repo_url, local_dir):
 def setup_repositories():
     tmp_dir = ".tmp"
     os.makedirs(tmp_dir, exist_ok=True)
-    clean_tmp(tmp_dir)  # Clean the temporary directory before setting up
 
     # Define repositories and their URLs
     repos = {
@@ -39,65 +28,34 @@ def setup_repositories():
3928 "samples" : "https://github.com/DefangLabs/samples.git"
4029 }
4130
42- # Change to the temporary directory
43- original_dir = os .getcwd ()
44- os .chdir (tmp_dir )
45-
4631 # Clone each repository
4732 for repo_name , repo_url in repos .items ():
48- clone_repository (repo_url , repo_name )
49-
50- # Return to the original directory
51- os .chdir (original_dir )
33+ clone_repository (repo_url , os .path .join (tmp_dir , repo_name ))
5234
5335def run_prebuild_script ():
54- """ Run the 'prebuild.sh' script located in the .tmp directory. """
55- os .chdir (".tmp" )
56- script_path = os .path .join ("./" , "prebuild.sh" ) # Ensure the path is correct
57- if os .path .exists (script_path ):
58- print ("Running prebuild.sh..." )
59- try :
60- subprocess .run (["bash" , script_path ], check = True )
61- except subprocess .CalledProcessError as e :
62- print (f"Error running prebuild.sh: { e } " )
63- else :
64- print ("prebuild.sh not found." )
65-
66- def cleanup ():
67- """ Clean up unneeded files, preserving only 'docs' and 'blog' directories """
68- os .chdir ("./defang-docs" )
69- for item in os .listdir ('.' ):
70- if item not in ['docs' , 'blog' ]: # Check if the item is not one of the directories to keep
71- item_path = os .path .join ('.' , item ) # Construct the full path
72- if os .path .isdir (item_path ):
73- shutil .rmtree (item_path ) # Remove the directory and all its contents
74- else :
75- os .remove (item_path ) # Remove the file
76- print ("Cleanup completed successfully." )
36+ """ Run the defang-docs repo prebuild script"""
37+
38+ subprocess .run (
39+ ["npm" , "-C" , ".tmp/defang-docs" , "install" ],
40+ check = True ,
41+ stdout = subprocess .PIPE ,
42+ stderr = subprocess .PIPE
43+ )
44+
45+ subprocess .run (
46+ ["npm" , "-C" , ".tmp/defang-docs" , "run" , "prebuild" ],
47+ check = True ,
48+ stdout = subprocess .PIPE ,
49+ stderr = subprocess .PIPE
50+ )
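Both npm calls above capture stdout and stderr with `subprocess.PIPE`, so a failing prebuild surfaces only as a bare `CalledProcessError` with no npm output attached to the log. A minimal sketch of one way to echo the captured output on failure, assuming the same two npm commands; the `run_npm` helper name is illustrative and not part of the repo:

```python
import subprocess

def run_npm(args, cwd=".tmp/defang-docs"):
    """Run an npm command in the cloned defang-docs repo and echo its output if it fails."""
    try:
        subprocess.run(
            ["npm", *args],
            cwd=cwd,              # run inside the checkout, roughly equivalent to npm -C
            check=True,
            capture_output=True,  # same effect as stdout=PIPE, stderr=PIPE
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(e.stdout)
        print(e.stderr)
        raise

# Usage mirroring the two calls above:
# run_npm(["install"])
# run_npm(["run", "prebuild"])
```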
 
 def parse_markdown():
     """Parse markdown files in the current directory into JSON"""
-    reset_knowledge_base()  # Reset the JSON database file
     recursive_parse_directory('./.tmp/defang-docs')  # Parse markdown files in the current directory
     print("Markdown parsing completed successfully.")
 
-def reset_knowledge_base():
-    """Resets or initializes the knowledge base JSON file."""
-    with open(kb_file_path, 'w') as output_file:
-        json.dump([], output_file)
-
-def parse_markdown_file_to_json(file_path):
+def parse_markdown_file_to_json(json_output, current_id, file_path):
     """Parses individual markdown file and adds its content to JSON"""
-    try:
-        # Load existing content if the file exists
-        with open(kb_file_path, 'r') as existing_file:
-            json_output = json.load(existing_file)
-        current_id = len(json_output) + 1  # Start ID from the next available number
-    except (FileNotFoundError, json.JSONDecodeError):
-        # If the file doesn't exist or is empty, start fresh
-        json_output = []
-        current_id = 1
-
     with open(file_path, 'r', encoding='utf-8') as file:
         lines = file.readlines()
 
@@ -148,28 +106,17 @@ def parse_markdown_file_to_json(file_path):
148106 "text" : text ,
149107 "path" : adjust_knowledge_base_entry_path (file_path ) # Adjust path format
150108 })
151- current_id += 1
152-
153- # Write the augmented JSON output to ./data/knowledge_base.json
154- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
155- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
156109
157110def adjust_knowledge_base_entry_path (file_path ):
158111 """ Adjusts the file path format for storage. """
159- return re .sub (r'\/(\d{4})-(\d{2})-(\d{2})-' , r'/\1/\2/\3/' , file_path . replace ( "./.tmp/defang-docs" , "" ). replace ( ".mdx" , "" ). replace ( ".md" , "" ))
112+ return re .sub (r'\/(\d{4})-(\d{2})-(\d{2})-' , r'/\1/\2/\3/' , normalize_docs_path ( file_path ))
160113
161- def parse_cli_markdown (file_path ):
162- """ Parses CLI-specific markdown files """
163- try :
164- # Load existing content if the file exists
165- with open (kb_file_path , 'r' ) as existing_file :
166- json_output = json .load (existing_file )
167- current_id = len (json_output ) + 1 # Start ID from the next available number
168- except (FileNotFoundError , json .JSONDecodeError ):
169- # If the file doesn't exist or is empty, start fresh
170- json_output = []
171- current_id = 1
114+ def normalize_docs_path (path ):
115+ """ Normalizes the file path to ensure consistent formatting. """
116+ return path .replace ("./.tmp/defang-docs" , "" ).replace (".mdx" , "" ).replace (".md" , "" )
172117
118+ def parse_cli_markdown (json_output , current_id , file_path ):
119+ """ Parses CLI-specific markdown files """
173120 with open (file_path , 'r' , encoding = 'utf-8' ) as file :
174121 lines = file .readlines ()
175122
@@ -190,32 +137,32 @@ def parse_cli_markdown(file_path):
190137 "id" : current_id ,
191138 "about" : about ,
192139 "text" : text ,
193- "path" : file_path . replace ( "./.tmp/defang-docs" , "" ). replace ( ".mdx" , "" ). replace ( ".md" , "" )
140+ "path" : normalize_docs_path ( file_path )
194141 })
195- current_id += 1
196-
197- # Write the augmented JSON output to data/knowledge_base.json
198- with open (kb_file_path , 'w' , encoding = 'utf-8' ) as output_file :
199- json .dump (json_output , output_file , indent = 2 , ensure_ascii = False )
200142
201143def recursive_parse_directory (root_dir ):
202144 """ Recursively parses all markdown files in the directory. """
203- for dirpath , dirnames , filenames in os .walk (root_dir ):
145+ paths = []
146+ for dirpath , _dirnames , filenames in os .walk (root_dir ):
204147 for filename in filenames :
205- if filename .lower ().endswith ('.md' ) or filename .lower ().endswith ('.mdx' ):
206- file_path = os .path .join (dirpath , filename )
207- if 'cli' in dirpath .lower () or 'cli' in filename .lower ():
208- parse_cli_markdown (file_path )
209- else :
210- parse_markdown_file_to_json (file_path )
148+ lower_filename = filename .lower ()
149+ if lower_filename .endswith ('.md' ) or lower_filename .endswith ('.mdx' ):
150+ paths .append (os .path .join (dirpath , filename ))
151+
152+ with open (kb_file_path , 'r' ) as kb_file :
153+ kb_data = json .load (kb_file )
154+
155+ for id , file_path in enumerate (paths , start = 1 ):
+        if 'cli' in file_path.lower():  # joined path covers both the directory and the file name
+            parse_cli_markdown(kb_data, id, file_path)
+        else:
+            parse_markdown_file_to_json(kb_data, id, file_path)
+
+    with open(kb_file_path, 'w') as kb_file:
+        json.dump(kb_data, kb_file, indent=2)
 
 if __name__ == "__main__":
     setup_repositories()
     run_prebuild_script()
-    cleanup()
-    os.chdir('../../')
-    print(os.listdir('.'))
     parse_markdown()  # Start parsing logic after all setups
-    print(os.listdir('.'))
-    clean_tmp('./.tmp')
     print("All processes completed successfully.")
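One note on the parsing flow: `recursive_parse_directory` now opens `./data/knowledge_base.json` for reading before appending entries, so the file must already exist on disk (the removed `reset_knowledge_base` helper used to create it). A minimal sketch of a guard that seeds an empty knowledge base when the file is missing; the `ensure_knowledge_base` name is hypothetical and not part of the repo:

```python
import json
import os

kb_file_path = './data/knowledge_base.json'

def ensure_knowledge_base():
    """Create ./data/knowledge_base.json holding an empty list if it does not exist yet."""
    os.makedirs(os.path.dirname(kb_file_path), exist_ok=True)
    if not os.path.exists(kb_file_path):
        with open(kb_file_path, 'w', encoding='utf-8') as kb_file:
            json.dump([], kb_file)

# Called before recursive_parse_directory(), e.g. at the top of parse_markdown().
```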