3131import multiprocessing
3232import requests
3333
34- # Multi-process shared objects
35- download_cnt = multiprocessing .Value ('d' , 0 ) # Count downloaded models
36- issue_cnt = multiprocessing .Value ('d' , 0 ) # Count issues
37- model_list_num = multiprocessing .Value ('d' , 0 )
3834
39-
40- def download_unpack_zip ( zip_name ):
35+ def download_unpack_zip ( zip_name , output_path , zip_path ,
36+ git_hub_repo_url , force_download ):
4137 """
4238 Download and unpack one zip file
4339 """
@@ -51,40 +47,35 @@ def download_unpack_zip(zip_name):
5147 model_out_folder = output_path / name
5248
5349 if model_out_folder .is_dir ():
54- if FORCE_DOWNLOAD :
50+ if force_download :
5551 print ("[INFO] --force option is set. {} will be rewritten" .
5652 format (model_out_folder ))
5753 shutil .rmtree (model_out_folder )
5854 else :
5955 print ("[INFO] Skip downloading {}. Model already exists here {}" .
6056 format (zip_name , model_out_folder ))
61- return
57+ return False
6258
6359 file_name_url = git_hub_repo_url + '/' + zip_name
64- file_name_zip_out = zip_path / zip_name
60+ file_name_zip = zip_path / zip_name
6561
6662 print ("[INFO] Start downloading {}" .format (zip_name ))
6763 request_check = requests .get (file_name_url )
6864 if request_check .status_code != 200 :
69- issue_cnt .value += 1
7065 print (" [ERROR]. Can not access to Zip file by address {}" .
7166 format (file_name_url ))
72- print ("[INFO] File {} is missed. {} from {} "
73- .format (zip_name , int (issue_cnt .value ),
74- int (model_list_num .value )))
75- return
67+ return False
7668
7769 with requests .get (file_name_url , stream = True ) as r_zip_file :
78- with open (file_name_zip_out , 'wb' ) as f_download :
70+ with open (file_name_zip , 'wb' ) as f_download :
7971 shutil .copyfileobj (r_zip_file .raw , f_download )
8072
81- with zipfile .ZipFile (file_name_zip_out , 'r' ) as zip_ref :
73+ with zipfile .ZipFile (file_name_zip , 'r' ) as zip_ref :
8274 zip_ref .extractall (output_path )
8375
84- download_cnt .value += 1
85- print ("[INFO] {} is downloaded and unpacked. {}th from {} zip files"
86- .format (zip_name , int (download_cnt .value ),
87- int (model_list_num .value )))
76+ print ("[INFO] {} is downloaded and unpacked"
77+ .format (zip_name ))
78+ return True
8879
8980
9081def arg2bool (bool_arg ):
@@ -102,113 +93,133 @@ def arg2bool(bool_arg):
10293 raise argparse .ArgumentTypeError ('Boolean value expected.' )
10394
10495
105- parser = argparse .ArgumentParser ()
106-
107- parser .add_argument (
108- "--model_path" , type = str , required = False ,
109- help = "Path to NN Models home" )
110-
111- parser .add_argument (
112- "--model_list" , type = str , required = True ,
113- help = "File with list of models" )
114-
115- parser .add_argument (
116- "--force" , type = arg2bool , nargs = '?' , const = True , required = False ,
117- help = "Force to download/unpack models. Overwrite existed" )
118-
119- args = parser .parse_args ()
120-
121- list_file = Path (args .model_list )
122-
123- FORCE_DOWNLOAD = False # Rewrite existed models
124- if args .force is not None :
125- if args .force :
126- FORCE_DOWNLOAD = True
127-
128- if not list_file .is_file ():
129- print ("[ERROR]. List file {} is not found" .format (list_file ))
130- exit (1 )
131-
132- # Get a file with models
133- with open (list_file , "r" ) as f :
134- zips_files_lines = f .readlines ()
135-
136- # Create zip lists from list_file
137- model_list = []
138- for line in zips_files_lines :
139- model_name = line .strip ()
140- # Avoid comments and empty strings
141- if model_name .isspace () or model_name == "" or model_name [0 ] == '#' :
142- # print("[DEBUG] string is empty or comment {}".format(model_name))
143- continue
144- model_list .extend ([model_name ])
145-
146- model_list_num .value = len (model_list )
147-
148- if model_list_num == 0 :
149- print ("[ERROR]. There is no model names in model list file {}" .
150- format (list_file ))
151- exit (1 )
152-
153- print ("[INFO] Number of models in the model list is {}"
154- .format (model_list_num .value ))
155-
156- # Get and check output path
157- if args .model_path :
158- output_path = Path (args .model_path )/ "caffe_models"
159- elif os .environ ['EV_CNNMODELS_HOME' ]:
160- output_path = Path (os .environ ['EV_CNNMODELS_HOME' ]) / "caffe_models"
161- else :
162- print ("[ERROR]. Output path is not defined. "
163- "Use --model_path or setup EV_CNNMODELS_HOME environment variable" )
164- exit (1 )
165- try :
166- output_path .mkdir (parents = True , exist_ok = True )
167- except :
168- print ("[ERROR]. Problem of creating CNN Models folder {}" .
169- format (output_path ))
170- exit (1 )
171-
172- zip_path = output_path / "download" # folder for downloaded zip files
173- try :
174- zip_path .mkdir (parents = True , exist_ok = True )
175- except :
176- print (
177- "[ERROR]. Problem of creating folder to download zipped CNN Models: {}"
178- .format (zip_path ))
179- exit (1 )
180-
181- print ("[INFO] NN Models path is {}" .format (output_path ))
182- print ("[INFO] Zipped NN Models path is {}" .format (zip_path ))
183-
184- git_hub_repo_url = 'https://github.com/foss-for-synopsys-dwc-arc-processors/synopsys-caffe-models/raw/master/caffe_models_zipped'
185-
186- # check GitHub site access
187- request = requests .get (git_hub_repo_url )
188- if request .status_code != 200 :
189- print ("[ERROR]. Can not access to GitHub by address {}"
190- .format (git_hub_repo_url ))
191- exit (1 )
192-
193- # Download and unpack zip files in parallel mode
194- download_cnt .value = 0
195- issue_cnt .value = 0
196- MAX_NUM_CORE = 8
197- num_cores = multiprocessing .cpu_count ()
198- parallel_task = num_cores if num_cores < MAX_NUM_CORE else MAX_NUM_CORE
199-
200- start_time = time .time ()
201- with multiprocessing .Pool (parallel_task ) as p :
202- p .map (download_unpack_zip , model_list )
203-
204- end_time = int (time .time () - start_time )
205- hours , rem = divmod (end_time , 3600 )
206- minutes , seconds = divmod (rem , 60 )
207-
208- print ("[INFO] Finish. \n "
209- " Total models: {}\n "
210- " Successful: {}\n "
211- " Failed : {}\n "
212- " Time: {:0>2}:{:0>2}:{:05.2f}" .
213- format (int (model_list_num .value ), int (download_cnt .value ),
214- int (issue_cnt .value ), int (hours ), int (minutes ), int (seconds )))
def _read_model_names(list_file):
    """
    Return the model zip names listed in list_file

    Every line is stripped; empty lines and lines whose first character
    is '#' (comments) are left out.
    """
    with open(list_file, "r") as f:
        stripped_lines = (line.strip() for line in f)
        return [model_name for model_name in stripped_lines
                if model_name and not model_name.startswith('#')]


def _create_folder_or_exit(folder, error_message):
    """
    Create folder together with its parents, or report and exit

    On an OSError from mkdir, error_message (formatted with the folder
    path) is printed and the process exits with status 1.
    """
    try:
        folder.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Only filesystem errors are handled here; a bare "except" would
        # also swallow KeyboardInterrupt and SystemExit.
        print(error_message.format(folder))
        raise SystemExit(1) from None


def main():
    """
    Download and unpack the model zip files named in a list file

    Command line options:
      --model_list  file with one zip name per line (required)
      --model_path  models home; when it is not given, the
                    EV_CNNMODELS_HOME environment variable is used
      --force       overwrite models that already exist

    Models are unpacked into <models home>/caffe_models and the zip files
    are stored in <models home>/caffe_models/download. Downloads run in a
    process pool of at most 8 workers and a summary is printed at the end.

    The process exits with status 1 when the list file is missing or has
    no model names, when no output path is defined, when a folder can not
    be created, or when the GitHub address does not answer with HTTP 200.
    """
    git_hub_repo_url = 'https://github.com/foss-for-synopsys-dwc-arc-processors/synopsys-caffe-models/raw/master/caffe_models_zipped'
    # Seconds to wait for the reachability check, so that a dead
    # connection can not block the script forever.
    request_timeout = 60

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_path", type=str, required=False,
        help="Path to NN Models home")

    parser.add_argument(
        "--model_list", type=str, required=True,
        help="File with list of models")

    parser.add_argument(
        "--force", type=arg2bool, nargs='?', const=True, required=False,
        help="Force to download/unpack models. Overwrite existed")

    args = parser.parse_args()

    list_file = Path(args.model_list)

    # args.force is None when the option is absent, so bool() covers
    # both "not given" and an explicit false value.
    force_download = bool(args.force)  # Rewrite existed models

    if not list_file.is_file():
        print("[ERROR]. List file {} is not found".format(list_file))
        raise SystemExit(1)

    # Create zip list from list_file
    model_list = _read_model_names(list_file)
    model_list_num = len(model_list)

    if not model_list:
        print("[ERROR]. There is no model names in model list file {}".
              format(list_file))
        raise SystemExit(1)

    print("[INFO] Number of models in the model list is {}"
          .format(model_list_num))

    # Get and check output path.
    # .get() is required here: indexing os.environ raises KeyError when
    # the variable is not set, which made the error branch unreachable.
    env_models_home = os.environ.get('EV_CNNMODELS_HOME')
    if args.model_path:
        output_path = Path(args.model_path) / "caffe_models"
    elif env_models_home:
        output_path = Path(env_models_home) / "caffe_models"
    else:
        print("[ERROR]. Output path is not defined. Use --model_path or "
              "setup EV_CNNMODELS_HOME environment variable")
        raise SystemExit(1)

    _create_folder_or_exit(
        output_path,
        "[ERROR]. Problem of creating CNN Models folder {}")

    zip_path = output_path / "download"  # folder for downloaded zip files
    _create_folder_or_exit(
        zip_path,
        "[ERROR]. Problem of creating folder to download zipped CNN Models:"
        " {}")

    print("[INFO] NN Models path is {}".format(output_path))
    print("[INFO] Zipped NN Models path is {}".format(zip_path))

    # Check GitHub site access. A connection problem is reported the same
    # way as a non-200 answer instead of ending in a traceback.
    try:
        request = requests.get(git_hub_repo_url, timeout=request_timeout)
        site_is_reachable = request.status_code == 200
    except requests.RequestException:
        site_is_reachable = False

    if not site_is_reachable:
        print("[ERROR]. Can not access to GitHub by address {}"
              .format(git_hub_repo_url))
        raise SystemExit(1)

    # Download and unpack zip files in parallel mode:
    # one argument tuple per call of download_unpack_zip
    list_of_params = [
        (zip_file_name, output_path, zip_path,
         git_hub_repo_url, force_download)
        for zip_file_name in model_list
    ]

    MAX_NUM_CORE = 8
    parallel_task = min(multiprocessing.cpu_count(), MAX_NUM_CORE)

    start_time = time.time()
    with multiprocessing.Pool(parallel_task) as p:
        results_values = p.starmap(download_unpack_zip, list_of_params)

    # Count success and failed downloads.
    # NOTE: download_unpack_zip returns False for a model that is skipped
    # because it already exists, so skipped models are counted as failed.
    download_cnt = sum(1 for value in results_values if value)
    issue_cnt = len(results_values) - download_cnt

    end_time = int(time.time() - start_time)
    hours, rem = divmod(end_time, 3600)
    minutes, seconds = divmod(rem, 60)

    print("[INFO] Finish. \n"
          "       Total models:    {}\n"
          "       Successful:    {}\n"
          "       Failed :    {}\n"
          "       Time: {:0>2}:{:0>2}:{:05.2f}".
          format(int(model_list_num), int(download_cnt),
                 int(issue_cnt), int(hours), int(minutes), int(seconds)))


if __name__ == "__main__":
    main()
0 commit comments