Skip to content

Commit 90b5ef9

Browse files
update: zip downloaded. Modify to work on both Windows and Linux
1 parent 36a75a8 commit 90b5ef9

File tree

1 file changed

+141
-130
lines changed

1 file changed

+141
-130
lines changed

caffe_models_zipped/download_unpack_cnn_models.py

Lines changed: 141 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,9 @@
3131
import multiprocessing
3232
import requests
3333

34-
# Multi-process shared objects
35-
download_cnt = multiprocessing.Value('d', 0) # Count downloaded models
36-
issue_cnt = multiprocessing.Value('d', 0) # Count issues
37-
model_list_num = multiprocessing.Value('d', 0)
3834

39-
40-
def download_unpack_zip(zip_name):
35+
def download_unpack_zip(zip_name, output_path, zip_path,
36+
git_hub_repo_url, force_download):
4137
"""
4238
Download and unpack one zip file
4339
"""
@@ -51,40 +47,35 @@ def download_unpack_zip(zip_name):
5147
model_out_folder = output_path/name
5248

5349
if model_out_folder.is_dir():
54-
if FORCE_DOWNLOAD:
50+
if force_download:
5551
print("[INFO] --force option is set. {} will be rewritten".
5652
format(model_out_folder))
5753
shutil.rmtree(model_out_folder)
5854
else:
5955
print("[INFO] Skip downloading {}. Model already exists here {}".
6056
format(zip_name, model_out_folder))
61-
return
57+
return False
6258

6359
file_name_url = git_hub_repo_url + '/' + zip_name
64-
file_name_zip_out = zip_path / zip_name
60+
file_name_zip = zip_path / zip_name
6561

6662
print("[INFO] Start downloading {}".format(zip_name))
6763
request_check = requests.get(file_name_url)
6864
if request_check.status_code != 200:
69-
issue_cnt.value += 1
7065
print(" [ERROR]. Can not access to Zip file by address {}".
7166
format(file_name_url))
72-
print("[INFO] File {} is missed. {} from {} "
73-
.format(zip_name, int(issue_cnt.value),
74-
int(model_list_num.value)))
75-
return
67+
return False
7668

7769
with requests.get(file_name_url, stream=True) as r_zip_file:
78-
with open(file_name_zip_out, 'wb') as f_download:
70+
with open(file_name_zip, 'wb') as f_download:
7971
shutil.copyfileobj(r_zip_file.raw, f_download)
8072

81-
with zipfile.ZipFile(file_name_zip_out, 'r') as zip_ref:
73+
with zipfile.ZipFile(file_name_zip, 'r') as zip_ref:
8274
zip_ref.extractall(output_path)
8375

84-
download_cnt.value += 1
85-
print("[INFO] {} is downloaded and unpacked. {}th from {} zip files"
86-
.format(zip_name, int(download_cnt.value),
87-
int(model_list_num.value)))
76+
print("[INFO] {} is downloaded and unpacked"
77+
.format(zip_name))
78+
return True
8879

8980

9081
def arg2bool(bool_arg):
@@ -102,113 +93,133 @@ def arg2bool(bool_arg):
10293
raise argparse.ArgumentTypeError('Boolean value expected.')
10394

10495

105-
parser = argparse.ArgumentParser()
106-
107-
parser.add_argument(
108-
"--model_path", type=str, required=False,
109-
help="Path to NN Models home")
110-
111-
parser.add_argument(
112-
"--model_list", type=str, required=True,
113-
help="File with list of models")
114-
115-
parser.add_argument(
116-
"--force", type=arg2bool, nargs='?', const=True, required=False,
117-
help="Force to download/unpack models. Overwrite existed")
118-
119-
args = parser.parse_args()
120-
121-
list_file = Path(args.model_list)
122-
123-
FORCE_DOWNLOAD = False # Rewrite existed models
124-
if args.force is not None:
125-
if args.force:
126-
FORCE_DOWNLOAD = True
127-
128-
if not list_file.is_file():
129-
print("[ERROR]. List file {} is not found".format(list_file))
130-
exit(1)
131-
132-
# Get a file with models
133-
with open(list_file, "r") as f:
134-
zips_files_lines = f.readlines()
135-
136-
# Create zip lists from list_file
137-
model_list = []
138-
for line in zips_files_lines:
139-
model_name = line.strip()
140-
# Avoid comments and empty strings
141-
if model_name.isspace() or model_name == "" or model_name[0] == '#':
142-
# print("[DEBUG] string is empty or comment {}".format(model_name))
143-
continue
144-
model_list.extend([model_name])
145-
146-
model_list_num.value = len(model_list)
147-
148-
if model_list_num == 0:
149-
print("[ERROR]. There is no model names in model list file {}".
150-
format(list_file))
151-
exit(1)
152-
153-
print("[INFO] Number of models in the model list is {}"
154-
.format(model_list_num.value))
155-
156-
# Get and check output path
157-
if args.model_path:
158-
output_path = Path(args.model_path)/"caffe_models"
159-
elif os.environ['EV_CNNMODELS_HOME']:
160-
output_path = Path(os.environ['EV_CNNMODELS_HOME']) / "caffe_models"
161-
else:
162-
print("[ERROR]. Output path is not defined. "
163-
"Use --model_path or setup EV_CNNMODELS_HOME environment variable")
164-
exit(1)
165-
try:
166-
output_path.mkdir(parents=True, exist_ok=True)
167-
except:
168-
print("[ERROR]. Problem of creating CNN Models folder {}".
169-
format(output_path))
170-
exit(1)
171-
172-
zip_path = output_path/"download" # folder for downloaded zip files
173-
try:
174-
zip_path.mkdir(parents=True, exist_ok=True)
175-
except:
176-
print(
177-
"[ERROR]. Problem of creating folder to download zipped CNN Models: {}"
178-
.format(zip_path))
179-
exit(1)
180-
181-
print("[INFO] NN Models path is {}".format(output_path))
182-
print("[INFO] Zipped NN Models path is {}".format(zip_path))
183-
184-
git_hub_repo_url = 'https://github.com/foss-for-synopsys-dwc-arc-processors/synopsys-caffe-models/raw/master/caffe_models_zipped'
185-
186-
# check GitHub site access
187-
request = requests.get(git_hub_repo_url)
188-
if request.status_code != 200:
189-
print("[ERROR]. Can not access to GitHub by address {}"
190-
.format(git_hub_repo_url))
191-
exit(1)
192-
193-
# Download and unpack zip files in parallel mode
194-
download_cnt.value = 0
195-
issue_cnt.value = 0
196-
MAX_NUM_CORE = 8
197-
num_cores = multiprocessing.cpu_count()
198-
parallel_task = num_cores if num_cores < MAX_NUM_CORE else MAX_NUM_CORE
199-
200-
start_time = time.time()
201-
with multiprocessing.Pool(parallel_task) as p:
202-
p.map(download_unpack_zip, model_list)
203-
204-
end_time = int(time.time() - start_time)
205-
hours, rem = divmod(end_time, 3600)
206-
minutes, seconds = divmod(rem, 60)
207-
208-
print("[INFO] Finish. \n"
209-
" Total models: {}\n"
210-
" Successful: {}\n"
211-
" Failed : {}\n"
212-
" Time: {:0>2}:{:0>2}:{:05.2f}".
213-
format(int(model_list_num.value), int(download_cnt.value),
214-
int(issue_cnt.value), int(hours), int(minutes), int(seconds)))
96+
def main():
    """
    Download and unpack every zipped model named in a model list file.

    Command line arguments:
        --model_path  Optional. Models home folder. When omitted, the
                      EV_CNNMODELS_HOME environment variable is used.
        --model_list  Required. Text file with one zip file name per line.
                      Empty lines and lines starting with '#' are ignored.
        --force       Optional. Re-download models that already exist.

    Models are unpacked into <models home>/caffe_models and the zip files
    are stored in <models home>/caffe_models/download.

    Exits with status 1 when the list file is missing or empty, the output
    path is not defined or can not be created, or GitHub is not reachable.
    """
    git_hub_repo_url = 'https://github.com/foss-for-synopsys-dwc-arc-processors/synopsys-caffe-models/raw/master/caffe_models_zipped'

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_path", type=str, required=False,
        help="Path to NN Models home")

    parser.add_argument(
        "--model_list", type=str, required=True,
        help="File with list of models")

    parser.add_argument(
        "--force", type=arg2bool, nargs='?', const=True, required=False,
        help="Force to download/unpack models. Overwrite existed")

    args = parser.parse_args()

    list_file = Path(args.model_list)

    # args.force is None when the option is absent, so bool() covers both
    # the "not given" and the explicit "false" cases.
    force_download = bool(args.force)  # Rewrite existed models

    # SystemExit is raised directly instead of calling exit(): exit() is
    # injected by the site module and is not guaranteed to exist.
    if not list_file.is_file():
        print("[ERROR]. List file {} is not found".format(list_file))
        raise SystemExit(1)

    # Create zip list from list_file, avoiding comments and empty strings
    model_list = []
    with open(list_file, "r") as f:
        for line in f:
            model_name = line.strip()
            if not model_name or model_name.startswith('#'):
                continue
            model_list.append(model_name)

    model_list_num = len(model_list)

    if model_list_num == 0:
        print("[ERROR]. There is no model names in model list file {}".
              format(list_file))
        raise SystemExit(1)

    print("[INFO] Number of models in the model list is {}"
          .format(model_list_num))

    # Get and check output path.
    # os.environ.get() is used so that an unset variable reaches the error
    # message below instead of raising KeyError.
    models_home = args.model_path or os.environ.get('EV_CNNMODELS_HOME')
    if not models_home:
        print("[ERROR]. Output path is not defined. Use --model_path or "
              "setup EV_CNNMODELS_HOME environment variable")
        raise SystemExit(1)

    output_path = Path(models_home) / "caffe_models"
    try:
        output_path.mkdir(parents=True, exist_ok=True)
    except OSError:
        print("[ERROR]. Problem of creating CNN Models folder {}".
              format(output_path))
        raise SystemExit(1)

    zip_path = output_path / "download"  # folder for downloaded zip files
    try:
        zip_path.mkdir(parents=True, exist_ok=True)
    except OSError:
        print(
            "[ERROR]. Problem of creating folder to download zipped CNN Models:"
            " {}".format(zip_path))
        raise SystemExit(1)

    print("[INFO] NN Models path is {}".format(output_path))
    print("[INFO] Zipped NN Models path is {}".format(zip_path))

    # check GitHub site access; a connection failure is reported the same
    # way as a non-200 answer instead of ending with a traceback
    try:
        status_code = requests.get(git_hub_repo_url).status_code
    except requests.exceptions.RequestException:
        status_code = None
    if status_code != 200:
        print("[ERROR]. Can not access to GitHub by address {}"
              .format(git_hub_repo_url))
        raise SystemExit(1)

    # Download and unpack zip files in parallel mode.
    # Every worker gets all its inputs as arguments, so no module level
    # state has to be shared between processes.
    list_of_params = [
        (zip_file_name, output_path, zip_path,
         git_hub_repo_url, force_download)
        for zip_file_name in model_list
    ]

    MAX_NUM_CORE = 8
    parallel_task = min(multiprocessing.cpu_count(), MAX_NUM_CORE)

    start_time = time.time()
    with multiprocessing.Pool(parallel_task) as p:
        results_values = p.starmap(download_unpack_zip, list_of_params)

    # Keep fractional seconds: the report prints them with two decimals
    elapsed = time.time() - start_time
    hours, rem = divmod(elapsed, 3600)
    minutes, seconds = divmod(rem, 60)

    # Count success and failed downloads.
    # NOTE: download_unpack_zip also returns False for a model that is
    # skipped because it already exists, so such models are reported
    # in the "Failed" line too.
    download_cnt = sum(1 for value in results_values if value)
    issue_cnt = len(results_values) - download_cnt

    print("[INFO] Finish. \n"
          "      Total models: {}\n"
          "      Successful: {}\n"
          "      Failed : {}\n"
          "      Time: {:0>2}:{:0>2}:{:05.2f}".
          format(model_list_num, download_cnt,
                 issue_cnt, int(hours), int(minutes), seconds))


# The guard is required for multiprocessing on Windows, where worker
# processes import this module again.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)