Skip to content

Commit 7c4bf1e

Browse files
committed
[data] Add checks to avoid downloading the same file twice.
1 parent 3844c66 commit 7c4bf1e

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

download_utils.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,19 @@ def download_file(url, file_path):
2626
print("Removed incomplete download")
2727

2828

29-
def download_from_github(version, fn, target_dir):
29+
def download_from_github(version, fn, target_dir, force=False):
    """Download release asset *fn* of release *version* into *target_dir*.

    The asset URL is built from the module-level ``REPOSITORY_PATH``.
    If the destination file already exists and *force* is false, nothing
    is downloaded and a notice is printed instead.

    Args:
        version: Release tag used in the download URL.
        fn: Name of the asset; also used as the local file name.
        target_dir: Directory the file is written into.
        force: When true, download even if the file already exists.
    """
    release_url = REPOSITORY_PATH + "/releases/download/{0}/{1}".format(version, fn)
    destination = os.path.join(target_dir, fn)

    if force or not os.path.exists(destination):
        download_file(release_url, destination)
        return

    # The file is already on disk and no re-download was requested.
    print("File {} is already downloaded.".format(destination))
3336

3437

35-
def sequential_downloader(version, fns, target_dir):
38+
def sequential_downloader(version, fns, target_dir, force=False):
    """Download every file named in *fns*, one after another.

    *target_dir* is created first if it does not exist yet. Each file is
    then handed to ``download_from_github`` with the same *version*,
    *target_dir* and *force* values.

    Args:
        version: Release tag passed through to ``download_from_github``.
        fns: Iterable of asset file names to download.
        target_dir: Directory that receives the files.
        force: Passed through; when true, existing files are re-downloaded.
    """
    # exist_ok=True makes repeated calls on the same directory harmless.
    os.makedirs(target_dir, exist_ok=True)

    for file_name in fns:
        download_from_github(version, file_name, target_dir, force=force)
3942

4043

4144
def link_all_files_from_dir(src_dir, dst_dir):
@@ -54,7 +57,7 @@ def link_resources():
5457
link_all_files_from_dir("../readonly/dataset/", ".")
5558

5659

57-
def download_week1_resources():
60+
def download_week1_resources(force=False):
5861
sequential_downloader(
5962
"week1",
6063
[
@@ -63,17 +66,19 @@ def download_week1_resources():
6366
"test.tsv",
6467
"text_prepare_tests.tsv",
6568
],
66-
"data"
69+
"data",
70+
force=force
6771
)
6872

6973

70-
def download_week2_resources():
74+
def download_week2_resources(force=False):
    """Download the week 2 data files into the ``data`` directory.

    Args:
        force: Passed through to ``sequential_downloader``; when true,
            files that already exist are downloaded again.
    """
    week2_files = [
        "train.txt",
        "validation.txt",
        "test.txt",
    ]
    sequential_downloader("week2", week2_files, "data", force=force)

0 commit comments

Comments
 (0)