Skip to content

Commit 58d7ec2

Browse files
authored
Merge pull request #28 from aperture-data/release-0.0.13
Release 0.0.13
2 parents 1646223 + 5a0ef82 commit 58d7ec2

File tree

4 files changed, with 39 additions and 13 deletions.

aperturedb/CSVParser.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@ def __init__(self, filename):
2626

2727
self.validate()
2828

29+
if len(self.df) == 0:
30+
print("Error: Dataframe empty. Is the CSV file ok?")
31+
2932
self.df = self.df.astype('object')
3033

3134
self.header = list(self.df.columns.values)

aperturedb/ImageDownloader.py

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,9 @@ class ImageDownloaderCSV(CSVParser.CSVParser):
2323
2424
'''
2525

26-
def __init__(self, filename, check_image=True):
26+
def __init__(self, filename):
2727

2828
self.has_filename = False
29-
self.check_img = check_image
3029

3130
super().__init__(filename)
3231

@@ -60,13 +59,15 @@ def validate(self):
6059

6160
class ImageDownloader(ParallelLoader.ParallelLoader):
6261

63-
def __init__(self, db, dry_run=False):
62+
def __init__(self, db=None, dry_run=False, n_download_retries=0, check_if_present=False):
6463

6564
super().__init__(db, dry_run=dry_run)
6665

6766
self.type = "image"
6867

69-
self.check_img = False
68+
self.check_img = check_if_present
69+
self.images_already_downloaded = 0
70+
self.n_download_retries = n_download_retries
7071

7172
def check_if_image_is_ok(self, filename, url):
7273

@@ -89,13 +90,26 @@ def download_image(self, url, filename):
8990
start = time.time()
9091

9192
if self.check_img and self.check_if_image_is_ok(filename, url):
93+
self.images_already_downloaded += 1
94+
self.times_arr.append(time.time() - start)
9295
return
9396

9497
folder = os.path.dirname(filename)
9598
if not os.path.exists(folder):
9699
os.makedirs(folder, exist_ok=True)
97100

98-
imgdata = requests.get(url)
101+
retries = 0
102+
while True:
103+
imgdata = requests.get(url)
104+
if imgdata.ok:
105+
break
106+
else:
107+
if retries >= self.n_download_retries:
108+
break
109+
print("WARNING: Retrying object:", url)
110+
retries += 1
111+
time.sleep(2)
112+
99113
if imgdata.ok:
100114
fd = open(filename, "wb")
101115
fd.write(imgdata.content)
@@ -120,7 +134,7 @@ def download_image(self, url, filename):
120134
def worker(self, thid, generator, start, end):
121135

122136
if thid == 0 and self.stats:
123-
pb = ProgressBar.ProgressBar("download_progress.txt")
137+
pb = ProgressBar.ProgressBar()
124138

125139
for i in range(start, end):
126140

@@ -139,13 +153,22 @@ def print_stats(self):
139153
print("====== ApertureDB ImageDownloader Stats ======")
140154

141155
times = np.array(self.times_arr)
142-
print("Avg image download time(s):", np.mean(times))
143-
print("Img download time std:", np.std (times))
144-
print("Avg download throughput (images/s)):",
156+
if len(times) <= 0:
157+
print("Error: No downloads.")
158+
return
159+
160+
if self.images_already_downloaded > 0:
161+
print("Images already present:", self.images_already_downloaded)
162+
163+
print("Images downloaded:", len(times) - self.images_already_downloaded)
164+
print("Avg image time(s):", np.mean(times))
165+
print("Image time std:", np.std (times))
166+
print("Throughput (images/s)):",
145167
1 / np.mean(times) * self.numthreads)
146168

147169
print("Total time(s):", self.ingestion_time)
148170
print("Overall throughput (img/s):",
149171
self.total_elements / self.ingestion_time)
150-
print("Total errors encountered:", self.error_counter)
172+
if self.error_counter > 0:
173+
print("Errors encountered:", self.error_counter)
151174
print("=============================================")

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="aperturedb",
8-
version="0.0.12",
8+
version="0.0.13",
99
description="ApertureDB Client Module",
1010
install_requires=['vdms', 'scikit-image', 'image',
1111
'opencv-python', 'numpy', 'matplotlib', 'pandas'],

test/download_images.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@
66

77
def main(params):
88

9-
loader = ImageDownloader.ImageDownloader(None)
10-
loader.ingest(ImageDownloader.ImageDownloaderCSV(params.in_file, check_image=True),
9+
loader = ImageDownloader.ImageDownloader(check_if_present=True, n_download_retries=2)
10+
loader.ingest(ImageDownloader.ImageDownloaderCSV(params.in_file),
1111
numthreads=32,
1212
stats=True)
1313

0 commit comments

Comments (0)