Skip to content

Commit ba15a70

Browse files
authored
Merge pull request #110 from aperture-data/release-0.2.3
Release 0.2.3
2 parents 042b693 + db498e9 commit ba15a70

File tree

4 files changed

+130
-54
lines changed

4 files changed

+130
-54
lines changed

aperturedb/Utils.py

Lines changed: 119 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
import os
22
import sys
33
import time
4+
import logging
45

56
from aperturedb.Connector import Connector
67
from aperturedb import ProgressBar
78

9+
logger = logging.getLogger(__name__)
10+
811
DESCRIPTOR_CLASS = "_Descriptor"
912
DESCRIPTOR_CONNECTION_CLASS = "_DescriptorSetToDescriptor"
1013

14+
DEFAULT_METADATA_BATCH_SIZE = 100_000
15+
1116

1217
class Utils(object):
1318
"""
@@ -37,8 +42,9 @@ def status(self):
3742

3843
try:
3944
res, blobs = self.connector.query(q)
40-
except:
41-
self.connector.print_last_response()
45+
except BaseException as e:
46+
logger.error(self.connector.get_last_response_str())
47+
raise e
4248

4349
return self.connector.get_last_response_str()
4450

@@ -55,15 +61,15 @@ def get_schema(self, refresh=False):
5561
}
5662
}]
5763

58-
res, blobs = self.connector.query(query)
64+
res, _ = self.connector.query(query)
5965

6066
schema = {}
6167

6268
try:
6369
schema = res[0]["GetSchema"]
64-
except:
65-
print("Cannot retrieve schema")
66-
self.connector.print_last_response()
70+
except BaseException as e:
71+
logger.error(self.connector.get_last_response_str())
72+
raise e
6773

6874
return schema
6975

@@ -81,10 +87,10 @@ def _create_index(self, index_type, class_name, property_key, property_type):
8187
try:
8288
res, blobs = self.connector.query(q)
8389
if not self.connector.last_query_ok():
84-
self.connector.print_last_response()
90+
logger.error(self.connector.get_last_response_str())
8591
return False
8692
except:
87-
self.connector.print_last_response()
93+
logger.error(self.connector.get_last_response_str())
8894
return False
8995

9096
return True
@@ -102,10 +108,10 @@ def _remove_index(self, index_type, class_name, property_key):
102108
try:
103109
res, blobs = self.connector.query(q)
104110
if not self.connector.last_query_ok():
105-
self.connector.print_last_response()
111+
logger.error(self.connector.get_last_response_str())
106112
return False
107113
except:
108-
self.connector.print_last_response()
114+
logger.error(self.connector.get_last_response_str())
109115
return False
110116

111117
return True
@@ -145,12 +151,74 @@ def count_images(self, constraints={}):
145151
try:
146152
res, blobs = self.connector.query(q)
147153
total_images = res[0]["FindImage"]["count"]
148-
except:
149-
total_images = 0
150-
self.connector.print_last_response()
154+
except BaseException as e:
155+
logger.error(self.connector.get_last_response_str())
156+
raise e
151157

152158
return total_images
153159

160+
def get_uniqueids(self, object_type, constraints=None):
    """
    Collect the ``_uniqueid`` of every entity of class ``object_type``.

    A first ``FindEntity`` query (sent with an empty ``batch`` dict) is used
    only to read ``batch.total_elements`` from the response. The ids are then
    fetched in batches of ``DEFAULT_METADATA_BATCH_SIZE`` and concatenated.

    Arguments:
        object_type: Value sent as ``with_class`` in the ``FindEntity`` query.
        constraints: Optional constraints for the query. Only attached to
            the query when truthy, so ``None`` and ``{}`` behave the same.

    Returns:
        A list with the ``_uniqueid`` of each returned entity, in the order
        the batches were retrieved.

    Raises:
        Any exception raised while querying or while reading the response is
        re-raised after the connector's last response has been logged.
    """

    find_entity = {
        "with_class": object_type,
        "batch": {},
        "results": {
            "list": ["_uniqueid"],
        }
    }

    # The default is None rather than {} because default parameters
    # should not be mutable objects.
    if constraints:
        find_entity["constraints"] = constraints

    q = [{"FindEntity": find_entity}]

    try:
        res, _ = self.connector.query(q)
        total_elements = res[0]["FindEntity"]["batch"]["total_elements"]
    except BaseException:
        logger.error(self.connector.get_last_response_str())
        raise

    batch_size = DEFAULT_METADATA_BATCH_SIZE

    # Ceiling division: a trailing partial batch must be requested as well.
    # Plain floor division would drop the last (total % batch_size) ids
    # whenever the total is larger than one batch and not an exact multiple.
    iterations = (total_elements + batch_size - 1) // batch_size

    pb = ProgressBar.ProgressBar()

    ids = []

    for i in range(iterations):

        # q holds a reference to find_entity, so this updates the query.
        find_entity["batch"] = {
            "batch_size": batch_size,
            "batch_id": i
        }

        try:
            res, _ = self.connector.query(q)
            ids += [element["_uniqueid"]
                    for element in res[0]["FindEntity"]["entities"]]
        except BaseException:
            logger.error(self.connector.get_last_response_str())
            raise

        if self.verbose:
            pb.update(i / iterations)

    if self.verbose:
        pb.update(1)  # For the end of line

    return ids
217+
218+
def get_images_uniqueids(self, constraints=None):
    """
    Return the ``_uniqueid`` of every ``_Image`` object.

    Thin wrapper that forwards to ``get_uniqueids`` with the ``_Image``
    class name.

    Arguments:
        constraints: Optional constraints, passed through unchanged. The
            default is None rather than {} because default parameters
            should not be mutable objects.

    Returns:
        Whatever ``get_uniqueids`` returns for class ``_Image``.
    """

    return self.get_uniqueids("_Image", constraints)
221+
154222
def count_bboxes(self, constraints=None):
155223
# The default params in python functions should not be
156224
# mutable objects.
@@ -170,9 +238,9 @@ def count_bboxes(self, constraints=None):
170238
try:
171239
res, blobs = self.connector.query(q)
172240
total_connections = res[0]["FindBoundingBox"]["count"]
173-
except:
174-
total_connections = 0
175-
self.connector.print_last_response()
241+
except BaseException as e:
242+
logger.error(self.connector.get_last_response_str())
243+
raise e
176244

177245
return total_connections
178246

@@ -192,10 +260,21 @@ def count_entities(self, entity_class, constraints=None):
192260

193261
try:
194262
res, blobs = self.connector.query(q)
195-
total_entities = res[0]["FindEntity"]["count"]
196-
except:
197-
total_entities = 0
198-
self.connector.print_last_response()
263+
fe = res[0]["FindEntity"]
264+
265+
if fe["status"] == 1:
266+
# TODO: Here we return 0 entities because the query failed.
267+
# This is because Find* Command will return status: 1
268+
# and no count if no object is found.
269+
# We should change the Find* Command to return status: 0
270+
# and count: 0 if no object is found.
271+
total_entities = 0
272+
else:
273+
total_entities = fe["count"]
274+
275+
except BaseException as e:
276+
logger.error(self.connector.get_last_response_str())
277+
raise e
199278

200279
return total_entities
201280

@@ -216,9 +295,9 @@ def count_connections(self, connections_class, constraints=None):
216295
try:
217296
res, blobs = self.connector.query(q)
218297
total_connections = res[0]["FindConnection"]["count"]
219-
except:
220-
total_connections = 0
221-
self.connector.print_last_response()
298+
except BaseException as e:
299+
logger.error(self.connector.get_last_response_str())
300+
raise e
222301

223302
return total_connections
224303

@@ -242,8 +321,10 @@ def add_descriptorset(self, name, dim, metric="L2", engine="FaissFlat"):
242321
}]
243322

244323
if response != expected:
245-
print("Error inserting set", name)
246-
self.connector.print_last_response()
324+
logger.error(self.connector.get_last_response_str())
325+
return False
326+
327+
return True
247328

248329
def count_descriptorsets(self):
249330

@@ -258,9 +339,9 @@ def count_descriptorsets(self):
258339
try:
259340
res, blobs = self.connector.query(q)
260341
total_descriptor_sets = res[0]["FindDescriptorSet"]["count"]
261-
except:
262-
total_descriptor_sets = 0
263-
self.connector.print_last_response()
342+
except BaseException as e:
343+
logger.error(self.connector.get_last_response_str())
344+
raise e
264345

265346
return total_descriptor_sets
266347

@@ -281,8 +362,9 @@ def get_descriptorset_list(self):
281362

282363
sets = [ent["_name"]
283364
for ent in res[0]["FindDescriptorSet"]["entities"]]
284-
except:
285-
self.connector.print_last_response()
365+
except BaseException as e:
366+
logger.error(self.connector.get_last_response_str())
367+
raise e
286368

287369
return sets
288370

@@ -303,10 +385,10 @@ def remove_descriptorset(self, set_name):
303385
try:
304386
res, _ = self.connector.query(q)
305387
if not self.connector.last_query_ok():
306-
self.connector.print_last_response()
388+
logger.error(self.connector.get_last_response_str())
307389
return False
308390
except:
309-
self.connector.print_last_response()
391+
logger.error(self.connector.get_last_response_str())
310392
return False
311393

312394
return True
@@ -347,7 +429,7 @@ def _remove_objects(self, type, class_name, batch_size):
347429
res, _ = self.connector.query(q)
348430

349431
if not self.connector.last_query_ok():
350-
self.connector.print_last_response()
432+
logger.error(self.connector.get_last_response_str())
351433
return False
352434

353435
count -= batch_size
@@ -426,7 +508,6 @@ def count_descriptors_in_set(self, set_name):
426508
"FindDescriptorSet": {
427509
"_ref": 1,
428510
"with_name": set_name,
429-
430511
}
431512
}, {
432513
"FindDescriptor": {
@@ -444,10 +525,11 @@ def count_descriptors_in_set(self, set_name):
444525
try:
445526
res, _ = self.connector.query(q)
446527
if not self.connector.last_query_ok():
447-
self.connector.print_last_response()
528+
logger.error(self.connector.get_last_response_str())
448529
else:
449530
total = res[1]["FindDescriptor"]["count"]
450-
except:
451-
self.connector.print_last_response()
531+
except BaseException as e:
532+
logger.error(self.connector.get_last_response_str())
533+
raise e
452534

453535
return total

aperturedb/VideoDownloader.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import cv2
77
import numpy as np
88

9-
from aperturedb import ParallelLoader
9+
from aperturedb import Parallelizer
1010
from aperturedb import CSVParser
1111
from aperturedb import ProgressBar
1212

@@ -61,11 +61,11 @@ def validate(self):
6161
self.has_filename = True
6262

6363

64-
class VideoDownloader(ParallelLoader.ParallelLoader):
64+
class VideoDownloader(Parallelizer.Parallelizer):
6565

66-
def __init__(self, db, dry_run=False):
66+
def __init__(self, ):
6767

68-
super().__init__(db, dry_run=dry_run)
68+
super().__init__()
6969

7070
self.type = "video"
7171

@@ -81,7 +81,7 @@ def check_if_video_is_ok(self, filename, url):
8181
if a.isOpened() == False:
8282
print("Video present but error reading it:", url)
8383
return False
84-
except:
84+
except BaseException:
8585
print("Video present but error decoding:", url)
8686
return False
8787

@@ -110,7 +110,7 @@ def download_video(self, url, filename):
110110
print("Downloaded Video size error:", url)
111111
os.remove(filename)
112112
self.error_counter += 1
113-
except:
113+
except BaseException:
114114
print("Downloaded Video cannot be decoded:", url)
115115
os.remove(filename)
116116
self.error_counter += 1
@@ -122,20 +122,14 @@ def download_video(self, url, filename):
122122

123123
def worker(self, thid, generator, start, end):
124124

125-
if thid == 0 and self.stats:
126-
pb = ProgressBar.ProgressBar("download_progress.txt")
127-
128125
for i in range(start, end):
129126

130127
url, filename = generator[i]
131128

132129
self.download_video(url, filename)
133130

134131
if thid == 0 and self.stats:
135-
pb.update((i - start) / (end - start))
136-
137-
if thid == 0 and self.stats:
138-
pb.update(1)
132+
self.pb.update((i - start) / (end - start))
139133

140134
def print_stats(self):
141135

@@ -147,8 +141,8 @@ def print_stats(self):
147141
print("Avg download throughput (videos/s)):",
148142
1 / np.mean(times) * self.numthreads)
149143

150-
print("Total time(s):", self.ingestion_time)
144+
print("Total time(s):", self.total_actions_time)
151145
print("Overall throughput (videos/s):",
152-
self.total_elements / self.ingestion_time)
146+
self.total_actions / self.total_actions_time)
153147
print("Total errors encountered:", self.error_counter)
154148
print("=============================================")

aperturedb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
logger = logging.getLogger(__name__)
77

8-
__version__ = "0.2.2"
8+
__version__ = "0.2.3"
99

1010
# set log level
1111
logger.setLevel(logging.DEBUG)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="aperturedb",
8-
version="0.2.2",
8+
version="0.2.3",
99
description="ApertureDB Client Module",
1010
install_requires=['protobuf>=3.20.0', 'scikit-image', 'image', 'requests', 'boto3',
1111
'opencv-python', 'numpy', 'matplotlib', 'pandas', 'kaggle'],

0 commit comments

Comments
 (0)