Skip to content

Commit 89b0b2a

Browse files
authored
Merge pull request #434 from aperture-data/release-0.4.26
Release 0.4.26
2 parents e109734 + f5518fb commit 89b0b2a

File tree

17 files changed

+614
-83
lines changed

17 files changed

+614
-83
lines changed

aperturedb/Connector.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,11 @@ def __del__(self):
182182
self.connected = False
183183

184184
def _send_msg(self, data):
185+
# aperturedb's param ADB_MAX_CONNECTION_MESSAGE_SIZE_MB = 256 by default
186+
if len(data) > (256 * 2**20):
187+
logger.warning(
188+
"Message sent is larger than default for ApertureDB Server. Server may disconnect.")
189+
185190
sent_len = struct.pack('@I', len(data)) # send size first
186191
x = self.conn.send(sent_len + data)
187192
return x == len(data) + 4

aperturedb/Images.py

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from aperturedb import Utils
1414
from aperturedb.Entities import Entities
1515
from aperturedb.Constraints import Constraints
16+
from aperturedb.ParallelQuery import execute_batch
1617
from ipywidgets import widgets
1718
from IPython.display import display, HTML
1819
import base64
@@ -222,7 +223,7 @@ def __init__(self, db, batch_size=100, response=None, **kwargs):
222223

223224
# Blobs can be passed in addition the response.
224225
# This would mean that the images are already retrieved.
225-
# This is useful for usage of the class dor it's utility methods.
226+
# This is useful for usage of the class for it's utility methods.
226227
if "blobs" in kwargs:
227228
blobs = kwargs["blobs"]
228229
for i, id in enumerate(self.images_ids):
@@ -279,6 +280,9 @@ def __retrieve_batch(self, index):
279280
uniqueid = self.images_ids[idx]
280281
self.images[str(uniqueid)] = imgs[i]
281282

283+
def retrieve_polygons(self, index):
284+
return self.__retrieve_polygons(index, constraints=None, tag_key="_label", tag_format="{}")
285+
282286
def __retrieve_polygons(self, index, constraints, tag_key, tag_format):
283287

284288
if index > len(self.images_ids):
@@ -327,37 +331,39 @@ def __retrieve_polygons(self, index, constraints, tag_key, tag_format):
327331
fpq_res["list"].append(key)
328332

329333
try:
330-
res, _ = self.db_connector.query(query)
334+
result, res, _ = execute_batch(
335+
db=self.db_connector, q=query, blobs=[])
331336

332337
polygons = []
333338
bounds = []
334339
tags = []
335340
meta = []
336-
polys = res[1]["FindPolygon"]["entities"]
337-
operations = self.query["operations"] if self.query and "operations" in self.query else [
338-
]
339-
for poly in polys:
340-
if tag_key and tag_format:
341-
tag = tag_format.format(poly[tag_key])
342-
tags.append(tag)
343-
meta.append(res[0]["FindImage"]["entities"][0])
344-
345-
bounds.append(poly["_bounds"])
346-
converted = []
347-
for vert in poly["_vertices"]:
348-
v = resolve(
349-
np.array(vert),
350-
res[0]["FindImage"]["entities"][0],
351-
operations)
352-
converted.append(v)
353-
polygons.append(converted)
354-
355-
self.images_polygons[str(uniqueid)] = {
356-
"bounds": bounds,
357-
"polygons": polygons,
358-
"tags": tags,
359-
"meta": meta
360-
}
341+
if "entities" in res[1]["FindPolygon"]:
342+
polys = res[1]["FindPolygon"]["entities"]
343+
operations = self.query["operations"] if self.query and "operations" in self.query else [
344+
]
345+
for poly in polys:
346+
if tag_key and tag_format:
347+
tag = tag_format.format(poly[tag_key])
348+
tags.append(tag)
349+
meta.append(res[0]["FindImage"]["entities"][0])
350+
351+
bounds.append(poly["_bounds"])
352+
converted = []
353+
for vert in poly["_vertices"]:
354+
v = resolve(
355+
np.array(vert),
356+
res[0]["FindImage"]["entities"][0],
357+
operations)
358+
converted.append(v)
359+
polygons.append(converted)
360+
361+
self.images_polygons[str(uniqueid)] = {
362+
"bounds": bounds,
363+
"polygons": polygons,
364+
"tags": tags,
365+
"meta": meta
366+
}
361367

362368
except Exception as e:
363369
self.images_polygons[str(uniqueid)] = {
@@ -418,7 +424,8 @@ def __retrieve_bounding_boxes(self, index, constraints):
418424
uniqueid_str = str(uniqueid)
419425
self.images_bboxes[uniqueid_str] = {}
420426
try:
421-
res, images = self.db_connector.query(query)
427+
result, res, images = execute_batch(
428+
db=self.db_connector, q=query, blobs=[])
422429
bboxes = []
423430
tags = []
424431
meta = []

aperturedb/ParallelQuery.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -58,26 +58,27 @@ def execute_batch(q: Commands, blobs: Blobs, db: Connector,
5858
blobs_end = blobs_start + blobs_per_query
5959

6060
b_count = 0
61-
for req, resp in zip(q[start:end], r[start:end]):
62-
for k in req:
63-
# Ref to https://docs.aperturedata.io/query_language/Reference/shared_command_parameters/blobs
64-
blobs_where_default_true = \
65-
k in ["FindImage", "FindBlob", "FindVideo"] and (
66-
"blobs" not in req[k] or req[k]["blobs"])
67-
blobs_where_default_false = \
68-
k in [
69-
"FindDescriptor", "FindBoundingBox"] and "blobs" in req[k] and req[k]["blobs"]
70-
if blobs_where_default_true or blobs_where_default_false:
71-
count = resp[k]["returned"]
72-
b_count += count
61+
if issubclass(type(r), list):
62+
for req, resp in zip(q[start:end], r[start:end]):
63+
for k in req:
64+
# Ref to https://docs.aperturedata.io/query_language/Reference/shared_command_parameters/blobs
65+
blobs_where_default_true = \
66+
k in ["FindImage", "FindBlob", "FindVideo"] and (
67+
"blobs" not in req[k] or req[k]["blobs"])
68+
blobs_where_default_false = \
69+
k in [
70+
"FindDescriptor", "FindBoundingBox"] and "blobs" in req[k] and req[k]["blobs"]
71+
if blobs_where_default_true or blobs_where_default_false:
72+
count = resp[k]["returned"]
73+
b_count += count
7374

7475
try:
7576
# The returned blobs need to be sliced to match the
7677
# returned entities per command in query.
7778
response_handler(
7879
q[start:end],
7980
blobs[blobs_start:blobs_end],
80-
r[start:end],
81+
r[start:end] if issubclass(type(r), list) else r,
8182
b[blobs_returned:blobs_returned + b_count] if len(b) >= blobs_returned + b_count else None)
8283
except BaseException as e:
8384
logger.exception(e)
@@ -184,14 +185,23 @@ def update_refs(batched_commands):
184185
values["image_ref"] = updates[values["image_ref"]]
185186
if "video_ref" in values:
186187
values["video_ref"] = updates[values["video_ref"]]
187-
if "is_connected_to" in values and "_ref" in values["is_connected_to"]:
188-
values["is_connected_to"]["_ref"] = updates[values["is_connected_to"]["_ref"]]
188+
if "is_connected_to" in values:
189+
if "ref" in values["is_connected_to"]:
190+
values["is_connected_to"]["ref"] = updates[values["is_connected_to"]["ref"]]
191+
for op in ["any", "all"]:
192+
if op in values["is_connected_to"]:
193+
for idx in range(len(values["is_connected_to"][op])):
194+
if "ref" in values["is_connected_to"][op][idx]:
195+
values["is_connected_to"][op][idx]["ref"] = updates[values["is_connected_to"][op][idx]["ref"]]
189196
if "connect" in values and "ref" in values["connect"]:
190197
values["connect"]["ref"] = updates[values["connect"]["ref"]]
191198
if "src" in values:
192199
values["src"] = updates[values["src"]]
193200
if "dst" in values:
194201
values["dst"] = updates[values["dst"]]
202+
if "ref" in values:
203+
values["ref"] = updates[values["ref"]]
204+
195205
return batched_commands
196206

197207
q = update_refs([cmd for query in data for cmd in query[0]])
@@ -274,8 +284,11 @@ def filter_per_group(group):
274284
[v['status'] == 2 for i in r for k, v in filter_per_group(i)])
275285
sq = 0
276286
for i in range(0, len(r), self.commands_per_query):
277-
if all([v['status'] == 0 for j in r[i:i + self.commands_per_query] for k, v in filter_per_group(j)]):
278-
sq += 1
287+
# Some errors stop the whole query from being executed
288+
# https://docs.aperturedata.io/query_language/Overview/Responses#return-status
289+
if issubclass(type(r), list):
290+
if all([v['status'] == 0 for j in r[i:i + self.commands_per_query] for k, v in filter_per_group(j)]):
291+
sq += 1
279292
worker_stats["succeeded_queries"] = sq
280293
else:
281294
query_time = 1

aperturedb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
logger = logging.getLogger(__name__)
99

10-
__version__ = "0.4.25"
10+
__version__ = "0.4.26"
1111

1212
# set log level
1313
logger.setLevel(logging.DEBUG)

aperturedb/cli/ingest.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,15 @@ def generate_embedding_csv_from_image_csv(
258258
metadata = []
259259
connection = []
260260
embeddings = []
261+
errored = []
261262

262263
for i in tqdm(range(data.sample_count)):
263264
d = data[i]
264-
embeddings.append(np.frombuffer(d[1][1], dtype=np.float32))
265+
nparr = np.frombuffer(d[1][1], dtype=np.float32)
266+
if not nparr.any():
267+
errored.append(d[0][0]["AddImage"]["properties"])
268+
269+
embeddings.append(nparr)
265270
descriptor_id = f"{set_name}_{os.path.basename(input_file)}_{i}"
266271
metadata.append({
267272
"filename": f"{filename}.npy",
@@ -282,3 +287,6 @@ def generate_embedding_csv_from_image_csv(
282287
f"{filename}_metadata.adb.csv", index=False)
283288
pd.json_normalize(connection).to_csv(
284289
f"{filename}_connection.adb.csv", index=False)
290+
if(len(errored) > 0):
291+
pd.json_normalize(errored).to_csv(
292+
f"{filename}_errored.csv", index=False)

0 commit comments

Comments
 (0)