Merge pull request #11 from aperture-data/pytorch_connectors

luisremis · web-flow · commit 9bd1eab2859c · 2021-05-13T11:23:53.000-07:00
PyTorch connectors
diff --git a/aperturedb/Images.py b/aperturedb/Images.py
@@ -68,7 +68,7 @@ def rotate(self, angle, resize=False):
 
 class Images(object):
 
-    def __init__(self, db, batch_size=20):
+    def __init__(self, db, batch_size=100):
 
         self.db_connector = db
 
@@ -84,7 +84,6 @@ def __init__(self, db, batch_size=20):
         self.search_result = None
 
         self.batch_size = batch_size
-        self.max_cached_images = 1000
         self.total_cached_images = 0
         self.display_limit = 20
 
@@ -279,7 +278,7 @@ def get_np_image_by_index(self, index):
 
         image = self.get_image_by_index(index)
         # Just decode the image from buffer
-        nparr = np.fromstring(image, np.uint8)
+        nparr = np.frombuffer(image, dtype=np.uint8)
         image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
@@ -337,7 +336,6 @@ def search(self, constraints=None, operations=None, format=None, limit=None):
 
             for ent in entities:
                 self.images_ids.append(ent[self.img_id_prop])
-
         except:
             print("Error with search")
 
@@ -424,7 +422,7 @@ def display(self, show_bboxes=False, show_segmentation=False, limit=None):
             image    = self.get_image_by_index(i)
 
             # Just decode the image from buffer
-            nparr = np.fromstring(image, np.uint8)
+            nparr = np.frombuffer(image, dtype=np.uint8)
             image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
             if show_bboxes:
diff --git a/aperturedb/PyTorchDataset.py b/aperturedb/PyTorchDataset.py
@@ -1,30 +1,136 @@
 import os
+import math
 
-from aperturedb import Image
+import numpy as np
+import cv2
+
+from aperturedb import Images
 
 import torch
 from torch.utils import data
 from torchvision import transforms
 
-class ApertureDBDataset(data.Dataset):
+class ApertureDBDatasetConstraints(data.Dataset):
 
     # initialise function of class
     def __init__(self, db, constraints):
 
-        self.imgs_handler = Image.Images(db)
-        self.imgs_handler.search(constraints=constraints, limit=50)
+        self.imgs_handler = Images.Images(db)
+        self.imgs_handler.search(constraints=constraints)
 
-    # obtain the sample with the given index
     def __getitem__(self, index):
 
-        img   = self.imgs_handler.get_np_image_by_index(index)
-        label = self.imgs_handler.get_bboxes_by_index(index)
+        if index >= self.imgs_handler.total_results():
+            raise StopIteration
 
-        img = transforms.ToTensor()(img)
-        # label = torch.as_tensor(label, dtype=torch.int64)
+        img = self.imgs_handler.get_np_image_by_index(index)
 
+        # This is temporary until we define a good, generic way of
+        # retrieving a label associated with the image.
+        label = "none"
         return img, label
 
-    # the total number of samples (optional)
     def __len__(self):
+
         return self.imgs_handler.total_results()
+
+class ApertureDBDataset(data.Dataset):
+
+    # initialise function of class
+    def __init__(self, db, query, label_prop=None):
+
+        self.db = db
+        self.query = query
+        self.find_image_idx = None
+        self.total_elements = 0
+        self.batch_size     = 100
+        self.batch_images   = []
+        self.batch_start    = 0
+        self.batch_end      = 0
+        self.label_prop     = label_prop
+
+        for i in range(len(query)):
+
+            name = list(query[i].keys())[0]
+            if name == "FindImage":
+                self.find_image_idx = i
+
+        if self.find_image_idx is None:
+            print("Query error. The query must containt one FindImage command")
+            raise Exception('Query Error')
+
+        if not "results" in self.query[self.find_image_idx]["FindImage"]:
+            self.query[self.find_image_idx]["FindImage"]["results"] = {}
+
+        self.query[self.find_image_idx]["FindImage"]["results"]["batch"] = {}
+
+        try:
+            r,b = self.db.query(self.query)
+            batch = r[self.find_image_idx]["FindImage"]["batch"]
+            self.total_elements = batch["total_elements"]
+        except:
+            print("Query error:")
+            print(self.query)
+            print(self.db.get_last_response_str())
+            raise
+
+    def __getitem__(self, index):
+
+        if index >= self.total_elements:
+            raise StopIteration
+
+        if not self.is_in_range(index):
+            self.get_batch(index)
+
+        idx = index % self.batch_size
+        img   = self.batch_images[idx]
+        label = self.batch_labels[idx]
+
+        nparr = np.frombuffer(img, dtype=np.uint8)
+        img   = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+        img   = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+        return img, label
+
+    def __len__(self):
+
+        return self.total_elements
+
+    def is_in_range(self, index):
+
+        if index >= self.batch_start and index < self.batch_end:
+            return True
+
+        return False
+
+    def get_batch(self, index):
+
+        total_batches = math.ceil(self.total_elements / self.batch_size)
+        batch_idx     = math.floor(index / self.batch_size)
+
+        query  = self.query
+        qbatch = query[self.find_image_idx]["FindImage"]["results"]["batch"]
+        qbatch["batch_size"] = self.batch_size
+        qbatch["batch_id"]   = batch_idx
+
+        query[self.find_image_idx]["FindImage"]["results"]["batch"] = qbatch
+
+        try:
+            r,b = self.db.query(query)
+            if len(b) == 0:
+                print("index:", index)
+                raise Exception("No results returned from ApertureDB")
+
+            self.batch_images = b
+            self.batch_start  = self.batch_size * batch_idx
+            self.batch_end    = self.batch_start + len(b)
+
+            if self.label_prop:
+                entities = r[self.find_image_idx]["FindImage"]["entities"]
+                self.batch_labels = [ l[self.label_prop] for l in entities]
+            else:
+                self.batch_labels = [ "none" for l in range(len(b))]
+        except:
+            print("Query error:")
+            print(self.db.get_last_response_str())
+            raise
diff --git a/test/test_torch_connector.py b/test/test_torch_connector.py
@@ -0,0 +1,103 @@
+import argparse
+import time
+import unittest
+
+import dbinfo
+
+from aperturedb import Connector, Status
+from aperturedb import Images
+from aperturedb import PyTorchDataset
+
+class TestTorch(unittest.TestCase):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # ApertureDB Server Info
+        self.db_host = dbinfo.DB_HOST
+        self.db_port = dbinfo.DB_PORT
+
+        db_up = False
+        attempts = 0
+        while(not db_up):
+            try:
+                db = Connector.Connector(self.db_host, self.db_port)
+                db_up = True
+                if (attempts > 0):
+                    print("Connection to ApertureDB successful.")
+            except:
+                print("Attempt", attempts,
+                      "to connect to ApertureDB failed, retying...")
+                attempts += 1
+                time.sleep(1) # sleeps 1 second
+
+            if attempts > 10:
+                print("Failed to connect to ApertureDB after 10 attempts")
+                exit()
+
+class TestTorchDatasets(TestTorch):
+
+    '''
+        These tests need to be run after the Loaders, because they use
+        data inserted by the loaders.
+    '''
+
+    def test_omConstraints(self):
+
+        db = Connector.Connector(self.db_host, self.db_port)
+
+        const = Images.Constraints()
+        const.greaterequal("age", 0)
+        dataset = PyTorchDataset.ApertureDBDatasetConstraints(db, constraints=const)
+
+        dbstatus = Status.Status(db)
+        self.assertEqual(len(dataset), dbstatus.count_images())
+
+        start = time.time()
+
+        # Iterate over dataset.
+        for img in dataset:
+            if len(img[0]) < 0:
+                print("Empty image?")
+                self.assertEqual(True, False)
+
+        print("\n")
+        print("Throughput (imgs/s):", len(dataset) / (time.time() - start))
+
+    def test_nativeContraints(self):
+
+        db = Connector.Connector(self.db_host, self.db_port)
+
+        query = [ {
+            "FindImage": {
+                "constraints": {
+                    "age": [">=", 0]
+                },
+                "operations": [
+                    {
+                        "type": "resize",
+                        "width": 224,
+                        "height": 224
+                    }
+                ],
+                "results": {
+                    "list": ["license"]
+                }
+            }
+        }]
+
+        dataset = PyTorchDataset.ApertureDBDataset(db, query, label_prop="license")
+
+        dbstatus = Status.Status(db)
+        self.assertEqual(len(dataset), dbstatus.count_images())
+
+        start = time.time()
+
+        # Iterate over dataset.
+        for img in dataset:
+            if len(img[0]) < 0:
+                print("Empty image?")
+                self.assertEqual(True, False)
+
+        print("\n")
+        print("Throughput (imgs/s):", len(dataset) / (time.time() - start))