Merge pull request #21 from aperture-data/release-0.0.11

luisremis · web-flow · commit 248c634bc415 · 2021-08-24T18:33:38.000-07:00
Release 0.0.11
diff --git a/aperturedb/BBoxLoader.py b/aperturedb/BBoxLoader.py
@@ -136,7 +136,14 @@ def generate_batch(self, bbox_data):
             }
 
             if "properties" in data:
-                ai["AddBoundingBox"]["properties"] = data[CSVParser.PROPERTIES]
+                props = data[CSVParser.PROPERTIES]
+                if "_label" in props:
+                    ai["AddBoundingBox"]["label"] = props["_label"]
+                    props.pop("_label")
+
+                # Check if props is not empty after removing "_label"
+                if props:
+                    ai["AddBoundingBox"]["properties"] = props
 
             q.append(ai)
 
diff --git a/aperturedb/ImageDownloader.py b/aperturedb/ImageDownloader.py
@@ -26,7 +26,7 @@ class ImageDownloaderCSV(CSVParser.CSVParser):
     def __init__(self, filename, check_image=True):
 
         self.has_filename = False
-        self.check_image = check_image
+        self.check_img    = check_image
 
         super().__init__(filename)
 
diff --git a/aperturedb/Images.py b/aperturedb/Images.py
@@ -11,6 +11,8 @@
 from matplotlib.collections import PatchCollection
 from matplotlib.patches import Polygon
 
+from aperturedb import Status
+
 class Constraints(object):
 
     def __init__(self):
@@ -88,7 +90,7 @@ def __init__(self, db, batch_size=100):
         self.display_limit = 20
 
         self.img_id_prop     = "_uniqueid"
-        self.bbox_label_prop = "label"
+        self.bbox_label_prop = "_label"
 
     def __retrieve_batch(self, index):
 
@@ -152,15 +154,19 @@ def __get_bounding_boxes_polygons(self, index):
                 "blobs": False,
             }
         }, {
-            "FindEntity": {
+            "FindBlob": {
                 "is_connected_to": {
                     "ref": 1
                 },
+                "constraints": {
+                    "type": ["==", "segmentation"]
+                },
                 "blobs": True,
             }
         }]
 
         res, polygons = self.db_connector.query(query)
+
         ret_poly.append(polygons)
 
         uniqueid_str = str(uniqueid)
@@ -225,9 +231,7 @@ def __retrieve_bounding_boxes(self, index):
                 "_ref": 2,
                 "blobs": False,
                 "coordinates": True,
-                "results": {
-                    "list": [self.bbox_label_prop],
-                }
+                "labels": True,
             }
         }]
 
@@ -458,18 +462,12 @@ def display(self, show_bboxes=False, show_segmentation=False, limit=None):
 
     def get_props_names(self):
 
-        query = [ {
-            "GetSchema": {
-                "type" : "entities"
-            }
-        }]
-
-        res, images = self.db_connector.query(query)
+        status = Status.Status(self.db_connector)
+        schema = status.get_schema()
 
         try:
-            dictio = res[0]["FindImageInfo"]["entities"]["classes"][0]["_Image"]
-            search_key = "VD:" # TODO WHAT IS THIS?
-            props_array = [key for key, val in dictio.items() if not search_key in key]
+            dictio = schema["entities"]["classes"]["_Image"]["properties"]
+            props_array = [key for key, val in dictio.items()]
         except:
             props_array = []
             print("Cannot retrieve properties")
diff --git a/aperturedb/NotebookHelpers.py b/aperturedb/NotebookHelpers.py
@@ -3,9 +3,10 @@
 import numpy as np
 
 from PIL import Image
+from IPython.display import Video
 from IPython.display import display as ds
 
-DESTINATION_FOLDER = "result_images"
+DESTINATION_FOLDER = "results"
 
 def check_folder(folder):
     if not os.path.exists(folder):
@@ -53,3 +54,14 @@ def draw_bboxes(image, boxes=[], tags=[], save=False):
         check_folder(DESTINATION_FOLDER)
         img_file = DESTINATION_FOLDER + '/res_bboxes.jpg'
         cv2.imwrite(img_file, cv_image)
+
+def display_video_mp4(blob):
+    """Write `blob` to DESTINATION_FOLDER/video_tmp.mp4 and display it via IPython."""
+    check_folder(DESTINATION_FOLDER)
+
+    name = DESTINATION_FOLDER + "/" + "video_tmp.mp4"
+    # The context manager closes the file even if the write raises.
+    with open(name, 'wb') as fd:
+        fd.write(blob)
+
+    ds(Video(name, embed=True))
diff --git a/aperturedb/Status.py b/aperturedb/Status.py
@@ -23,6 +23,27 @@ def status(self):
 
         return self.connector.get_last_response_str()
 
+    def get_schema(self, refresh=False):
+        """Run a GetSchema query for entities and return its response section.
+
+        `refresh` is passed through in the query. Returns an empty dict, after
+        printing the last response, when the reply has no "GetSchema" entry.
+        """
+        query = [{
+            "GetSchema": {
+                "type": "entities",
+                "refresh": refresh,
+            }
+        }]
+        res, _ = self.connector.query(query)
+        try:
+            return res[0]["GetSchema"]
+        except (KeyError, IndexError, TypeError):
+            # Named exceptions only: a bare except would also trap Ctrl-C.
+            print("Cannot retrieve schema")
+            print(self.connector.get_last_response_str())
+            return {}
+
     def count_images(self, constraints={}):
 
         q = [{
diff --git a/aperturedb/VideoDownloader.py b/aperturedb/VideoDownloader.py
@@ -0,0 +1,150 @@
+import time
+import requests
+import os
+from os import path
+
+import cv2
+import numpy as np
+
+from aperturedb import ParallelLoader
+from aperturedb import CSVParser
+from aperturedb import ProgressBar
+
+HEADER_PATH = "filename"
+HEADER_URL  = "url"
+
+class VideoDownloaderCSV(CSVParser.CSVParser):
+    """
+    CSV-backed source of (url, filename) pairs for the video downloader.
+
+    The csv file must contain a "url" column; a "filename" column is
+    optional. Without it, the file name is derived from the url and
+    placed under /tmp/videos/.
+    """
+
+    def __init__(self, filename, check_video=True):
+        """Record the options, then let the parent parser handle `filename`."""
+        # Assigned before the parent constructor runs (NOTE(review): presumably
+        # because it calls validate(), which may flip has_filename -- confirm).
+        self.check_video = check_video
+        self.has_filename = False
+
+        super().__init__(filename)
+
+    def __getitem__(self, idx):
+        """Return the (url, filename) pair for row `idx` of self.df."""
+        url = self.df.loc[idx, HEADER_URL]
+
+        if not self.has_filename:
+            return url, self.url_to_filename(url)
+
+        return url, self.df.loc[idx, HEADER_PATH]
+
+    def url_to_filename(self, url):
+        """Map a url to /tmp/videos/<last path segment of the url>."""
+        last_segment = url.rsplit("/", 1)[-1]
+
+        return "/tmp/videos/" + last_segment
+
+    def validate(self):
+        """Check the header for "url" and note whether "filename" exists."""
+        columns = list(self.df.columns.values)
+        self.header = columns
+
+        if HEADER_URL not in columns:
+            raise Exception("Error with CSV file field: url. Must be a field")
+
+        if HEADER_PATH in columns:
+            self.has_filename = True
+
+class VideoDownloader(ParallelLoader.ParallelLoader):
+    """Download videos listed by a generator of (url, filename) pairs."""
+
+    def __init__(self, db, dry_run=False):
+        """Forward `db` and `dry_run` to ParallelLoader; existing-file check off."""
+        super().__init__(db, dry_run=dry_run)
+
+        self.type = "video"
+        self.check_video = False
+
+    def check_if_video_is_ok(self, filename, url):
+        """Return True if `filename` exists and OpenCV reports it as opened."""
+        if not os.path.exists(filename):
+            return False
+
+        try:
+            capture = cv2.VideoCapture(filename)
+            opened = capture.isOpened()
+            capture.release()  # release the capture handle right away
+        except Exception:
+            print("Video present but error decoding:", url)
+            return False
+        if not opened:
+            print("Video present but error reading it:", url)
+        return opened
+
+    def download_video(self, url, filename):
+        """Download `url` to `filename`; delete the file if OpenCV cannot open it."""
+        start = time.time()
+
+        if self.check_video and self.check_if_video_is_ok(filename, url):
+            return
+
+        folder = os.path.dirname(filename)
+        if folder and not os.path.exists(folder):  # "" for a bare file name
+            os.makedirs(folder, exist_ok=True)
+
+        videodata = requests.get(url)
+        if videodata.ok:
+            with open(filename, "wb") as fd:
+                fd.write(videodata.content)
+
+            try:
+                capture = cv2.VideoCapture(filename)
+                opened = capture.isOpened()
+                capture.release()  # release before the file may be removed
+                problem = None if opened else "Downloaded Video cannot be opened:"
+            except Exception:
+                problem = "Downloaded Video cannot be decoded:"
+            if problem is not None:
+                print(problem, url)
+                os.remove(filename)
+                self.error_counter += 1
+        else:
+            print("URL not found:", url)
+            self.error_counter += 1
+
+        self.times_arr.append(time.time() - start)
+
+    def worker(self, thid, generator, start, end):
+        """Download items start..end-1 of `generator`; thread 0 may show progress."""
+        # Decide once so `pb` is always bound whenever it is used below.
+        show_progress = thid == 0 and self.stats
+        if show_progress:
+            pb = ProgressBar.ProgressBar("download_progress.txt")
+
+        for i in range(start, end):
+            url, filename = generator[i]
+            self.download_video(url, filename)
+
+            if show_progress:
+                pb.update((i - start) / (end - start))
+
+        if show_progress:
+            pb.update(1)
+
+    def print_stats(self):
+        """Print download timing, throughput and error totals to stdout."""
+        print("====== ApertureDB VideoDownloader Stats ======")
+
+        times = np.array(self.times_arr)
+        print("Avg Video download time(s):", np.mean(times))
+        print("Video download time std:", np.std(times))
+        print("Avg download throughput (videos/s):",
+              1 / np.mean(times) * self.numthreads)
+
+        print("Total time(s):", self.ingestion_time)
+        print("Overall throughput (videos/s):",
+              self.total_elements / self.ingestion_time)
+        print("Total errors encountered:", self.error_counter)
+        print("=============================================")
diff --git a/aperturedb/VideoLoader.py b/aperturedb/VideoLoader.py
diff --git a/setup.py b/setup.py