Merge pull request #18 from aperture-data/add_video_features

luisremis · web-flow · commit c6be28ad2183 · 2021-08-24T07:36:59.000-07:00
Add video features
diff --git a/aperturedb/ImageDownloader.py b/aperturedb/ImageDownloader.py
@@ -26,7 +26,7 @@ class ImageDownloaderCSV(CSVParser.CSVParser):
     def __init__(self, filename, check_image=True):
 
         self.has_filename = False
-        self.check_image = check_image
+        self.check_img    = check_image
 
         super().__init__(filename)
 
diff --git a/aperturedb/NotebookHelpers.py b/aperturedb/NotebookHelpers.py
@@ -3,9 +3,10 @@
 import numpy as np
 
 from PIL import Image
+from IPython.display import Video
 from IPython.display import display as ds
 
-DESTINATION_FOLDER = "result_images"
+DESTINATION_FOLDER = "results"
 
 def check_folder(folder):
     if not os.path.exists(folder):
@@ -53,3 +54,14 @@ def draw_bboxes(image, boxes=[], tags=[], save=False):
         check_folder(DESTINATION_FOLDER)
         img_file = DESTINATION_FOLDER + '/res_bboxes.jpg'
         cv2.imwrite(img_file, cv_image)
+
+def display_video_mp4(blob):
+    """
+    Display an mp4 video through IPython's display machinery.
+
+    The bytes are first written to "video_tmp.mp4" inside
+    DESTINATION_FOLDER (check_folder is called on it beforehand), so
+    every call overwrites the file left by the previous one.
+
+    Args:
+        blob: encoded mp4 content, as bytes.
+    """
+
+    check_folder(DESTINATION_FOLDER)
+
+    name = DESTINATION_FOLDER + "/" + "video_tmp.mp4"
+
+    # The context manager closes the file even when write() raises,
+    # which a plain open()/close() pair does not guarantee.
+    with open(name, 'wb') as fd:
+        fd.write(blob)
+
+    ds(Video(name, embed=True))
diff --git a/aperturedb/VideoDownloader.py b/aperturedb/VideoDownloader.py
@@ -0,0 +1,150 @@
+import time
+import requests
+import os
+from os import path
+
+import cv2
+import numpy as np
+
+from aperturedb import ParallelLoader
+from aperturedb import CSVParser
+from aperturedb import ProgressBar
+
+HEADER_PATH = "filename"  # optional CSV column: local path the video is saved to
+HEADER_URL  = "url"  # required CSV column: address the video is downloaded from
+
+class VideoDownloaderCSV(CSVParser.CSVParser):
+
+    '''
+        CSV-backed list of videos to download.
+
+        The csv file must contain a "url" column. A "filename" column is
+        optional; when it is absent, the destination path is derived
+        from the url (see url_to_filename).
+
+        Indexing an instance yields a (url, filename) tuple for that row.
+    '''
+
+    def __init__(self, filename, check_video=True):
+
+        # Both attributes are assigned before the parent constructor runs;
+        # presumably validate() is invoked from there and needs
+        # has_filename to exist already -- TODO confirm.
+        self.check_video = check_video
+        self.has_filename = False
+
+        super().__init__(filename)
+
+    def __getitem__(self, idx):
+        '''Return the (url, filename) pair for row idx of the csv.'''
+
+        url = self.df.loc[idx, HEADER_URL]
+        filename = (self.df.loc[idx, HEADER_PATH] if self.has_filename
+                    else self.url_to_filename(url))
+
+        return url, filename
+
+    def url_to_filename(self, url):
+        '''Map a url to "/tmp/videos/" plus the text after its last "/".'''
+
+        basename = url.rsplit("/", 1)[-1]
+
+        return "/tmp/videos/" + basename
+
+    def validate(self):
+        '''Record the csv header; raise if the "url" column is missing.'''
+
+        columns = list(self.df.columns.values)
+        self.header = columns
+
+        if HEADER_URL not in columns:
+            raise Exception("Error with CSV file field: url. Must be a field")
+
+        # Only ever switched on here; the flag is never reset to False.
+        if HEADER_PATH in columns:
+            self.has_filename = True
+
+class VideoDownloader(ParallelLoader.ParallelLoader):
+
+    '''
+        ApertureDB Video Downloader.
+
+        Fetches each (url, filename) pair produced by a generator such as
+        VideoDownloaderCSV and checks with OpenCV that the stored file can
+        be opened. Failures are printed and counted in self.error_counter.
+    '''
+
+    def __init__(self, db, dry_run=False):
+
+        super().__init__(db, dry_run=dry_run)
+
+        self.type = "video"
+
+        # When True, files already on disk and readable are not downloaded
+        # again. NOTE(review): nothing in this class ever sets it to True,
+        # and the generator's own check_video flag is not read here.
+        self.check_video = False
+
+    def _opens_as_video(self, filename):
+        '''Return whether OpenCV can open filename; always release the capture.'''
+
+        capture = cv2.VideoCapture(filename)
+        try:
+            return capture.isOpened()
+        finally:
+            capture.release()
+
+    def check_if_video_is_ok(self, filename, url):
+        '''
+            Return True if filename exists and can be opened as a video.
+            url is only used in the printed diagnostics.
+        '''
+
+        if not os.path.exists(filename):
+            return False
+
+        try:
+            if not self._opens_as_video(filename):
+                print("Video present but error reading it:", url)
+                return False
+        except Exception:
+            print("Video present but error decoding:", url)
+            return False
+
+        return True
+
+    def _fetch_and_store(self, url, filename):
+        '''
+            Download url into filename.
+            Return True only if the stored file opens as a video; an
+            unreadable download is deleted again.
+        '''
+
+        try:
+            videodata = requests.get(url)
+        except requests.RequestException as e:
+            # A network failure must count as one error, not propagate
+            # and terminate the whole worker thread.
+            print("Error downloading:", url, e)
+            return False
+
+        if not videodata.ok:
+            print("URL not found:", url)
+            return False
+
+        with open(filename, "wb") as fd:
+            fd.write(videodata.content)
+
+        try:
+            if self._opens_as_video(filename):
+                return True
+            print("Downloaded Video size error:", url)
+        except Exception:
+            print("Downloaded Video cannot be decoded:", url)
+
+        os.remove(filename)
+        return False
+
+    def download_video(self, url, filename):
+        '''
+            Download one video to filename.
+            Increments self.error_counter on failure and appends the
+            elapsed time to self.times_arr. When self.check_video is set
+            and the file is already present and readable, nothing is
+            downloaded and no time sample is recorded.
+        '''
+
+        start = time.time()
+
+        if self.check_video and self.check_if_video_is_ok(filename, url):
+            return
+
+        folder = os.path.dirname(filename)
+        # A bare filename has no directory part; os.makedirs("") would raise.
+        if folder:
+            os.makedirs(folder, exist_ok=True)
+
+        if not self._fetch_and_store(url, filename):
+            self.error_counter += 1
+
+        self.times_arr.append(time.time() - start)
+
+    def worker(self, thid, generator, start, end):
+        '''
+            Download generator[i] for i in [start, end).
+            Thread 0 additionally reports progress when self.stats is set.
+        '''
+
+        report = thid == 0 and self.stats
+        if report:
+            pb = ProgressBar.ProgressBar("download_progress.txt")
+
+        for i in range(start, end):
+
+            url, filename = generator[i]
+
+            self.download_video(url, filename)
+
+            if report:
+                pb.update((i - start) / (end - start))
+
+        if report:
+            pb.update(1)
+
+    def print_stats(self):
+        '''Print timing, throughput and error statistics of the last run.'''
+
+        print("====== ApertureDB VideoDownloader Stats ======")
+
+        times = np.array(self.times_arr)
+        if times.size > 0:
+            print("Avg Video download time(s):", np.mean(times))
+            print("Video download time std:", np.std(times))
+            print("Avg download throughput (videos/s):",
+                1 / np.mean(times) * self.numthreads)
+        else:
+            # np.mean/np.std of an empty array give nan plus a RuntimeWarning.
+            print("No download times recorded.")
+
+        print("Total time(s):", self.ingestion_time)
+        print("Overall throughput (videos/s):",
+            self.total_elements / self.ingestion_time)
+        print("Total errors encountered:", self.error_counter)
+        print("=============================================")
diff --git a/aperturedb/VideoLoader.py b/aperturedb/VideoLoader.py
@@ -0,0 +1,140 @@
+import math
+import time
+from threading import Thread
+
+import numpy as np
+import cv2
+
+from aperturedb import Status
+from aperturedb import ParallelLoader
+from aperturedb import CSVParser
+
+HEADER_PATH = "filename"  # name the first CSV column must have (checked in validate)
+PROPERTIES  = "properties"  # key of the properties entry in the dict returned per row
+CONSTRAINTS = "constraints"  # key of the constraints entry in the dict returned per row
+
+class VideoGeneratorCSV(CSVParser.CSVParser):
+
+    '''
+        ApertureDB Video Data loader.
+        Expects a csv file with the following columns:
+
+            filename,PROP_NAME_1, ... PROP_NAME_N,constraint_PROP1
+
+        Example csv file:
+        filename,id,label,constraint_id
+        /home/user/file1.mp4,321423532,dog,321423532
+        /home/user/file2.mp4,42342522,cat,42342522
+        ...
+
+        Indexing an instance returns a dict with the raw file content
+        under "video_blob", plus "properties" and "constraints" when the
+        row defines any. A row whose file cannot be read yields {}.
+    '''
+
+    def __init__(self, filename, check_video=True):
+        '''
+            filename:    csv file, handed to the CSVParser constructor.
+            check_video: when True, each video is also opened with OpenCV
+                         and a message is printed if that fails.
+        '''
+
+        super().__init__(filename)
+
+        self.check_video = check_video
+
+        # Every column after the first is a constraint when its name
+        # starts with the constraint prefix, and a property otherwise.
+        columns = self.header[1:]
+        prefix = CSVParser.CONTRAINTS_PREFIX
+        self.props_keys       = [x for x in columns if not x.startswith(prefix)]
+        self.constraints_keys = [x for x in columns if x.startswith(prefix)]
+
+    def __getitem__(self, idx):
+        '''Return the data dict for row idx, or {} if the video cannot be read.'''
+
+        filename = self.df.loc[idx, HEADER_PATH]
+
+        video_ok, video = self.load_video(filename)
+        if not video_ok:
+            # Two print arguments give the same output as concatenation but
+            # do not raise TypeError when filename is not a string.
+            print("Error loading video:", filename)
+            return {}
+
+        data = {"video_blob": video}
+
+        properties  = self.parse_properties(self.df, idx)
+        constraints = self.parse_constraints(self.df, idx)
+
+        if properties:
+            data[PROPERTIES] = properties
+
+        if constraints:
+            data[CONSTRAINTS] = constraints
+
+        return data
+
+    def load_video(self, filename):
+        '''
+            Read filename and return (True, bytes), or (False, None) when
+            the file cannot be read.
+
+            With check_video set, the file is first opened with OpenCV.
+            That check is advisory: a failure is printed, but the bytes
+            are still returned if the file itself is readable.
+        '''
+
+        if self.check_video:
+            try:
+                capture = cv2.VideoCapture(filename)
+                try:
+                    if not capture.isOpened():
+                        print("Video reading Error:", filename)
+                finally:
+                    capture.release()
+            except Exception:
+                print("Video Error:", filename)
+
+        try:
+            # The context manager closes the file even if read() fails.
+            with open(filename, "rb") as fd:
+                return True, fd.read()
+        except Exception:
+            # Deliberately broad: any unreadable row is reported and skipped.
+            print("Video Error:", filename)
+
+        return False, None
+
+    def validate(self):
+        '''Record the csv header; raise unless "filename" is the first column.'''
+
+        self.header = list(self.df.columns.values)
+
+        # An empty header is reported with the same error instead of IndexError.
+        if not self.header or self.header[0] != HEADER_PATH:
+            raise Exception("Error with CSV file field: filename. Must be first field")
+
+class VideoLoader(ParallelLoader.ParallelLoader):
+
+    '''
+        ApertureDB Video Loader.
+
+        Meant to be driven by a "generator" object whose elements are
+        "video_data" dicts of the form:
+            video_data = {
+                "properties":  properties,
+                "constraints": constraints,
+                "operations":  operations,
+                "format":      format,
+                "video_blob":  (bytes),
+            }
+        Elements with a missing or empty "video_blob" are skipped; the
+        other keys are optional.
+    '''
+
+    def __init__(self, db, dry_run=False):
+
+        super().__init__(db, dry_run=dry_run)
+        self.type = "video"
+
+    def generate_batch(self, video_data):
+        '''
+            Turn a batch of video_data elements into the pair
+            (list of AddVideo commands, list of blobs), index-aligned.
+        '''
+
+        # key in the element -> key in the AddVideo command
+        field_map = (
+            ("properties",  "properties"),
+            ("constraints", "if_not_found"),
+            ("operations",  "operations"),
+            ("format",      "format"),
+        )
+
+        commands = []
+        blobs = []
+
+        for element in video_data:
+
+            if "video_blob" not in element or len(element["video_blob"]) == 0:
+                print("WARNING: Skipping empty video.")
+                continue
+
+            params = {dst: element[src] for src, dst in field_map if src in element}
+
+            commands.append({"AddVideo": params})
+            blobs.append(element["video_blob"])
+
+        return commands, blobs