aperture-data
diff --git a/‎.github/workflows/main.yaml‎
Lines changed: 52 additions & 0 deletions b/‎.github/workflows/main.yaml‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 5 additions & 0 deletions b/‎.gitignore‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 15 additions & 0 deletions b/‎README.md‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎aperturedb/BBoxLoader.py‎
Lines changed: 146 additions & 0 deletions b/‎aperturedb/BBoxLoader.py‎
Lines changed: 146 additions & 0 deletions
diff --git a/‎aperturedb/BlobLoader.py‎
Lines changed: 127 additions & 0 deletions b/‎aperturedb/BlobLoader.py‎
Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,52 @@
+# This is a basic workflow to run tests on commit/PRs on develop
+
+name: CI-develop
+
+# Controls when the action will run.
+on:
+  # Triggers the workflow on push or pull request events
+  # but only for the develop and master branches
+  push:
+    branches:
+      - develop
+      - master
+  pull_request:
+    branches:
+      - develop
+      - master
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# A workflow run is made up of one or more jobs
+# that can run sequentially or in parallel
+jobs:
+  # This workflow contains a single job called "build-test"
+  build-test:
+    # The type of runner that the job will run on (latest Ubuntu image)
+    runs-on: ubuntu-latest
+
+    # Steps represent a sequence of tasks that will be
+    # executed as part of the job
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE,
+      # so your job can access it
+      - uses: actions/checkout@v2
+
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_PASS }}
+
+      - name: Run Tests
+        run: |
+          pip3 install .
+          pip3 install ipython torch torchvision
+          cd test
+          bash run_test.sh
+
+      - name: Build Notebook Docker
+        run: |
+          cd docker
+          bash build_images.sh
@@ -127,3 +127,8 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+#Data files
+*.adb.csv
+*.jpg
+*.npy
@@ -0,0 +1,15 @@
+# ApertureDB Client Python Module
+
+This is the python client module for ApertureDB.
+
+It provides a connector to ApertureDB instances using
+the open source connector for [VDMS](https://github.com/IntelLabs/vdms).
+
+It also implements an Object-Mapper API to interact with 
+elements in ApertureDB at the object level.
+
+* Status.py provides helper methods to retrieve information about the db.
+* Images.py provides the Object-Mapper for image related objects (images, bounding boxes, etc.)
+* NotebookHelpers.py provides helpers to show images/bounding boxes on Jupyter Notebooks
+
+For more information, visit https://aperturedata.io
@@ -0,0 +1,146 @@
+import math
+import time
+from threading import Thread
+
+import numpy  as np
+import pandas as pd
+
+from aperturedb import Status
+from aperturedb import ParallelLoader
+from aperturedb import CSVParser
+
+HEADER_X_POS  = "x_pos"  # required name of the 2nd csv column (bbox x)
+HEADER_Y_POS  = "y_pos"  # required name of the 3rd csv column (bbox y)
+HEADER_WIDTH  = "width"  # required name of the 4th csv column (bbox width)
+HEADER_HEIGHT = "height"  # required name of the 5th csv column (bbox height)
+IMG_KEY_PROP  = "img_key_prop"  # data-dict key: name of the image property used to find the image
+IMG_KEY_VAL   = "img_key_value"  # data-dict key: value of that image property for the row
+
+class BBoxGeneratorCSV(CSVParser.CSVParser):
+
+    '''
+        ApertureDB BBox Data loader.
+        Expects a csv file with the following columns:
+
+            IMG_KEY,x_pos,y_pos,width,height,BBOX_PROP_NAME_1, ... BBOX_PROP_NAME_N
+
+        IMG_KEY column has the property name of the image property that
+        the bounding box will be connected to, and each row has the value
+        that will be used for finding the image.
+
+        x_pos,y_pos,width,height are the coordinates of the bounding boxes,
+        as integers (unit is in pixels)
+
+        BBOX_PROP_NAME_N is an arbitrary name of the property of the bounding
+        box, and each row has the value for that property.
+
+        Example csv file:
+        img_unique_id,x_pos,y_pos,width,height,type
+        d5b25253-9c1e,257,154,84,125,manual
+        d5b25253-9c1e,7,537,522,282,manual
+        ...
+    '''
+
+    def __init__(self, filename):
+
+        super().__init__(filename)
+
+        self.props_keys       = [x for x in self.header[5:] if not x.startswith(CSVParser.CONTRAINTS_PREFIX) ]
+        self.constraints_keys = [x for x in self.header[5:] if x.startswith(CSVParser.CONTRAINTS_PREFIX) ]
+
+        self.img_key = self.header[0]
+
+    def __getitem__(self, idx):
+
+        data = {
+            "x":      int(self.df.loc[idx, HEADER_X_POS]),
+            "y":      int(self.df.loc[idx, HEADER_Y_POS]),
+            "width":  int(self.df.loc[idx, HEADER_WIDTH]),
+            "height": int(self.df.loc[idx, HEADER_HEIGHT]),
+        }
+
+        val = self.df.loc[idx, self.img_key]
+
+        if val != "":
+            data[IMG_KEY_PROP] = self.img_key
+            data[IMG_KEY_VAL]  = val
+
+        properties  = self.parse_properties(self.df, idx)
+        constraints = self.parse_constraints(self.df, idx)
+
+        if properties:
+            data[CSVParser.PROPERTIES] = properties
+
+        if constraints:
+            data[CSVParser.CONSTRAINTS] = constraints
+
+        return data
+
+    def validate(self):
+
+        self.header = list(self.df.columns.values)
+
+        if self.header[1] != HEADER_X_POS:
+            raise Exception("Error with CSV file field: " + HEADER_X_POS)
+        if self.header[2] != HEADER_Y_POS:
+            raise Exception("Error with CSV file field: " + HEADER_Y_POS)
+        if self.header[3] != HEADER_WIDTH:
+            raise Exception("Error with CSV file field: " + HEADER_WIDTH)
+        if self.header[4] != HEADER_HEIGHT:
+            raise Exception("Error with CSV file field: " + HEADER_HEIGHT)
+
+class BBoxLoader(ParallelLoader.ParallelLoader):
+
+    def __init__(self, db, dry_run=False):
+
+        super().__init__(db, dry_run=dry_run)
+
+        self.type = "bbox"
+
+    def generate_batch(self, bbox_data):
+
+        q = []
+
+        ref_counter = 1
+        for data in bbox_data:
+
+            # TODO we could reuse image references within the batch
+            # instead of creating a new find for every image.
+            img_ref = ref_counter
+            ref_counter += 1
+            fi = {
+                "FindImage": {
+                    "_ref": img_ref,
+                }
+            }
+
+            if IMG_KEY_PROP in data:
+                key = data[IMG_KEY_PROP]
+                val = data[IMG_KEY_VAL]
+                constraints = {}
+                constraints[key] = ["==", val]
+                fi["FindImage"]["constraints"] = constraints
+
+            q.append(fi)
+
+            ai = {
+                "AddBoundingBox": {
+                    "image": img_ref,
+                    "rectangle": {
+                        "x":      data["x"],
+                        "y":      data["y"],
+                        "width":  data["width"],
+                        "height": data["height"],
+                    },
+                }
+            }
+
+            if "properties" in data:
+                ai["AddBoundingBox"]["properties"] = data[CSVParser.PROPERTIES]
+
+            q.append(ai)
+
+        if self.dry_run:
+            print(q)
+
+        return q, []
@@ -0,0 +1,127 @@
+import math
+import time
+from threading import Thread
+
+import numpy  as np
+import pandas as pd
+
+from aperturedb import Status
+from aperturedb import ParallelLoader
+from aperturedb import CSVParser
+
+PROPERTIES  = "properties"  # key for the properties in the data dict and in the AddBlob command
+CONSTRAINTS = "constraints"  # key for the constraints in the data dict
+BLOB_PATH   = "filename"  # required name of the first csv column (path of the blob file)
+
+class BlobGeneratorCSV(CSVParser.CSVParser):
+
+    '''
+        ApertureDB Blob Data loader.
+        Expects a csv file with the following columns:
+
+            filename,PROP_NAME_1, ... PROP_NAME_N,constraint_PROP1
+
+        Example csv file:
+        filename,name,lastname,age,id,constaint_id
+        /mnt/blob1,John,Salchi,69,321423532,321423532
+        /mnt/blob2,Johna,Salchi,63,42342522,42342522
+        ...
+    '''
+
+    def __init__(self, filename):
+
+        super().__init__(filename)
+
+        self.props_keys       = [x for x in self.header[1:] if not x.startswith(CSVParser.CONTRAINTS_PREFIX)]
+        self.props_keys       = [x for x in self.props_keys if x != BLOB_PATH]
+        self.constraints_keys = [x for x in self.header[1:] if x.startswith(CSVParser.CONTRAINTS_PREFIX) ]
+
+    def __getitem__(self, idx):
+
+        data = {}
+
+        filename      = self.df.loc[idx, BLOB_PATH]
+        blob_ok, blob = self.load_blob(filename)
+        if not blob_ok:
+            Exception("Error loading blob: " + filename )
+        data["blob"] = blob
+
+        properties  = self.parse_properties (self.df, idx)
+        constraints = self.parse_constraints(self.df, idx)
+
+        if properties:
+            data[PROPERTIES] = properties
+
+        if constraints:
+            data[CONSTRAINTS] = constraints
+
+        return data
+
+    def load_blob(self, filename):
+
+        try:
+            fd = open(filename, "rb")
+            buff = fd.read()
+            fd.close()
+            return True, buff
+        except:
+            print("BLOB ERROR:", filename)
+
+        return False, None
+
+    def validate(self):
+
+        self.header = list(self.df.columns.values)
+
+        if self.header[0] != BLOB_PATH:
+            raise Exception("Error with CSV file field: " + BLOB_PATH)
+
+class BlobLoader(ParallelLoader.ParallelLoader):
+
+    '''
+        ApertureDB Blob Loader.
+
+        This class is to be used in combination with a "generator".
+        The generator must be an iterable object that generated "Blob_data"
+        elements:
+            Blob_data = {
+                "properties":  properties,
+                "constraints": constraints,
+            }
+    '''
+
+    def __init__(self, db, dry_run=False):
+
+        super().__init__(db, dry_run=dry_run)
+
+        self.type = "blob"
+
+    def generate_batch(self, Blob_data):
+
+        q = []
+        blobs = []
+
+        for data in Blob_data:
+
+            ae = {
+                "AddBlob": {
+                }
+            }
+
+            if PROPERTIES in data:
+                ae["AddBlob"][PROPERTIES] = data[PROPERTIES]
+
+            if CONSTRAINTS in data:
+                ae["AddBlob"]["if_not_found"] = data[CONSTRAINTS]
+
+            q.append(ae)
+
+            if len(data["blob"]) == 0:
+                print("WARNING: Skipping empty blob.")
+                continue
+            blobs.append(data["blob"])
+
+        if self.dry_run:
+            print(q)
+
+        return q, blobs