Skip to content

Commit 042b693

Browse files
authored
Merge pull request #107 from aperture-data/release-0.2.2
Release 0.2.2
2 parents 7c8b155 + e4ea690 commit 042b693

32 files changed

+556
-177
lines changed

.github/workflows/main.yml

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# This is a basic workflow to run tests on commit/PRs on develop
2+
3+
name: main
4+
5+
# Controls when the action will run.
6+
on:
7+
# Triggers the workflow on push or pull request events
8+
# but only for the develop, master, and release branches
9+
push:
10+
branches:
11+
- main
12+
13+
# Allows you to run this workflow manually from the Actions tab
14+
workflow_dispatch:
15+
16+
# A workflow run is made up of one or more jobs
17+
# that can run sequentially or in parallel
18+
jobs:
19+
# This workflow contains a single job called "build-test"
20+
build-test:
21+
# The type of runner that the job will run on Ubuntu 18.04 (latest)
22+
runs-on: ubuntu-latest
23+
24+
# Steps represent a sequence of tasks that will be
25+
# executed as part of the job
26+
steps:
27+
# Checks-out your repository under $GITHUB_WORKSPACE,
28+
# so your job can access it
29+
- uses: actions/checkout@v2
30+
31+
- name: Login to DockerHub
32+
uses: docker/login-action@v1
33+
with:
34+
username: ${{ secrets.DOCKER_USER }}
35+
password: ${{ secrets.DOCKER_PASS }}
36+
37+
- name: Run Tests
38+
env:
39+
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
40+
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
41+
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
42+
run: |
43+
pip3 install ipython torch torchvision boto3
44+
cd test
45+
bash run_test.sh
46+
47+
build_and_deploy_docs:
48+
49+
# The type of runner that the job will run on Ubuntu 18.04 (latest)
50+
runs-on: ubuntu-latest
51+
52+
# Steps represent a sequence of tasks that will be
53+
# executed as part of the job
54+
steps:
55+
# Checks-out your repository under $GITHUB_WORKSPACE,
56+
# so your job can access it
57+
- uses: actions/checkout@v3
58+
with:
59+
fetch-depth: 0
60+
ref: ${{ github.event.pull_request.head.ref }}
61+
62+
- name: Login to DockerHub
63+
uses: docker/login-action@v1
64+
with:
65+
username: ${{ secrets.DOCKER_USER }}
66+
password: ${{ secrets.DOCKER_PASS }}
67+
68+
- name: Build Notebook Docker
69+
run: './ci.sh'
70+
shell: bash
71+
72+
- name: Publish to PyPi
73+
env:
74+
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
75+
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
76+
run: |
77+
pip3 install twine
78+
bash publish.sh
79+
80+
- name: Tag release version
81+
run: './tag.sh'
82+
shell: bash
83+
84+
- name: GitHub Release
85+
run: 'TOKEN=${{ secrets.GITHUBPAT }} ./github-release.sh'
86+
shell: bash

.github/workflows/main.yaml renamed to .github/workflows/pr.yaml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This is a basic workflow to run tests on commit/PRs on develop
22

3-
name: main
3+
name: pr
44

55
# Controls when the action will run.
66
on:
@@ -51,7 +51,10 @@ jobs:
5151
steps:
5252
# Checks-out your repository under $GITHUB_WORKSPACE,
5353
# so your job can access it
54-
- uses: actions/checkout@v2
54+
- uses: actions/checkout@v3
55+
with:
56+
fetch-depth: 0
57+
ref: ${{ github.event.pull_request.head.ref }}
5558

5659
- name: Login to DockerHub
5760
uses: docker/login-action@v1
@@ -60,7 +63,5 @@ jobs:
6063
password: ${{ secrets.DOCKER_PASS }}
6164

6265
- name: Build and push docker
63-
run: |
64-
cd docs/docker
65-
bash build.sh ${GITHUB_REF##*/}
66-
bash push_images.sh ${GITHUB_REF##*/}
66+
run: 'NO_PUSH=1 BRANCH_NAME=${{ github.event.pull_request.head.ref }} ./ci.sh'
67+
shell: bash

.github/workflows/release.yaml

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ on:
99
push:
1010
branches:
1111
- release*
12-
- main
1312

1413
# Allows you to run this workflow manually from the Actions tab
1514
workflow_dispatch:
@@ -44,20 +43,7 @@ jobs:
4443
pip3 install ipython torch torchvision boto3
4544
cd test
4645
bash run_test.sh
47-
48-
- name: Build Notebook Docker
49-
run: |
50-
cd docker
51-
bash build_images.sh
52-
53-
- name: Publish to PyPi
54-
env:
55-
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
56-
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
57-
run: |
58-
pip3 install twine
59-
bash publish.sh
60-
46+
6147
build_and_deploy_docs:
6248

6349
# The type of runner that the job will run on Ubuntu 18.04 (latest)
@@ -68,16 +54,17 @@ jobs:
6854
steps:
6955
# Checks-out your repository under $GITHUB_WORKSPACE,
7056
# so your job can access it
71-
- uses: actions/checkout@v2
57+
- uses: actions/checkout@v3
58+
with:
59+
fetch-depth: 0
60+
ref: ${{ github.event.pull_request.head.ref }}
7261

7362
- name: Login to DockerHub
7463
uses: docker/login-action@v1
7564
with:
7665
username: ${{ secrets.DOCKER_USER }}
7766
password: ${{ secrets.DOCKER_PASS }}
7867

79-
- name: Build and push docker
80-
run: |
81-
cd docs/docker
82-
bash build.sh
83-
bash push_images.sh
68+
- name: Build Notebook Docker
69+
run: './ci.sh'
70+
shell: bash

aperturedb/CSVParser.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pandas as pd
22
import logging
3+
from aperturedb.Subscriptable import Subscriptable
34

45
logger = logging.getLogger(__name__)
56

@@ -9,7 +10,7 @@
910
CONSTRAINTS = "constraints"
1011

1112

12-
class CSVParser():
13+
class CSVParser(Subscriptable):
1314
"""**ApertureDB General CSV Parser for Loaders.**
1415
...
1516
"""
@@ -78,18 +79,6 @@ def _basic_command(self, idx, custom_fields: dict = None):
7879

7980
return query
8081

81-
def __getitem__(self, subscript):
82-
if isinstance(subscript, slice):
83-
start = subscript.start if subscript.start else 0
84-
stop = subscript.stop if subscript.stop else len(self)
85-
step = subscript.step if subscript.step else 1
86-
return [self.getitem(i) for i in range(start, stop, step)]
87-
else:
88-
return self.getitem(subscript)
89-
90-
def getitem(self, subscript):
91-
Exception("getitem not implemented")
92-
9382
def validate(self):
9483

9584
Exception("Validation not implemented!")

aperturedb/ConnectionLoader.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import sys
12
from aperturedb import ParallelLoader
23
from aperturedb import CSVParser
34

aperturedb/KaggleData.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
import pandas as pd
55
from kaggle.api.kaggle_api_extended import KaggleApi
66
import zipfile
7+
from aperturedb.Subscriptable import Subscriptable
78

89

9-
class KaggleData(object):
10+
class KaggleData(Subscriptable):
1011
"""
1112
**Class to wrap around a Dataset retrieved from kaggle**
1213
@@ -133,14 +134,7 @@ def __init__(
133134
self.collection = self.generate_index(
134135
workdir, self.records_count).to_dict('records')
135136

136-
def __getitem__(self, subscript):
137-
if isinstance(subscript, slice):
138-
start = subscript.start if subscript.start else 0
139-
stop = subscript.stop if subscript.stop else len(self)
140-
step = subscript.step if subscript.step else 1
141-
return [self.generate_query(i) for i in range(start, stop, step)]
142-
if subscript >= len(self.collection):
143-
raise StopIteration
137+
def getitem(self, subscript):
144138
return self.generate_query(subscript)
145139

146140
def __len__(self):

aperturedb/ParallelQuery.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import numpy as np
33
import json
44
import logging
5+
import math
56

67
logger = logging.getLogger(__name__)
78

@@ -20,6 +21,8 @@ def __init__(self, db, dry_run=False):
2021

2122
self.type = "query"
2223

24+
self.responses = []
25+
2326
def generate_batch(self, data):
2427
"""
2528
Here we flatten the individual queries to run them as
@@ -30,16 +33,41 @@ def generate_batch(self, data):
3033

3134
return q, blobs
3235

36+
def call_response_handler(self, r, b):
37+
38+
try:
39+
self.generator.response_handler(r, b)
40+
except BaseException as e:
41+
print("handler error:", r)
42+
print(e)
43+
3344
def do_batch(self, db, data):
3445

3546
q, blobs = self.generate_batch(data)
3647

48+
query_time = 0
49+
3750
if not self.dry_run:
3851
r, b = db.query(q, blobs)
39-
if not db.last_query_ok():
52+
logger.info(f"Query={q}")
53+
logger.info(f"Response={r}")
54+
55+
if db.last_query_ok():
56+
if hasattr(self.generator, "response_handler") and callable(self.generator.response_handler):
57+
# We could potentially always call this handler function
58+
# and let the user deal with the error cases.
59+
60+
cmds_per_query = math.ceil(len(r) / self.batchsize)
61+
for i in range(self.batchsize):
62+
start = i * cmds_per_query
63+
end = start + cmds_per_query
64+
self.call_response_handler(
65+
r[start:end], b[start:end])
66+
else:
4067
# Transaction failed entirely.
4168
logger.error(f"Failed query = {q} with response = {r}")
4269
self.error_counter += 1
70+
4371
if isinstance(r, list) and not all([v['status'] == 0 for i in r for k, v in i.items()]):
4472
logger.warning(
4573
f"Partial errors:\r\n{json.dumps(q)}\r\n{json.dumps(r)}")

aperturedb/Parallelizer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def run(self, generator, batchsize, numthreads, stats):
4343
self.batchsize = batchsize
4444
self.numthreads = numthreads
4545
self.stats = stats
46+
self.generator = generator
4647
self.total_actions = len(generator)
4748

4849
start_time = time.time()

aperturedb/ProgressBar.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,17 @@ def __init__(self, filename="", use_last=0):
1313
else:
1414
self.file = False
1515

16-
self.use_last = 5 if use_last == 0 else use_last
16+
self.use_last = 50 if use_last == 0 else use_last
1717
self.progress = 0.0
18+
self.eta = 0
1819

1920
self.progress_arr = []
2021
self.progress_time = []
2122

2223
self.done = False
2324

25+
self.last_msg_call = time.time()
26+
2427
def __del__(self):
2528

2629
if self.file:
@@ -84,6 +87,14 @@ def compute_stats(self, progress):
8487
if samples >= 2:
8588
avg_progress_per_sec = progress_per_sec / (samples - 1)
8689

90+
now = time.time()
91+
# If we called this function less than a second ago,
92+
# we don't update ETE because jitters too much.
93+
if now - self.last_msg_call < 1 and self.eta > 0:
94+
return
95+
96+
self.last_msg_call = now
97+
8798
if avg_progress_per_sec > 0:
8899
self.eta = (1 - progress) / avg_progress_per_sec
89100
else:
@@ -99,8 +110,8 @@ def compute_stats(self, progress):
99110
self.eta_unit = "s"
100111

101112
if len(self.progress_arr) > self.use_last:
102-
self.progress_arr = self.progress_arr[:self.use_last]
103-
self.progress_time = self.progress_time[:self.use_last]
113+
self.progress_arr = self.progress_arr[-(self.use_last):]
114+
self.progress_time = self.progress_time[-(self.use_last):]
104115

105116
def update(self, progress):
106117

aperturedb/PytorchData.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
from typing import List, Tuple
22
from torch.utils.data import Dataset
3+
from aperturedb.Subscriptable import Subscriptable
34

45

5-
class PytorchData(object):
6+
class PytorchData(Subscriptable):
67
def __init__(self, dataset: Dataset) -> None:
78
self.loaded_dataset = [t for t in dataset]
89

9-
def __getitem__(self, idx: int):
10+
def getitem(self, idx: int):
1011
return self.generate_query(idx)
1112

1213
def __len__(self):

0 commit comments

Comments (0)