Skip to content

Commit 042b693

Browse files
authored
Merge pull request #107 from aperture-data/release-0.2.2
Release 0.2.2
2 parents 7c8b155 + e4ea690 commit 042b693

32 files changed

+556
-177
lines changed

.github/workflows/main.yml

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# This is a basic workflow to run tests on commit/PRs on develop
2+
3+
name: main
4+
5+
# Controls when the action will run.
6+
on:
7+
# Triggers the workflow on push or pull request events
8+
# but only for the develop, master, and release branches
9+
push:
10+
branches:
11+
- main
12+
13+
# Allows you to run this workflow manually from the Actions tab
14+
workflow_dispatch:
15+
16+
# A workflow run is made up of one or more jobs
17+
# that can run sequentially or in parallel
18+
jobs:
19+
# This workflow contains a single job called "build-test"
20+
build-test:
21+
# The type of runner that the job will run on Ubuntu 18.04 (latest)
22+
runs-on: ubuntu-latest
23+
24+
# Steps represent a sequence of tasks that will be
25+
# executed as part of the job
26+
steps:
27+
# Checks-out your repository under $GITHUB_WORKSPACE,
28+
# so your job can access it
29+
- uses: actions/checkout@v2
30+
31+
- name: Login to DockerHub
32+
uses: docker/login-action@v1
33+
with:
34+
username: ${{ secrets.DOCKER_USER }}
35+
password: ${{ secrets.DOCKER_PASS }}
36+
37+
- name: Run Tests
38+
env:
39+
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
40+
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
41+
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
42+
run: |
43+
pip3 install ipython torch torchvision boto3
44+
cd test
45+
bash run_test.sh
46+
47+
build_and_deploy_docs:
48+
49+
# The type of runner that the job will run on Ubuntu 18.04 (latest)
50+
runs-on: ubuntu-latest
51+
52+
# Steps represent a sequence of tasks that will be
53+
# executed as part of the job
54+
steps:
55+
# Checks-out your repository under $GITHUB_WORKSPACE,
56+
# so your job can access it
57+
- uses: actions/checkout@v3
58+
with:
59+
fetch-depth: 0
60+
ref: ${{ github.event.pull_request.head.ref }}
61+
62+
- name: Login to DockerHub
63+
uses: docker/login-action@v1
64+
with:
65+
username: ${{ secrets.DOCKER_USER }}
66+
password: ${{ secrets.DOCKER_PASS }}
67+
68+
- name: Build Notebook Docker
69+
run: './ci.sh'
70+
shell: bash
71+
72+
- name: Publish to PyPi
73+
env:
74+
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
75+
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
76+
run: |
77+
pip3 install twine
78+
bash publish.sh
79+
80+
- name: Tag release version
81+
run: './tag.sh'
82+
shell: bash
83+
84+
- name: GitHub Release
85+
run: 'TOKEN=${{ secrets.GITHUBPAT }} ./github-release.sh'
86+
shell: bash

.github/workflows/main.yaml renamed to .github/workflows/pr.yaml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This is a basic workflow to run tests on commit/PRs on develop
22

3-
name: main
3+
name: pr
44

55
# Controls when the action will run.
66
on:
@@ -51,7 +51,10 @@ jobs:
5151
steps:
5252
# Checks-out your repository under $GITHUB_WORKSPACE,
5353
# so your job can access it
54-
- uses: actions/checkout@v2
54+
- uses: actions/checkout@v3
55+
with:
56+
fetch-depth: 0
57+
ref: ${{ github.event.pull_request.head.ref }}
5558

5659
- name: Login to DockerHub
5760
uses: docker/login-action@v1
@@ -60,7 +63,5 @@ jobs:
6063
password: ${{ secrets.DOCKER_PASS }}
6164

6265
- name: Build and push docker
63-
run: |
64-
cd docs/docker
65-
bash build.sh ${GITHUB_REF##*/}
66-
bash push_images.sh ${GITHUB_REF##*/}
66+
run: 'NO_PUSH=1 BRANCH_NAME=${{ github.event.pull_request.head.ref }} ./ci.sh'
67+
shell: bash

.github/workflows/release.yaml

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ on:
99
push:
1010
branches:
1111
- release*
12-
- main
1312

1413
# Allows you to run this workflow manually from the Actions tab
1514
workflow_dispatch:
@@ -44,20 +43,7 @@ jobs:
4443
pip3 install ipython torch torchvision boto3
4544
cd test
4645
bash run_test.sh
47-
48-
- name: Build Notebook Docker
49-
run: |
50-
cd docker
51-
bash build_images.sh
52-
53-
- name: Publish to PyPi
54-
env:
55-
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
56-
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
57-
run: |
58-
pip3 install twine
59-
bash publish.sh
60-
46+
6147
build_and_deploy_docs:
6248

6349
# The type of runner that the job will run on Ubuntu 18.04 (latest)
@@ -68,16 +54,17 @@ jobs:
6854
steps:
6955
# Checks-out your repository under $GITHUB_WORKSPACE,
7056
# so your job can access it
71-
- uses: actions/checkout@v2
57+
- uses: actions/checkout@v3
58+
with:
59+
fetch-depth: 0
60+
ref: ${{ github.event.pull_request.head.ref }}
7261

7362
- name: Login to DockerHub
7463
uses: docker/login-action@v1
7564
with:
7665
username: ${{ secrets.DOCKER_USER }}
7766
password: ${{ secrets.DOCKER_PASS }}
7867

79-
- name: Build and push docker
80-
run: |
81-
cd docs/docker
82-
bash build.sh
83-
bash push_images.sh
68+
- name: Build Notebook Docker
69+
run: './ci.sh'
70+
shell: bash

aperturedb/CSVParser.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pandas as pd
22
import logging
3+
from aperturedb.Subscriptable import Subscriptable
34

45
logger = logging.getLogger(__name__)
56

@@ -9,7 +10,7 @@
910
CONSTRAINTS = "constraints"
1011

1112

12-
class CSVParser():
13+
class CSVParser(Subscriptable):
1314
"""**ApertureDB General CSV Parser for Loaders.**
1415
...
1516
"""
@@ -78,18 +79,6 @@ def _basic_command(self, idx, custom_fields: dict = None):
7879

7980
return query
8081

81-
def __getitem__(self, subscript):
82-
if isinstance(subscript, slice):
83-
start = subscript.start if subscript.start else 0
84-
stop = subscript.stop if subscript.stop else len(self)
85-
step = subscript.step if subscript.step else 1
86-
return [self.getitem(i) for i in range(start, stop, step)]
87-
else:
88-
return self.getitem(subscript)
89-
90-
def getitem(self, subscript):
91-
Exception("getitem not implemented")
92-
9382
def validate(self):
9483

9584
Exception("Validation not implemented!")

aperturedb/ConnectionLoader.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import sys
12
from aperturedb import ParallelLoader
23
from aperturedb import CSVParser
34

aperturedb/KaggleData.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
import pandas as pd
55
from kaggle.api.kaggle_api_extended import KaggleApi
66
import zipfile
7+
from aperturedb.Subscriptable import Subscriptable
78

89

9-
class KaggleData(object):
10+
class KaggleData(Subscriptable):
1011
"""
1112
**Class to wrap around a Dataset retrieved from kaggle**
1213
@@ -133,14 +134,7 @@ def __init__(
133134
self.collection = self.generate_index(
134135
workdir, self.records_count).to_dict('records')
135136

136-
def __getitem__(self, subscript):
137-
if isinstance(subscript, slice):
138-
start = subscript.start if subscript.start else 0
139-
stop = subscript.stop if subscript.stop else len(self)
140-
step = subscript.step if subscript.step else 1
141-
return [self.generate_query(i) for i in range(start, stop, step)]
142-
if subscript >= len(self.collection):
143-
raise StopIteration
137+
def getitem(self, subscript):
144138
return self.generate_query(subscript)
145139

146140
def __len__(self):

aperturedb/ParallelQuery.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import numpy as np
33
import json
44
import logging
5+
import math
56

67
logger = logging.getLogger(__name__)
78

@@ -20,6 +21,8 @@ def __init__(self, db, dry_run=False):
2021

2122
self.type = "query"
2223

24+
self.responses = []
25+
2326
def generate_batch(self, data):
2427
"""
2528
Here we flatten the individual queries to run them as
@@ -30,16 +33,41 @@ def generate_batch(self, data):
3033

3134
return q, blobs
3235

36+
def call_response_handler(self, r, b):
37+
38+
try:
39+
self.generator.response_handler(r, b)
40+
except BaseException as e:
41+
print("handler error:", r)
42+
print(e)
43+
3344
def do_batch(self, db, data):
3445

3546
q, blobs = self.generate_batch(data)
3647

48+
query_time = 0
49+
3750
if not self.dry_run:
3851
r, b = db.query(q, blobs)
39-
if not db.last_query_ok():
52+
logger.info(f"Query={q}")
53+
logger.info(f"Response={r}")
54+
55+
if db.last_query_ok():
56+
if hasattr(self.generator, "response_handler") and callable(self.generator.response_handler):
57+
# We could potentially always call this handler function
58+
# and let the user deal with the error cases.
59+
60+
cmds_per_query = math.ceil(len(r) / self.batchsize)
61+
for i in range(self.batchsize):
62+
start = i * cmds_per_query
63+
end = start + cmds_per_query
64+
self.call_response_handler(
65+
r[start:end], b[start:end])
66+
else:
4067
# Transaction failed entirely.
4168
logger.error(f"Failed query = {q} with response = {r}")
4269
self.error_counter += 1
70+
4371
if isinstance(r, list) and not all([v['status'] == 0 for i in r for k, v in i.items()]):
4472
logger.warning(
4573
f"Partial errors:\r\n{json.dumps(q)}\r\n{json.dumps(r)}")

aperturedb/Parallelizer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def run(self, generator, batchsize, numthreads, stats):
4343
self.batchsize = batchsize
4444
self.numthreads = numthreads
4545
self.stats = stats
46+
self.generator = generator
4647
self.total_actions = len(generator)
4748

4849
start_time = time.time()

aperturedb/ProgressBar.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,17 @@ def __init__(self, filename="", use_last=0):
1313
else:
1414
self.file = False
1515

16-
self.use_last = 5 if use_last == 0 else use_last
16+
self.use_last = 50 if use_last == 0 else use_last
1717
self.progress = 0.0
18+
self.eta = 0
1819

1920
self.progress_arr = []
2021
self.progress_time = []
2122

2223
self.done = False
2324

25+
self.last_msg_call = time.time()
26+
2427
def __del__(self):
2528

2629
if self.file:
@@ -84,6 +87,14 @@ def compute_stats(self, progress):
8487
if samples >= 2:
8588
avg_progress_per_sec = progress_per_sec / (samples - 1)
8689

90+
now = time.time()
91+
# If we called this function less than a second ago,
92+
# we don't update ETE because jitters too much.
93+
if now - self.last_msg_call < 1 and self.eta > 0:
94+
return
95+
96+
self.last_msg_call = now
97+
8798
if avg_progress_per_sec > 0:
8899
self.eta = (1 - progress) / avg_progress_per_sec
89100
else:
@@ -99,8 +110,8 @@ def compute_stats(self, progress):
99110
self.eta_unit = "s"
100111

101112
if len(self.progress_arr) > self.use_last:
102-
self.progress_arr = self.progress_arr[:self.use_last]
103-
self.progress_time = self.progress_time[:self.use_last]
113+
self.progress_arr = self.progress_arr[-(self.use_last):]
114+
self.progress_time = self.progress_time[-(self.use_last):]
104115

105116
def update(self, progress):
106117

aperturedb/PytorchData.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
from typing import List, Tuple
22
from torch.utils.data import Dataset
3+
from aperturedb.Subscriptable import Subscriptable
34

45

5-
class PytorchData(object):
6+
class PytorchData(Subscriptable):
67
def __init__(self, dataset: Dataset) -> None:
78
self.loaded_dataset = [t for t in dataset]
89

9-
def __getitem__(self, idx: int):
10+
def getitem(self, idx: int):
1011
return self.generate_query(idx)
1112

1213
def __len__(self):

0 commit comments

Comments (0)