Skip to content

Commit 72e9f0c

Browse files
authored
Merge pull request #362 from aperture-data/release-0.4.16
Release 0.4.16
2 parents f67f756 + be20f17 commit 72e9f0c

29 files changed

+245
-284
lines changed

.github/workflows/dependencies.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: dependencies
2+
3+
on:
4+
schedule:
5+
- cron: "0 0 * * *"
6+
7+
jobs:
8+
build-dependencies:
9+
10+
runs-on:
11+
- self-hosted
12+
- deployer
13+
14+
steps:
15+
16+
- uses: actions/checkout@v3
17+
18+
- name: Login to DockerHub
19+
uses: docker/login-action@v2
20+
with:
21+
username: ${{ secrets.DOCKER_USER }}
22+
password: ${{ secrets.DOCKER_PASS }}
23+
24+
- name: Build and Push Dependencies Image
25+
env:
26+
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
27+
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
28+
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
29+
GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
30+
run: BUILD_DEPENDENCIES=true PULL_DEPENDENCIES=false PUSH_DEPENDENCIES=true ./ci.sh
31+
shell: bash

.github/workflows/develop.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
3636
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
3737
GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
38-
run: 'EXCLUDE_BUILD_COMPLETE=1 EXCLUDE_DEPLOY=1 ./ci.sh'
38+
run: RUN_TESTS=true ./ci.sh
3939
shell: bash
4040

4141
build_and_deploy_docs:
@@ -61,8 +61,7 @@ jobs:
6161
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
6262
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
6363
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
64-
run: |
65-
EXCLUDE_TESTING=1 EXCLUDE_DEPLOY=1 ./ci.sh
64+
run: BUILD_COMPLETE=true ./ci.sh
6665
shell: bash
6766

6867
deploy_web_services:
@@ -87,6 +86,5 @@ jobs:
8786
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
8887
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
8988
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
90-
run: |
91-
EXCLUDE_TESTING=1 ./ci.sh
89+
run: DEPLOY_TERRAFORM=true ./ci.sh
9290
shell: bash

.github/workflows/main.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
3636
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
3737
GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
38-
run: 'EXCLUDE_DEPLOY=1 ./ci.sh'
38+
run: RUN_TESTS=true ./ci.sh
3939
shell: bash
4040

4141
build_notebooks_and_publish_pypi:
@@ -61,8 +61,7 @@ jobs:
6161
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
6262
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
6363
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
64-
run: |
65-
EXCLUDE_TESTING=1 EXCLUDE_DEPLOY=1 ./ci.sh
64+
run: BUILD_COMPLETE=true ./ci.sh
6665
shell: bash
6766

6867
- name: Publish to PyPi
@@ -106,8 +105,7 @@ jobs:
106105
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
107106
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
108107
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
109-
run: |
110-
EXCLUDE_TESTING=1 ./ci.sh
108+
run: DEPLOY_TERRAFORM=true ./ci.sh
111109
shell: bash
112110

113111
trigger_demos_buils:

.github/workflows/pr.yaml

Lines changed: 1 addition & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -35,61 +35,5 @@ jobs:
3535
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
3636
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
3737
GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
38-
run: 'EXCLUDE_BUILD_COMPLETE=1 EXCLUDE_DEPLOY=1 NO_PUSH=1 BRANCH_NAME=${{ github.event.pull_request.head.ref }} TARGET_BRANCH_NAME=${{ github.event.pull_request.base.ref }} ./ci.sh'
38+
run: RUN_TESTS=true NO_PUSH=true BRANCH_NAME=${{ github.event.pull_request.head.ref }} TARGET_BRANCH_NAME=${{ github.event.pull_request.base.ref }} ./ci.sh
3939
shell: bash
40-
41-
# run_test_conda_gpu:
42-
43-
# runs-on:
44-
# - self-hosted
45-
# - deployer
46-
47-
# steps:
48-
49-
# - uses: actions/checkout@v3
50-
51-
# - name: Login to DockerHub
52-
# uses: docker/login-action@v2
53-
# with:
54-
# username: ${{ secrets.DOCKER_USER }}
55-
# password: ${{ secrets.DOCKER_PASS }}
56-
57-
# - name: Login to Google Cloud
58-
# uses: google-github-actions/setup-gcloud@v0
59-
# with:
60-
# service_account_key: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
61-
# project_id: ${{ secrets.GCP_SERVICE_ACCOUNT_PROJECT_ID }}
62-
# export_default_credentials: true
63-
64-
# - name: Build tests on pytorch GPU image
65-
# run: |
66-
# mkdir -p docker/pytorch-gpu/aperturedata
67-
# cp -r aperturedb pyproject.toml README.md test docker/pytorch-gpu/aperturedata
68-
# bash docker/pytorch-gpu/build.sh
69-
# shell: bash
70-
71-
# - name: Run Tests
72-
# env:
73-
# AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
74-
# AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
75-
# AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
76-
# GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
77-
# run: |
78-
# cd test
79-
# ./run_test_container.sh aperturedata/aperturedb-pytorch-gpu
80-
81-
build_images:
82-
83-
runs-on:
84-
- self-hosted
85-
- deployer
86-
87-
steps:
88-
89-
- uses: actions/checkout@v3
90-
91-
- name: Login to DockerHub
92-
uses: docker/login-action@v2
93-
with:
94-
username: ${{ secrets.DOCKER_USER }}
95-
password: ${{ secrets.DOCKER_PASS }}

.github/workflows/release.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
3636
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
3737
GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
38-
run: 'EXCLUDE_BUILD_COMPLETE=1 EXCLUDE_DEPLOY=1 NO_PUSH=1 UPDATE_BRANCH=1 ./ci.sh'
38+
run: RUN_TESTS=true NO_PUSH=true UPDATE_BRANCH=true ./ci.sh
3939
shell: bash
4040

4141
build_and_deploy_docs:
@@ -55,5 +55,5 @@ jobs:
5555
password: ${{ secrets.DOCKER_PASS }}
5656

5757
- name: Build Notebook Docker
58-
run: 'EXCLUDE_TESTING=1 EXCLUDE_DEPLOY=1 NO_PUSH=1 ./ci.sh'
58+
run: BUILD_COMPLETE=true NO_PUSH=true ./ci.sh
5959
shell: bash

aperturedb/BBoxDataCSV.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,11 @@ def get_indices(self):
7777
}
7878

7979
def getitem(self, idx):
80-
idx = self.df.index.start + idx
81-
8280
q = []
83-
8481
img_id = self.df.loc[idx, self.img_key]
85-
86-
img_ref = (idx % 99998) + 1
87-
8882
fi = {
8983
"FindImage": {
90-
"_ref": img_ref,
84+
"_ref": 1,
9185
"unique": True,
9286
"constraints": {
9387
self.img_key: ["==", img_id],
@@ -103,7 +97,7 @@ def getitem(self, idx):
10397

10498
rect_attrs = ["x", "y", "width", "height"]
10599
custom_fields = {
106-
"image_ref": img_ref,
100+
"image_ref": 1,
107101
"rectangle": {
108102
attr: val for attr, val in zip(rect_attrs, box_data)
109103
},

aperturedb/BlobDataCSV.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ def get_indices(self):
6666
}
6767

6868
def getitem(self, idx):
69-
idx = self.df.index.start + idx
7069
filename = os.path.join(self.relative_path_prefix,
7170
self.df.loc[idx, BLOB_PATH])
7271
blob_ok, blob = self.load_blob(filename)

aperturedb/ConnectionDataCSV.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,14 @@ def get_indices(self):
7777
}
7878

7979
def getitem(self, idx):
80-
idx = self.df.index.start + idx
8180
src_value = self.df.loc[idx, self.header[1]]
8281
dst_value = self.df.loc[idx, self.header[2]]
8382
connection_class = self.df.loc[idx, CONNECTION_CLASS]
8483
q = []
8584
members = ["_Image", "_Blob", "_Video", "_Descriptor"]
8685

8786
try:
88-
ref_src = (2 * idx) % 99998 + 1
87+
ref_src = 1
8988
cmd_params = {
9089
"_ref": ref_src,
9190
"unique": True,
@@ -98,7 +97,7 @@ def getitem(self, idx):
9897
cmd_params["blobs"] = False
9998
q.append(QueryBuilder.find_command(self.src_class, cmd_params))
10099

101-
ref_dst = ref_src + 1
100+
ref_dst = 2
102101
cmd_params = {
103102
"_ref": ref_dst,
104103
"unique": True,

aperturedb/ImageDataCSV.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def load_url(self, url):
129129
else:
130130
if retries >= self.n_download_retries:
131131
break
132-
logger.warning("Retrying object:", url)
132+
logger.warning(f"Retrying object: {url}")
133133
retries += 1
134134
time.sleep(2)
135135

@@ -147,7 +147,7 @@ def load_s3_url(self, s3_url):
147147
img = s3_response_object['Body'].read()
148148
imgbuffer = np.frombuffer(img, dtype='uint8')
149149
if not self.check_image_buffer(imgbuffer):
150-
logger.error(f"IMAGE ERROR:{s3_url} ")
150+
logger.error(f"IMAGE ERROR: {s3_url}")
151151
return False, None
152152

153153
return True, img
@@ -281,7 +281,11 @@ def getitem(self, idx):
281281
if self.format_given:
282282
custom_fields["format"] = self.df.loc[idx, IMG_FORMAT]
283283
ai = self._basic_command(idx, custom_fields)
284-
ai[self.command]["_ref"] = (idx % 99998) + 1
284+
# Each getitem query should be properly defined with a ref.
285+
# A ref should be added to each of the commands from getitem implementation.
286+
# This is because a transformer or ref updater in the PQ
287+
# will need to know which command to update.
288+
ai[self.command]["_ref"] = 1
285289
blobs.append(img)
286290
q.append(ai)
287291

aperturedb/ParallelQuery.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,36 @@ def __init__(self, db, dry_run=False):
138138

139139
def generate_batch(self, data):
140140
"""
141-
Here we flatten the individual queries to run them as
142-
a single query in a batch
141+
Here we flatten the individual queries to run them as
142+
a single query in a batch
143+
We also update the _ref values and connections refs.
143144
"""
144-
q = [cmd for query in data for cmd in query[0]]
145+
def update_refs(batched_commands):
146+
updates = {}
147+
for i, cmd in enumerate(batched_commands):
148+
if isinstance(cmd, list):
149+
# Only parallel queries will work.
150+
break
151+
values = cmd[list(cmd.keys())[0]]
152+
if "_ref" in values:
153+
updates[values["_ref"]] = i + 1
154+
values["_ref"] = i + 1
155+
assert values["_ref"] < 100000
156+
if "image_ref" in values:
157+
values["image_ref"] = updates[values["image_ref"]]
158+
if "video_ref" in values:
159+
values["video_ref"] = updates[values["video_ref"]]
160+
if "is_connected_to" in values and "_ref" in values["is_connected_to"]:
161+
values["is_connected_to"]["_ref"] = updates[values["is_connected_to"]["_ref"]]
162+
if "connect" in values and "ref" in values["connect"]:
163+
values["connect"]["ref"] = updates[values["connect"]["ref"]]
164+
if "src" in values:
165+
values["src"] = updates[values["src"]]
166+
if "dst" in values:
167+
values["dst"] = updates[values["dst"]]
168+
return batched_commands
169+
170+
q = update_refs([cmd for query in data for cmd in query[0]])
145171
blobs = [blob for query in data for blob in query[1]]
146172

147173
return q, blobs
@@ -234,6 +260,7 @@ def worker(self, thid, generator, start, end):
234260
if (end - start) % self.batchsize > 0:
235261
total_batches += 1
236262

263+
logger.info(f"Worker {thid} executing {total_batches} batches")
237264
for i in range(total_batches):
238265

239266
batch_start = start + i * self.batchsize
@@ -306,8 +333,7 @@ def query(self, generator, batchsize=1, numthreads=4, stats=False):
306333
# if len(generator[0]) > 0:
307334
#
308335
# Not applicable to old style loaders.
309-
self.commands_per_query = min(
310-
len(generator[0][0]), batchsize)
336+
self.commands_per_query = len(generator[0][0])
311337
if len(generator[0][1]):
312338
self.blobs_per_query = len(generator[0][1])
313339
else:

0 commit comments

Comments
 (0)