Skip to content

Commit f7b59ae

Browse files
authored
Merge pull request #208 from aperture-data/release-0.4.1
Addresses connection handling on fork.
2 parents bbd6c32 + 58b6188 commit f7b59ae

File tree

7 files changed

+49
-42
lines changed

7 files changed

+49
-42
lines changed

.github/workflows/main.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
# This workflow contains a single job called "build-test"
2020
build-test:
2121
# The type of runner that the job will run on Ubuntu 18.04 (latest)
22-
runs-on: ubuntu-latest
22+
runs-on: self-hosted
2323

2424
# Steps represent a sequence of tasks that will be
2525
# executed as part of the job
@@ -61,7 +61,7 @@ jobs:
6161
- build-test
6262

6363
# The type of runner that the job will run on Ubuntu 18.04 (latest)
64-
runs-on: ubuntu-latest
64+
runs-on: self-hosted
6565

6666
# Steps represent a sequence of tasks that will be
6767
# executed as part of the job

.github/workflows/pr.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ on:
1515
# that can run sequentially or in parallel
1616
jobs:
1717
run_test:
18-
runs-on: ubuntu-latest
18+
runs-on: self-hosted
1919

2020
# Steps represent a sequence of tasks that will be
2121
# executed as part of the job
@@ -50,7 +50,7 @@ jobs:
5050
BRANCH_NAME=${{ github.event.pull_request.head.ref }} ./run_test_container.sh
5151
5252
run_test_conda_gpu:
53-
runs-on: ubuntu-latest
53+
runs-on: self-hosted
5454

5555
# Steps represent a sequence of tasks that will be
5656
# executed as part of the job
@@ -88,7 +88,7 @@ jobs:
8888
./run_test_container.sh aperturedata/aperturedb-pytorch-gpu
8989
9090
build_images:
91-
runs-on: ubuntu-latest
91+
runs-on: self-hosted
9292
steps:
9393
# Checks-out your repository under $GITHUB_WORKSPACE,
9494
# so your job can access it

.github/workflows/release.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
# This workflow contains a single job called "build-test"
2020
build-test:
2121
# The type of runner that the job will run on Ubuntu 18.04 (latest)
22-
runs-on: ubuntu-latest
22+
runs-on: self-hosted
2323

2424
# Steps represent a sequence of tasks that will be
2525
# executed as part of the job
@@ -58,7 +58,7 @@ jobs:
5858
build_and_deploy_docs:
5959

6060
# The type of runner that the job will run on Ubuntu 18.04 (latest)
61-
runs-on: ubuntu-latest
61+
runs-on: self-hosted
6262

6363
# Steps represent a sequence of tasks that will be
6464
# executed as part of the job

aperturedb/Connector.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -287,25 +287,27 @@ def _query(self, query, blob_array = []):
287287

288288
tries = 0
289289
while tries < 3:
290-
if self._send_msg(data):
291-
response = self._recv_msg()
292-
if response is not None:
293-
break
294-
290+
try:
291+
if self._send_msg(data):
292+
response = self._recv_msg()
293+
if response is not None:
294+
querRes = queryMessage_pb2.queryMessage()
295+
querRes.ParseFromString(response)
296+
response_blob_array = [b for b in querRes.blobs]
297+
self.last_response = json.loads(querRes.json)
298+
break
299+
except ssl.SSLError as e:
300+
# This can happen in a scenario where multiple
301+
# processes might be accessing a single connection.
302+
# The copy does not make usable connections.
303+
logger.warning(f"Socket error on process {os.getpid()}")
295304
tries += 1
296-
logger.error(
297-
f"Connection broken. Reconnectng attempt [{tries}/3] ..")
298-
time.sleep(5)
305+
logger.warning(
306+
f"Connection broken. Reconnectng attempt [{tries}/3] .. PID = {os.getpid()}")
307+
time.sleep(1)
299308
self._connect()
300309
self._renew_session()
301310

302-
querRes = queryMessage_pb2.queryMessage()
303-
querRes.ParseFromString(response)
304-
305-
response_blob_array = [b for b in querRes.blobs]
306-
307-
self.last_response = json.loads(querRes.json)
308-
309311
return (self.last_response, response_blob_array)
310312

311313
def query(self, q, blobs=[]):

aperturedb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
logger = logging.getLogger(__name__)
99

10-
__version__ = "0.4.0"
10+
__version__ = "0.4.1"
1111

1212
# set log level
1313
logger.setLevel(logging.DEBUG)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
setuptools.setup(
2121
name="aperturedb",
22-
version="0.4.0",
22+
version="0.4.1",
2323
description="ApertureDB Client Module",
2424
install_requires=install_requires,
2525
long_description=long_description,

test/test_torch_connector.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,34 @@
11
import time
22
import os
33
import logging
4+
from typing import Union
45

56
import torch
67
import torch.distributed as dist
78
from aperturedb import Images
89
from aperturedb import PyTorchDataset
10+
from torch.utils.data.dataloader import DataLoader
11+
from torch.utils.data.dataset import Dataset
912

1013
logger = logging.getLogger(__name__)
1114

1215

1316
class TestTorchDatasets():
14-
def validate_dataset(self, dataset):
17+
def validate_dataset(self, dataset: Union[DataLoader, Dataset], expected_length):
1518
start = time.time()
1619

20+
count = 0
1721
# Iterate over dataset.
1822
for img in dataset:
1923
if len(img[0]) < 0:
2024
logger.error("Empty image?")
2125
assert True == False
26+
count += len(img[1]) if isinstance(dataset, DataLoader) else 1
27+
assert count == expected_length
2228

23-
logger.info("\n")
24-
logger.info("Throughput (imgs/s):",
25-
len(dataset) / (time.time() - start))
29+
time_taken = time.time() - start
30+
if time_taken != 0:
31+
logger.info(f"Throughput (imgs/s): {len(dataset) / time_taken}")
2632

2733
def test_omConstraints(self, db, utils, images):
2834
assert len(images) > 0
@@ -31,8 +37,7 @@ def test_omConstraints(self, db, utils, images):
3137
dataset = PyTorchDataset.ApertureDBDatasetConstraints(
3238
db, constraints=const)
3339

34-
assert len(dataset) == utils.count_images()
35-
self.validate_dataset(dataset)
40+
self.validate_dataset(dataset, utils.count_images())
3641

3742
def test_nativeContraints(self, db, utils, images):
3843
assert len(images) > 0
@@ -57,10 +62,10 @@ def test_nativeContraints(self, db, utils, images):
5762
dataset = PyTorchDataset.ApertureDBDataset(
5863
db, query, label_prop="license")
5964

60-
assert len(dataset) == utils.count_images()
61-
self.validate_dataset(dataset)
65+
self.validate_dataset(dataset, utils.count_images())
6266

6367
def test_datasetWithMultiprocessing(self, db, utils):
68+
len_limit = utils.count_images()
6469
query = [{
6570
"FindImage": {
6671
"constraints": {
@@ -74,16 +79,16 @@ def test_datasetWithMultiprocessing(self, db, utils):
7479
}
7580
],
7681
"results": {
77-
"list": ["license"]
82+
"list": ["license"],
83+
"limit": len_limit
7884
}
7985
}
8086
}]
8187

8288
dataset = PyTorchDataset.ApertureDBDataset(
8389
db, query, label_prop="license")
8490

85-
assert len(dataset) == utils.count_images()
86-
self.validate_dataset(dataset)
91+
self.validate_dataset(dataset, len_limit)
8792
# Distributed Data Loader Setup
8893

8994
# Needed for init_process_group
@@ -93,30 +98,30 @@ def test_datasetWithMultiprocessing(self, db, utils):
9398
dist.init_process_group("gloo", rank=0, world_size=1)
9499

95100
# === Distributed Data Loader Sequential
96-
97-
data_loader = torch.utils.data.DataLoader(
101+
batch_size = 10
102+
data_loader = DataLoader(
98103
dataset,
99-
batch_size=10, # pick random values here to test
104+
batch_size=batch_size, # pick random values here to test
100105
num_workers=4, # num_workers > 1 to test multiprocessing works
101106
pin_memory=True,
102107
drop_last=True,
103108
)
104109

105-
self.validate_dataset(data_loader)
110+
self.validate_dataset(data_loader, len_limit)
106111
# === Distributed Data Loader Shuffler
107112

108113
# This will generate a random sampler, which will make the use
109114
# of batching wasteful
110115
sampler = torch.utils.data.DistributedSampler(
111116
dataset, shuffle=True)
112117

113-
data_loader = torch.utils.data.DataLoader(
118+
data_loader = DataLoader(
114119
dataset,
115120
sampler=sampler,
116-
batch_size=10, # pick random values here to test
121+
batch_size=batch_size, # pick random values here to test
117122
num_workers=4, # num_workers > 1 to test multiprocessing works
118123
pin_memory=True,
119124
drop_last=True,
120125
)
121126

122-
self.validate_dataset(data_loader)
127+
self.validate_dataset(data_loader, len_limit)

0 commit comments

Comments
 (0)