Skip to content

Commit 00b7399

Browse files
committed
caching
1 parent 29a8a19 commit 00b7399

File tree

3 files changed

+101
-25
lines changed

3 files changed

+101
-25
lines changed

lib/bald/__init__.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,41 @@
22
import re
33

44
import h5py
5+
import requests
56

67
from bald.validation import ContainerValidation, DatasetValidation
78

9+
10+
class HttpStatusCache(object):
    """Remember the HTTP status code obtained for each URI.

    Indexing an instance with a URI (``cache[uri]``) returns the status
    code recorded for that URI.  The HTTP requests are only issued the
    first time a given URI is looked up; later lookups are served from
    ``self.cache``.
    """

    # URI prefixes accepted as HTTP URIs.
    _HTTP_PREFIXES = ('http://', 'https://')

    def __init__(self):
        # Maps the URI used as the lookup key to the status code stored
        # for it by __getitem__.
        self.cache = {}

    def __getitem__(self, item):
        """Return the HTTP status code recorded for the URI `item`.

        On a cache miss the URI is fetched with a plain GET.  If that
        returns 200, the URI is requested a second time with an
        ``Accept: text/turtle`` header and the status code of that second
        request is stored; otherwise the status code of the first request
        is stored.

        Raises:
            ValueError: if `item` is not already cached and does not start
                with ``http://`` or ``https://``.

        """
        if item in self.cache:
            return self.cache[item]

        # The previous test, `not a or b`, parsed as `(not a) or b` and so
        # rejected every https:// URI.  Passing a tuple to startswith
        # checks both prefixes in one unambiguous call.
        if not item.startswith(self._HTTP_PREFIXES):
            raise ValueError('{} is not a HTTP URI.'.format(item))

        response = requests.get(item)
        if response.status_code == 200:
            # The resource exists; record how it answers a turtle request.
            headers = {'Accept': 'text/turtle'}
            response = requests.get(item, headers=headers)
        self.cache[item] = response.status_code
        return self.cache[item]

    def check_uri(self, uri):
        """Return True if the status code recorded for `uri` is 200.

        The lookup goes through ``self[uri]``, so a ValueError raised for
        a non-HTTP URI propagates to the caller.

        """
        return self[uri] == 200
39+
840
class Subject(object):
941
def __init__(self, attrs=None):
1042
"""
@@ -61,14 +93,19 @@ def validate_hdf5(afilepath):
6193

6294
with load(afilepath) as fhandle:
6395
valid = True
96+
cache = {}
6497
root_container = Subject(fhandle.attrs)
65-
root_val = ContainerValidation(root_container)
98+
root_val = ContainerValidation(subject=root_container)
6699
if not root_val.is_valid():
67100
valid = False
68101
# iterate through the datasets
69102
for name, dataset in fhandle.items():
70-
dset = Subject(dataset.attrs)
71-
dset_val = DatasetValidation(name, dataset, dset, fhandle)
103+
# a dataset's attribute collection inherits from and specialises its
104+
# container's attribute collection
105+
sattrs = dict(fhandle.attrs).copy()
106+
sattrs.update(dataset.attrs)
107+
dset = Subject(sattrs)
108+
dset_val = DatasetValidation(name, dataset, fhandle, subject=dset)
72109
if not dset_val.is_valid():
73110
valid = False
74111

lib/bald/tests/integration/test_validation.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,30 +40,76 @@ def test_invalid_uri(self):
4040
self.assertFalse(bald.validate_hdf5(tfile))
4141

4242
class TestArrayReference(BaldTestCase):
43-
def test_match_array_reference(self):
43+
def test_match(self):
4444
with self.temp_filename('.hdf') as tfile:
4545
f = h5py.File(tfile, "w")
4646
f = _fattrs(f)
4747
f = _create_parent_child(f, (11, 17), (11, 17))
4848
f.close()
4949
self.assertTrue(bald.validate_hdf5(tfile))
5050

51-
def test_misatch_zeroth_array_reference(self):
51+
def test_mismatch_zeroth(self):
5252
with self.temp_filename('.hdf') as tfile:
5353
f = h5py.File(tfile, "w")
5454
f = _fattrs(f)
5555
f = _create_parent_child(f, (11, 17), (11, 13))
5656
f.close()
5757
self.assertFalse(bald.validate_hdf5(tfile))
5858

59-
def test_misatch_oneth_array_reference(self):
59+
def test_mismatch_oneth(self):
6060
with self.temp_filename('.hdf') as tfile:
6161
f = h5py.File(tfile, "w")
6262
f = _fattrs(f)
6363
f = _create_parent_child(f, (11, 17), (13, 17))
6464
f.close()
6565
self.assertFalse(bald.validate_hdf5(tfile))
6666

67+
def test_match_plead_dim(self):
68+
with self.temp_filename('.hdf') as tfile:
69+
f = h5py.File(tfile, "w")
70+
f = _fattrs(f)
71+
# parent has leading dimension wrt child
72+
f = _create_parent_child(f, (4, 13, 17), (13, 17))
73+
f.close()
74+
self.assertTrue(bald.validate_hdf5(tfile))
75+
76+
def test_match_clead_dim(self):
77+
with self.temp_filename('.hdf') as tfile:
78+
f = h5py.File(tfile, "w")
79+
f = _fattrs(f)
80+
# child has leading dimension wrt parent
81+
f = _create_parent_child(f, (13, 17), (7, 13, 17))
82+
f.close()
83+
self.assertTrue(bald.validate_hdf5(tfile))
84+
85+
def test_mismatch_pdisjc_lead_dim(self):
86+
with self.temp_filename('.hdf') as tfile:
87+
f = h5py.File(tfile, "w")
88+
f = _fattrs(f)
89+
# child and parent have disjoint leading dimensions
90+
f = _create_parent_child(f, (4, 13, 17), (7, 13, 17))
91+
f.close()
92+
self.assertFalse(bald.validate_hdf5(tfile))
93+
94+
def test_mismatch_pdisjc_trail_dim(self):
95+
with self.temp_filename('.hdf') as tfile:
96+
f = h5py.File(tfile, "w")
97+
f = _fattrs(f)
98+
# child and parent have disjoint trailing dimensions
99+
f = _create_parent_child(f, (13, 17, 2), (13, 17, 9))
100+
f.close()
101+
self.assertFalse(bald.validate_hdf5(tfile))
102+
103+
104+
105+
# def test_match_(self):
106+
# with self.temp_filename('.hdf') as tfile:
107+
# f = h5py.File(tfile, "w")
108+
# f = _fattrs(f)
109+
# #
110+
# f = _create_parent_child(f, (), ())
111+
# f.close()
112+
# self.assert(bald.validate_hdf5(tfile))
67113

68114
if __name__ == '__main__':
69115
unittest.main()

lib/bald/validation.py

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,8 @@
11

22
import numpy as np
3-
import requests
43

54
import bald
65

7-
def check_uri(uri):
8-
result = False
9-
if uri.startswith('http://') or uri.startswith('https://'):
10-
r = requests.get(uri)
11-
if r.status_code == 200:
12-
headers={'Accept':'text/turtle'}
13-
rraw = requests.get(uri, headers=headers)
14-
if rraw.status_code == 200:
15-
result = True
16-
return result
176

187
def valid_array_reference(parray, carray):
198
"""
@@ -51,8 +40,13 @@ def exceptions(self):
5140

5241
class SubjectValidation(Validation):
5342

54-
def __init__(self, subject):
43+
def __init__(self, subject, httpcache=None):
5544
self.subject = subject
45+
if isinstance(httpcache, bald.HttpStatusCache):
46+
self.cache = httpcache
47+
else:
48+
self.cache = bald.HttpStatusCache()
49+
5650

5751
def is_valid(self):
5852
return not self.exceptions()
@@ -64,8 +58,7 @@ def exceptions(self):
6458

6559
def check_attr_uris(self, exceptions):
6660
def _check_uri(uri, exceptions):
67-
if not check_uri(uri):
68-
check_uri(uri)
61+
if not self.cache.check_uri(uri):
6962
msg = '{} is not resolving as a resource (404).'
7063
msg = msg.format(uri)
7164
exceptions.append(ValueError(msg))
@@ -85,9 +78,8 @@ def _check_uri(uri, exceptions):
8578

8679
class ContainerValidation(SubjectValidation):
8780

88-
def __init__(self, container):
89-
self.container = container
90-
self.subject = container
81+
def __init__(self, **kwargs):
82+
super(ContainerValidation, self).__init__(**kwargs)
9183

9284
def exceptions(self):
9385
exceptions = []
@@ -97,11 +89,12 @@ def exceptions(self):
9789
class DatasetValidation(SubjectValidation):
9890

9991

100-
def __init__(self, name, dataset, subject, fhandle):
92+
def __init__(self, name, dataset, fhandle, **kwargs):
93+
10194
self.dataset = dataset
102-
self.subject = subject
10395
self.name = name
10496
self.fhandle = fhandle
97+
super(DatasetValidation, self).__init__(**kwargs)
10598

10699
def exceptions(self):
107100
exceptions = []

0 commit comments

Comments
 (0)