Skip to content

Commit 36321f5

Browse files
committed
Merge pull request #1 from marqh/validation
Validation
2 parents 285332a + e76e053 commit 36321f5

File tree

12 files changed

+521
-0
lines changed

12 files changed

+521
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,6 @@ target/
6060

6161
#Ipython Notebook
6262
.ipynb_checkpoints
63+
64+
*~
65+

lib/bald/__init__.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import contextlib
2+
import re
3+
4+
import h5py
5+
import requests
6+
7+
from bald.validation import ContainerValidation, DatasetValidation
8+
9+
10+
class HttpCache(object):
    """
    In-memory cache of HTTP responses, keyed by the requested URI.

    Indexing the cache with a URI (``cache[uri]``) returns the stored
    response for that URI, fetching it with ``requests.get`` the first
    time the URI is seen.  Every response is stored, whatever its status.
    """
    def __init__(self):
        # Maps URI string -> the response object returned by requests.get.
        self.cache = {}

    def __getitem__(self, item):
        """
        Return the response for the URI `item`, fetching it if required.

        Raises ValueError if `item` starts with neither 'http://' nor
        'https://'.
        """
        # str.startswith accepts a tuple of alternatives.  The earlier form,
        # ``not item.startswith('http://') or item.startswith('https://')``,
        # bound ``not`` to the first test only and so rejected every
        # https URI.
        if not item.startswith(('http://', 'https://')):
            raise ValueError('{} is not a HTTP URI.'.format(item))
        if item not in self.cache:
            headers = {'Accept': 'text/turtle'}
            self.cache[item] = requests.get(item, headers=headers)

        return self.cache[item]

    def check_uri(self, uri):
        """
        Return True if the response for `uri` has status code 200,
        otherwise False.
        """
        return self[uri].status_code == 200
33+
34+
class Subject(object):
    def __init__(self, attrs=None):
        """
        A subject of metadata statements.

        attrs: a dictionary (or other mapping providing ``items()``) of
               key value pair attributes; defaults to an empty dictionary.
        """
        if attrs is None:
            # Must be a mapping: prefixes() iterates over attrs.items().
            attrs = {}
        self.attrs = attrs
        # Splits 'prefix__suffix' into (prefix, suffix), where neither part
        # contains a further double underscore.
        self._prefix_suffix = re.compile('(^(?:(?!__).)*)__((?!.*__).*$)')
        _http_p = 'http[s]?://.*'
        self._http_uri = re.compile('{}'.format(_http_p))
        # NOTE(review): this expands to 'http[s]?://.*/|#', so the '|'
        # separates the whole http pattern from a bare '#'.  It looks like
        # "an http URI ending in '/' or '#'" was intended - confirm before
        # tightening, as existing attribute values may rely on the current
        # matching.
        self._http_uri_prefix = re.compile('{}/|#'.format(_http_p))

    def prefixes(self):
        """
        Return a dictionary mapping prefix name to prefix URI.

        A prefix is defined by an attribute whose key ends in '__' and whose
        value matches the prefix URI pattern.

        Raises ValueError if two attributes define the same prefix name.
        """
        prefixes = {}
        # items() is available on Python 2 and Python 3 mappings alike.
        for key, value in self.attrs.items():
            if key.endswith('__') and self._http_uri_prefix.match(value):
                # rstrip treats its argument as a set of characters, so all
                # trailing underscores are removed, not just the last two.
                pref = key.rstrip('__')
                if pref in prefixes:
                    raise ValueError('This container has conflicting prefix definitions')
                prefixes[pref] = value
        return prefixes

    def unpack_uri(self, astring):
        """
        Return `astring` with a known 'prefix__' replaced by its http URI.

        `astring` is returned unchanged if it is not of the form
        'prefix__suffix', if the prefix is not defined by this subject's
        attributes, or if the prefix's value is not an http(s) URI.
        """
        match = self._prefix_suffix.match(astring)
        if match is None:
            return astring
        prefix = match.group(1)
        # Build the prefix map once rather than once per lookup.
        uri = self.prefixes().get(prefix)
        if uri is not None and self._http_uri.match(uri):
            return astring.replace('{}__'.format(prefix), uri)
        return astring
70+
71+
@contextlib.contextmanager
def load(afilepath):
    """
    Context manager which opens `afilepath` read-only, yields the open
    file handle and closes it on exit.

    Only paths ending in '.hdf' are supported; these are opened with
    h5py.File.  Raises ValueError for any other suffix.
    """
    if afilepath.endswith('.hdf'):
        loader = h5py.File
    else:
        raise ValueError('filepath suffix not supported')
    # Open before entering the try block: if the open itself fails there is
    # no handle to close, and the original error must propagate instead of
    # a NameError raised by the cleanup clause.
    f = loader(afilepath, "r")
    try:
        yield f
    finally:
        f.close()
82+
83+
84+
def validate_hdf5(afilepath):
    """
    Validate a file with respect to binary-array-linked-data.

    Returns True if the root container's validation and the validation of
    every item in the file report valid, otherwise False.

    """
    with load(afilepath) as fhandle:
        valid = True
        root_container = Subject(fhandle.attrs)
        root_val = ContainerValidation(subject=root_container, fhandle=fhandle)
        if not root_val.is_valid():
            valid = False
        # iterate through the datasets; every one is validated, even after
        # a failure has already been recorded
        for name, dataset in fhandle.items():
            # a dataset's attribute collection inherits from and specialises
            # its container's attribute collection
            sattrs = dict(fhandle.attrs)
            sattrs.update(dataset.attrs)
            dset = Subject(sattrs)
            dset_val = DatasetValidation(name, dataset, fhandle=fhandle, subject=dset)
            if not dset_val.is_valid():
                valid = False

    return valid
109+
110+
111+
112+

lib/bald/tests/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import contextlib
2+
import os
3+
import tempfile
4+
import unittest
5+
6+
7+
class BaldTestCase(unittest.TestCase):
    """Base test case providing a temporary file helper."""

    @contextlib.contextmanager
    def temp_filename(self, suffix=''):
        """
        Context manager yielding the path of a newly created temporary
        file whose name ends with `suffix`; the file is removed on exit.
        """
        descriptor, path = tempfile.mkstemp(suffix)
        # Only the path is handed out, so release the descriptor that
        # mkstemp opened straight away.
        os.close(descriptor)
        try:
            yield path
        finally:
            os.remove(path)

lib/bald/tests/__init__.pyc

879 Bytes
Binary file not shown.

lib/bald/tests/integration/__init__.py

Whitespace-only changes.
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import unittest
2+
3+
import h5py
4+
import numpy as np
5+
6+
import bald
7+
from bald.tests import BaldTestCase
8+
9+
def _fattrs(f):
10+
f.attrs['bald__'] = 'http://binary-array-ld.net/experimental/'
11+
f.attrs['rdf__'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
12+
f.attrs['rdf__type'] = 'bald__Container'
13+
return f
14+
15+
def _create_parent_child(f, pshape, cshape):
16+
dsetp = f.create_dataset("parent_dataset", pshape, dtype='i')
17+
dsetc = f.create_dataset("child_dataset", cshape, dtype='i')
18+
dsetp.attrs['rdf__type'] = 'bald__Dataset'
19+
dsetp.attrs['bald__references'] = dsetc.ref
20+
dsetc.attrs['rdf__type'] = 'bald__Dataset'
21+
dsetc.attrs['rdf__type'] = 'bald__Reference'
22+
dsetc.attrs['bald__dataset'] = dsetc.ref
23+
return f
24+
25+
26+
class Test(BaldTestCase):
    """Validation of files whose attributes use prefixed URIs."""

    def test_valid_uri(self):
        with self.temp_filename('.hdf') as tfile:
            # The with block closes the file even if the setup raises, so
            # the temporary file is never removed while still open.
            with h5py.File(tfile, "w") as f:
                _fattrs(f)
                _create_parent_child(f, (11, 17), (11, 17))
            self.assertTrue(bald.validate_hdf5(tfile))

    def test_invalid_uri(self):
        with self.temp_filename('.hdf') as tfile:
            with h5py.File(tfile, "w") as f:
                _fattrs(f)
                _create_parent_child(f, (11, 17), (11, 17))
                # an additional root attribute, with which the file is
                # expected to fail validation
                f.attrs['bald__turtle'] = 'bald__walnut'
            self.assertFalse(bald.validate_hdf5(tfile))
44+
45+
class TestArrayReference(BaldTestCase):
    """Validation of a parent dataset referencing a child dataset."""

    def _validate_shapes(self, pshape, cshape):
        # Write a temporary file holding a parent dataset of shape `pshape`
        # which references a child dataset of shape `cshape`, and return
        # the result of bald.validate_hdf5 for that file.  The with block
        # closes the file even if the setup raises.
        with self.temp_filename('.hdf') as tfile:
            with h5py.File(tfile, "w") as f:
                _fattrs(f)
                _create_parent_child(f, pshape, cshape)
            return bald.validate_hdf5(tfile)

    def test_match(self):
        self.assertTrue(self._validate_shapes((11, 17), (11, 17)))

    def test_mismatch_zeroth(self):
        self.assertFalse(self._validate_shapes((11, 17), (11, 13)))

    def test_mismatch_oneth(self):
        self.assertFalse(self._validate_shapes((11, 17), (13, 17)))

    def test_match_plead_dim(self):
        # parent has leading dimension wrt child
        self.assertTrue(self._validate_shapes((4, 13, 17), (13, 17)))

    def test_match_clead_dim(self):
        # child has leading dimension wrt parent
        self.assertTrue(self._validate_shapes((13, 17), (7, 13, 17)))

    def test_mismatch_pdisjc_lead_dim(self):
        # child and parent have disjoint leading dimensions
        self.assertFalse(self._validate_shapes((4, 13, 17), (7, 13, 17)))

    def test_mismatch_pdisjc_trail_dim(self):
        # child and parent have disjoint trailing dimensions
        self.assertFalse(self._validate_shapes((13, 17, 2), (13, 17, 9)))
106+
107+
108+
109+
# def test_match_(self):
110+
# with self.temp_filename('.hdf') as tfile:
111+
# f = h5py.File(tfile, "w")
112+
# f = _fattrs(f)
113+
# #
114+
# f = _create_parent_child(f, (), ())
115+
# f.close()
116+
# self.assert(bald.validate_hdf5(tfile))
117+
118+
if __name__ == '__main__':
    # Run this module's tests when it is executed as a script.
    unittest.main()
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import unittest
2+
3+
import h5py
4+
import numpy as np
5+
6+
from bald.tests import BaldTestCase
7+
8+
def _fattrs(f):
9+
f.attrs['bald__'] = 'http://binary_array_ld.net/experimental'
10+
f.attrs['bald__type'] = 'bald__Container'
11+
return f
12+
13+
def _create_parent_child(f, pshape, cshape):
14+
dsetp = f.create_dataset("parent_dataset", pshape, dtype='i')
15+
dsetc = f.create_dataset("child_dataset", cshape, dtype='i')
16+
dsetp.attrs['bald__type'] = 'bald__Dataset'
17+
dsetp.attrs['bald__reference'] = dsetc.ref
18+
dsetc.attrs['bald__type'] = 'bald__Dataset'
19+
return f
20+
21+
22+
class TestArrayReference(BaldTestCase):
    """Creation of a file holding a parent dataset referencing a child."""

    def test_match_array_reference(self):
        # NOTE(review): this test makes no assertion; it only checks that
        # the file can be written without raising.
        with self.temp_filename('.hdf') as tfile:
            # The with block closes the file even if the setup raises, so
            # the temporary file is never removed while still open.
            with h5py.File(tfile, "w") as f:
                _fattrs(f)
                _create_parent_child(f, (11, 17), (11, 17))
29+
30+
31+
if __name__ == '__main__':
    # Run this module's tests when it is executed as a script.
    unittest.main()

lib/bald/tests/test_simple.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import unittest
2+
3+
import h5py
4+
import numpy as np
5+
6+
from bald.tests import BaldTestCase
7+
8+
9+
class Test(BaldTestCase):
    """Basic checks of the test harness and of h5py file round-tripping."""

    def setUp(self):
        self.this = 'this'

    def test_this(self):
        # assertEqual, not assertTrue: assertTrue(value, 'this') treats its
        # second argument as the failure message and so passed for any
        # truthy value.
        self.assertEqual(self.this, 'this')

    def test_make_file(self):
        with self.temp_filename('.hdf') as tfile:
            # Close the file before temp_filename removes it.
            with h5py.File(tfile, "w") as f:
                f.create_dataset("mydataset", (100,), dtype='i')

    def test_make_load_file(self):
        with self.temp_filename('.hdf') as tfile:
            with h5py.File(tfile, "w") as f:
                dset = f.create_dataset("mydataset", (100,), dtype='i')
                dset.attrs['bald__'] = 'http://binary_array_ld.net/experimental'
            # Re-open read-only and check that what was written comes back;
            # the with block closes the handle before the file is removed.
            with h5py.File(tfile, "r") as newf:
                nset = newf.get('mydataset')
                self.assertEqual(nset.shape, (100,))
                self.assertEqual(nset.dtype, 'int32')
                self.assertEqual(nset.attrs.get('bald__'),
                                 'http://binary_array_ld.net/experimental')
33+
34+
35+
if __name__ == '__main__':
    # Run this module's tests when it is executed as a script.
    unittest.main()

lib/bald/tests/unit/__init__.py

Whitespace-only changes.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import unittest
2+
3+
import h5py
4+
import numpy as np
5+
6+
from bald.tests import BaldTestCase
7+
from bald import validation
8+
9+
class Test(unittest.TestCase):
    """Checks of validation.check_uri."""

    # NOTE(review): validation.check_uri is not defined in the code visible
    # here; going by the test names it presumably requests the URI and
    # reports whether the status is 200 - confirm, as that would make these
    # tests depend on network access.

    def test_check_uri_200(self):
        self.assertTrue(
            validation.check_uri('http://binary-array-ld.net/experimental'))

    def test_check_uri_404(self):
        self.assertFalse(
            validation.check_uri('http://binary-array-ld.net/experimentalish'))
17+
18+
19+
if __name__ == '__main__':
    # Run this module's tests when it is executed as a script.
    unittest.main()

0 commit comments

Comments
 (0)