Skip to content

Commit 29a8a19

Browse files
committed
broadcast
1 parent c6d514e commit 29a8a19

File tree

3 files changed

+129
-22
lines changed

3 files changed

+129
-22
lines changed

lib/bald/__init__.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
import h5py
55

6-
from bald.validation import ContainerValidation
6+
from bald.validation import ContainerValidation, DatasetValidation
77

8-
class Container(object):
8+
class Subject(object):
99
def __init__(self, attrs=None):
1010
"""
1111
attrs: an dictionary of key value pair attributes
@@ -60,9 +60,19 @@ def validate_hdf5(afilepath):
6060
"""
6161

6262
with load(afilepath) as fhandle:
63-
root_container = Container(fhandle.attrs)
63+
valid = True
64+
root_container = Subject(fhandle.attrs)
6465
root_val = ContainerValidation(root_container)
65-
return root_val.is_valid()
66+
if not root_val.is_valid():
67+
valid = False
68+
# iterate through the datasets
69+
for name, dataset in fhandle.items():
70+
dset = Subject(dataset.attrs)
71+
dset_val = DatasetValidation(name, dataset, dset, fhandle)
72+
if not dset_val.is_valid():
73+
valid = False
74+
75+
return valid
6676

6777

6878

lib/bald/tests/integration/test_validation.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,30 @@ def test_invalid_uri(self):
3939
f.close()
4040
self.assertFalse(bald.validate_hdf5(tfile))
4141

42-
# class TestArrayReference(BaldTestCase):
43-
# def test_match_array_reference(self):
44-
# with self.temp_filename('.hdf') as tfile:
45-
# f = h5py.File(tfile, "w")
46-
# f = _fattrs(f)
47-
# f = _create_parent_child(f, (11, 17), (11, 17))
48-
# f.close()
42+
class TestArrayReference(BaldTestCase):
    """Integration tests for bald array references between HDF5 datasets."""

    def _validate_parent_child(self, pshape, cshape):
        """
        Build a temporary HDF5 file holding a parent/child dataset pair of
        the given shapes and return the result of bald.validate_hdf5 on it.

        Args:
            * pshape - shape tuple handed to _create_parent_child for the parent

            * cshape - shape tuple handed to _create_parent_child for the child
        """
        with self.temp_filename('.hdf') as tfile:
            f = h5py.File(tfile, "w")
            try:
                f = _fattrs(f)
                f = _create_parent_child(f, pshape, cshape)
            finally:
                # always release the handle, even if a fixture helper raises,
                # so the temporary file is not left open
                f.close()
            # validate while the temporary file is still in scope
            return bald.validate_hdf5(tfile)

    def test_match_array_reference(self):
        self.assertTrue(self._validate_parent_child((11, 17), (11, 17)))

    def test_misatch_zeroth_array_reference(self):
        self.assertFalse(self._validate_parent_child((11, 17), (11, 13)))

    def test_misatch_oneth_array_reference(self):
        self.assertFalse(self._validate_parent_child((11, 17), (13, 17)))
5066

5167

5268
if __name__ == '__main__':

lib/bald/validation.py

Lines changed: 92 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11

2+
import numpy as np
23
import requests
34

45
import bald
@@ -14,7 +15,30 @@ def check_uri(uri):
1415
result = True
1516
return result
1617

18+
def valid_array_reference(parray, carray):
    """
    Returns boolean.
    Check whether a parent array and a child array satisfy the bald array
    reference rules, which are tested here by attempting a numpy broadcast
    of the two arrays.

    Args:
        * parray - a numpy array: the parent of a bald array reference relation

        * carray - a numpy array: the child of a bald array reference relation
    """
    # Background reading on numpy broadcasting:
    # https://github.com/SciTools/courses/blob/master/course_content/notebooks/numpy_intro.ipynb
    # https://cs231n.github.io/python-numpy-tutorial/#numpy-broadcasting
    # http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
    # http://scipy.github.io/old-wiki/pages/EricsBroadcastingDoc

    try:
        # numpy raises ValueError when the two shapes cannot be broadcast
        np.broadcast(parray, carray)
    except ValueError:
        return False
    return True
41+
1842

1943
class Validation(object):
2044

@@ -25,9 +49,10 @@ def exceptions(self):
2549
exceptions = []
2650
return exceptions
2751

28-
class ContainerValidation(Validation):
29-
def __init__(self, container):
30-
self.container = container
52+
class SubjectValidation(Validation):
53+
54+
def __init__(self, subject):
55+
self.subject = subject
3156

3257
def is_valid(self):
3358
return not self.exceptions()
@@ -40,23 +65,79 @@ def exceptions(self):
4065
def check_attr_uris(self, exceptions):
4166
def _check_uri(uri, exceptions):
4267
if not check_uri(uri):
68+
check_uri(uri)
4369
msg = '{} is not resolving as a resource (404).'
4470
msg = msg.format(uri)
4571
exceptions.append(ValueError(msg))
4672
return exceptions
4773

48-
for pref, uri in self.container.prefixes().iteritems():
49-
exceptions = _check_uri(self.container.unpack_uri(uri),
50-
exceptions)
51-
for attr, value in self.container.attrs.iteritems():
52-
exceptions = _check_uri(self.container.unpack_uri(attr),
74+
for pref, uri in self.subject.prefixes().iteritems():
75+
exceptions = _check_uri(self.subject.unpack_uri(uri),
5376
exceptions)
54-
exceptions = _check_uri(self.container.unpack_uri(value),
77+
for attr, value in self.subject.attrs.iteritems():
78+
exceptions = _check_uri(self.subject.unpack_uri(attr),
5579
exceptions)
56-
80+
if isinstance(value, str):
81+
exceptions = _check_uri(self.subject.unpack_uri(value),
82+
exceptions)
5783
return exceptions
5884

59-
85+
86+
class ContainerValidation(SubjectValidation):
    """Validation of a container, limited to checking its attribute URIs."""

    def __init__(self, container):
        # the container is kept under both names: `subject` is what the
        # inherited checks read, `container` is kept as its own attribute
        self.subject = self.container = container

    def exceptions(self):
        """Return the list of exceptions found by the attribute URI check."""
        return self.check_attr_uris([])
96+
97+
class DatasetValidation(SubjectValidation):
    """
    Validation of a single dataset: its attribute URIs and any bald array
    reference it declares.

    Args:
        * name - the name the dataset is stored under in fhandle

        * dataset - the dataset being validated; only its shape is read here

        * subject - the subject wrapping the dataset's attributes

        * fhandle - the open file handle, used to look up referenced datasets
    """

    def __init__(self, name, dataset, subject, fhandle):
        self.dataset = dataset
        self.subject = subject
        self.name = name
        self.fhandle = fhandle

    def exceptions(self):
        """Return the exceptions from the URI checks and the reference checks."""
        exceptions = []
        exceptions = self.check_attr_uris(exceptions)
        exceptions = self.check_array_references(exceptions)
        return exceptions

    def check_array_references(self, exceptions):
        """
        Append a ValueError to exceptions for each 'bald_._reference'
        attribute whose referenced dataset has a shape that fails
        valid_array_reference against this dataset's shape.

        Returns the (possibly extended) exceptions list.
        """
        for attr, value in self.subject.attrs.iteritems():
            # TODO: should support subtypes of the reference attribute
            if attr != 'bald_._reference':
                continue
            # TODO: check the type of value before using it as a lookup key
            child_dset = self.fhandle[value]
            # only the shapes matter for the check, so compare zero-filled
            # stand-ins rather than the stored data
            parray = np.zeros(self.dataset.shape)
            carray = np.zeros(child_dset.shape)
            if not valid_array_reference(parray, carray):
                # trailing spaces are required: adjacent literals are joined
                # with no separator
                msg = ('{} declares a child of {} but the arrays '
                       'do not conform to the bald array reference '
                       'rules')
                # fall back to the raw attribute value if the looked-up
                # object does not expose a name
                child_name = getattr(child_dset, 'name', value)
                msg = msg.format(self.name, child_name)
                exceptions.append(ValueError(msg))
        return exceptions
140+
60141

61142
class ValidationSet(Validation):
    """Placeholder subclass of Validation; it currently adds no behaviour."""

0 commit comments

Comments
 (0)