Skip to content

Commit bb419f7

Browse files
committed
Merge pull request #4 from marqh/netcdf
Netcdf
2 parents 36321f5 + c78ed91 commit bb419f7

File tree

6 files changed

+221
-84
lines changed

6 files changed

+221
-84
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ dist/
1515
downloads/
1616
eggs/
1717
.eggs/
18-
lib/
1918
lib64/
2019
parts/
2120
sdist/

lib/bald/__init__.py

Lines changed: 67 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
import re
33

44
import h5py
5+
import netCDF4
56
import requests
67

7-
from bald.validation import ContainerValidation, DatasetValidation
8+
import bald.validation as bv
89

910

1011
class HttpCache(object):
@@ -19,18 +20,18 @@ def __getitem__(self, item):
1920
if not item.startswith('http://') or item.startswith('https://'):
2021
raise ValueError('{} is not a HTTP URI.'.format(item))
2122
if item not in self.cache:
22-
headers={'Accept':'text/turtle'}
23+
headers = {'Accept': 'text/turtle'}
2324
self.cache[item] = requests.get(item, headers=headers)
2425

2526
return self.cache[item]
26-
2727

2828
def check_uri(self, uri):
2929
result = False
3030
if self[uri].status_code == 200:
3131
result = True
3232
return result
3333

34+
3435
class Subject(object):
3536
def __init__(self, attrs=None):
3637
"""
@@ -46,32 +47,47 @@ def __init__(self, attrs=None):
4647
self._http_uri = re.compile('{}'.format(_http_p))
4748
self._http_uri_prefix = re.compile('{}/|#'.format(_http_p))
4849

49-
5050
def prefixes(self):
5151
prefixes = {}
5252
for key, value in self.attrs.iteritems():
5353
if key.endswith('__') and self._http_uri_prefix.match(value):
5454
pref = key.rstrip('__')
55-
if prefixes.has_key(pref):
56-
raise ValueError('This container has conflicting prefix definitions')
55+
if pref in prefixes:
56+
raise ValueError('This container has conflicting prefix'
57+
' definitions.')
5758
prefixes[pref] = value
5859
return prefixes
5960

60-
6161
def unpack_uri(self, astring):
6262
result = astring
6363
if self._prefix_suffix.match(astring):
6464
prefix, suffix = self._prefix_suffix.match(astring).groups()
65-
66-
if self.prefixes().has_key(prefix):
65+
if prefix in self.prefixes():
6766
if self._http_uri.match(self.prefixes()[prefix]):
68-
result = astring.replace('{}__'.format(prefix), self.prefixes()[prefix])
67+
result = astring.replace('{}__'.format(prefix),
68+
self.prefixes()[prefix])
6969
return result
7070

71+
72+
@contextlib.contextmanager
73+
def load(afilepath):
74+
if afilepath.endswith('.hdf'):
75+
loader = h5py.File
76+
else:
77+
raise ValueError('filepath suffix not supported')
78+
try:
79+
f = loader(afilepath, "r")
80+
yield f
81+
finally:
82+
f.close()
83+
84+
7185
@contextlib.contextmanager
7286
def load(afilepath):
7387
if afilepath.endswith('.hdf'):
7488
loader = h5py.File
89+
elif afilepath.endswith('.nc'):
90+
loader = netCDF4.Dataset
7591
else:
7692
raise ValueError('filepath suffix not supported')
7793
try:
@@ -81,32 +97,56 @@ def load(afilepath):
8197
f.close()
8298

8399

100+
def validate_netcdf(afilepath):
101+
"""
102+
Validate a file with respect to binary-array-linked-data.
103+
Returns a :class:`bald.validation.Validation`
104+
"""
105+
106+
with load(afilepath) as fhandle:
107+
sval = bv.StoredValidation()
108+
attrs = {}
109+
for k in fhandle.ncattrs():
110+
attrs[k] = getattr(fhandle, k)
111+
root_container = Subject(attrs)
112+
root_val = bv.ContainerValidation(subject=root_container,
113+
fhandle=fhandle)
114+
sval.stored_exceptions += root_val.exceptions()
115+
for name in fhandle.variables:
116+
sattrs = fhandle.__dict__.copy()
117+
sattrs.update(fhandle.variables[name].__dict__.copy())
118+
var = Subject(sattrs)
119+
var_val = bv.ArrayValidation(name, fhandle.variables[name], fhandle=fhandle,
120+
subject=var)
121+
sval.stored_exceptions += var_val.exceptions()
122+
123+
return sval
124+
125+
84126
def validate_hdf5(afilepath):
85127
"""
86128
Validate a file with respect to binary-array-linked-data.
87-
Returns a :class:`bald.validation.Validation`
129+
Returns a :class:`bald.validation.Validation`
88130
"""
89-
131+
90132
with load(afilepath) as fhandle:
91-
valid = True
133+
sval = bv.StoredValidation()
92134
cache = {}
93135
root_container = Subject(fhandle.attrs)
94-
root_val = ContainerValidation(subject=root_container, fhandle=fhandle)
95-
if not root_val.is_valid():
96-
valid = False
136+
root_val = bv.ContainerValidation(subject=root_container,
137+
fhandle=fhandle)
138+
sval.stored_exceptions += root_val.exceptions()
97139
# iterate through the datasets
98140
for name, dataset in fhandle.items():
99-
# a dataset's attribute collection inherits from and specialises it's
100-
# container's attrbiute collection
141+
# a dataset's attribute collection inherits from and
142+
# specialises its container's attribute collection
143+
# this only helps with prefixes, afaik, hence:
144+
# #
101145
sattrs = dict(fhandle.attrs).copy()
102146
sattrs.update(dataset.attrs)
103147
dset = Subject(sattrs)
104-
dset_val = DatasetValidation(name, dataset, fhandle=fhandle, subject=dset)
105-
if not dset_val.is_valid():
106-
valid = False
107-
108-
return valid
109-
110-
111-
112-
148+
dset_val = bv.ArrayValidation(name, dataset, fhandle=fhandle,
149+
subject=dset)
150+
sval.stored_exceptions += dset_val.exceptions()
151+
152+
return sval

lib/bald/tests/__init__.pyc

-6 Bytes
Binary file not shown.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import unittest
2+
3+
import h5py
4+
import netCDF4
5+
import numpy as np
6+
7+
import bald
8+
from bald.tests import BaldTestCase
9+
10+
def _fattrs(f):
11+
f.bald__ = 'http://binary-array-ld.net/latest/'
12+
f.rdf__ = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
13+
f.rdf__type = 'bald__Container'
14+
return f
15+
16+
def _create_parent_child(f, pshape, cshape):
17+
for i, pdimsize in enumerate(pshape):
18+
f.createDimension("pdim{}".format(str(i)), pdimsize)
19+
for i, cdimsize in enumerate(cshape):
20+
f.createDimension("cdim{}".format(str(i)), cdimsize)
21+
varp = f.createVariable("parent_variable", 'i4', tuple(["pdim{}".format(str(i)) for i, _ in enumerate(pshape)]))
22+
varc = f.createVariable("child_variable", 'i4', tuple(["cdim{}".format(str(i)) for i, _ in enumerate(cshape)]))
23+
varp.rdf__type = 'bald__Array'
24+
varp.bald__references = "child_variable"
25+
varc.rdf__type = 'bald__Array'
26+
varc.rdf__type = 'bald__Reference'
27+
varc.bald__array = "child_variable"
28+
return f
29+
30+
31+
class Test(BaldTestCase):
32+
33+
def test_valid_uri(self):
34+
with self.temp_filename('.nc') as tfile:
35+
f = netCDF4.Dataset(tfile, "w", format="NETCDF4")
36+
37+
f = _fattrs(f)
38+
f.close()
39+
validation = bald.validate_netcdf(tfile)
40+
self.assertTrue(validation.is_valid())
41+
42+
def test_invalid_uri(self):
43+
with self.temp_filename('.nc') as tfile:
44+
f = netCDF4.Dataset(tfile, "w", format="NETCDF4")
45+
46+
f = _fattrs(f)
47+
setattr(f, 'bald__turtle', 'bald__walnut')
48+
f.close()
49+
validation = bald.validate_netcdf(tfile)
50+
self.assertFalse(validation.is_valid())
51+
52+
53+
class TestArrayReference(BaldTestCase):
54+
def test_match(self):
55+
with self.temp_filename('.nc') as tfile:
56+
f = netCDF4.Dataset(tfile, "w", format="NETCDF4")
57+
f = _fattrs(f)
58+
f = _create_parent_child(f, (11, 17), (11, 17))
59+
f.close()
60+
validation = bald.validate_netcdf(tfile)
61+
self.assertTrue(validation.is_valid())
62+
63+
def test_mismatch_zeroth(self):
64+
with self.temp_filename('.nc') as tfile:
65+
f = netCDF4.Dataset(tfile, "w", format="NETCDF4")
66+
f = _fattrs(f)
67+
f = _create_parent_child(f, (11, 17), (11, 13))
68+
f.close()
69+
validation = bald.validate_netcdf(tfile)
70+
self.assertFalse(validation.is_valid())
71+
72+
73+
if __name__ == '__main__':
74+
unittest.main()
75+
76+
77+
78+

lib/bald/tests/integration/test_validation.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,19 @@
77
from bald.tests import BaldTestCase
88

99
def _fattrs(f):
10-
f.attrs['bald__'] = 'http://binary-array-ld.net/experimental/'
10+
f.attrs['bald__'] = 'http://binary-array-ld.net/latest/'
1111
f.attrs['rdf__'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
1212
f.attrs['rdf__type'] = 'bald__Container'
1313
return f
1414

1515
def _create_parent_child(f, pshape, cshape):
1616
dsetp = f.create_dataset("parent_dataset", pshape, dtype='i')
1717
dsetc = f.create_dataset("child_dataset", cshape, dtype='i')
18-
dsetp.attrs['rdf__type'] = 'bald__Dataset'
18+
dsetp.attrs['rdf__type'] = 'bald__Array'
1919
dsetp.attrs['bald__references'] = dsetc.ref
20-
dsetc.attrs['rdf__type'] = 'bald__Dataset'
20+
dsetc.attrs['rdf__type'] = 'bald__Array'
2121
dsetc.attrs['rdf__type'] = 'bald__Reference'
22-
dsetc.attrs['bald__dataset'] = dsetc.ref
22+
dsetc.attrs['bald__array'] = dsetc.ref
2323
return f
2424

2525

@@ -31,7 +31,8 @@ def test_valid_uri(self):
3131
f = _fattrs(f)
3232
f = _create_parent_child(f, (11, 17), (11, 17))
3333
f.close()
34-
self.assertTrue(bald.validate_hdf5(tfile))
34+
validation = bald.validate_hdf5(tfile)
35+
self.assertTrue(validation.is_valid())
3536

3637
def test_invalid_uri(self):
3738
with self.temp_filename('.hdf') as tfile:
@@ -40,7 +41,8 @@ def test_invalid_uri(self):
4041
f = _create_parent_child(f, (11, 17), (11, 17))
4142
f.attrs['bald__turtle'] = 'bald__walnut'
4243
f.close()
43-
self.assertFalse(bald.validate_hdf5(tfile))
44+
validation = bald.validate_hdf5(tfile)
45+
self.assertFalse(validation.is_valid())
4446

4547
class TestArrayReference(BaldTestCase):
4648
def test_match(self):
@@ -49,23 +51,26 @@ def test_match(self):
4951
f = _fattrs(f)
5052
f = _create_parent_child(f, (11, 17), (11, 17))
5153
f.close()
52-
self.assertTrue(bald.validate_hdf5(tfile))
54+
validation = bald.validate_hdf5(tfile)
55+
self.assertTrue(validation.is_valid())
5356

5457
def test_mismatch_zeroth(self):
5558
with self.temp_filename('.hdf') as tfile:
5659
f = h5py.File(tfile, "w")
5760
f = _fattrs(f)
5861
f = _create_parent_child(f, (11, 17), (11, 13))
5962
f.close()
60-
self.assertFalse(bald.validate_hdf5(tfile))
63+
validation = bald.validate_hdf5(tfile)
64+
self.assertFalse(validation.is_valid())
6165

6266
def test_mismatch_oneth(self):
6367
with self.temp_filename('.hdf') as tfile:
6468
f = h5py.File(tfile, "w")
6569
f = _fattrs(f)
6670
f = _create_parent_child(f, (11, 17), (13, 17))
6771
f.close()
68-
self.assertFalse(bald.validate_hdf5(tfile))
72+
validation = bald.validate_hdf5(tfile)
73+
self.assertFalse(validation.is_valid())
6974

7075
def test_match_plead_dim(self):
7176
with self.temp_filename('.hdf') as tfile:
@@ -74,7 +79,8 @@ def test_match_plead_dim(self):
7479
# parent has leading dimension wrt child
7580
f = _create_parent_child(f, (4, 13, 17), (13, 17))
7681
f.close()
77-
self.assertTrue(bald.validate_hdf5(tfile))
82+
validation = bald.validate_hdf5(tfile)
83+
self.assertTrue(validation.is_valid())
7884

7985
def test_match_clead_dim(self):
8086
with self.temp_filename('.hdf') as tfile:
@@ -83,7 +89,8 @@ def test_match_clead_dim(self):
8389
# child has leading dimension wrt parent
8490
f = _create_parent_child(f, (13, 17), (7, 13, 17))
8591
f.close()
86-
self.assertTrue(bald.validate_hdf5(tfile))
92+
validation = bald.validate_hdf5(tfile)
93+
self.assertTrue(validation.is_valid())
8794

8895
def test_mismatch_pdisjc_lead_dim(self):
8996
with self.temp_filename('.hdf') as tfile:
@@ -93,7 +100,8 @@ def test_mismatch_pdisjc_lead_dim(self):
93100
f = _create_parent_child(f, (4, 13, 17), (7, 13, 17))
94101

95102
f.close()
96-
self.assertFalse(bald.validate_hdf5(tfile))
103+
validation = bald.validate_hdf5(tfile)
104+
self.assertFalse(validation.is_valid())
97105

98106
def test_mismatch_pdisjc_trail_dim(self):
99107
with self.temp_filename('.hdf') as tfile:
@@ -102,8 +110,8 @@ def test_mismatch_pdisjc_trail_dim(self):
102110
# child and parent have disjoint trailing dimensions
103111
f = _create_parent_child(f, (13, 17, 2), (13, 17, 9))
104112
f.close()
105-
self.assertFalse(bald.validate_hdf5(tfile))
106-
113+
validation = bald.validate_hdf5(tfile)
114+
self.assertFalse(validation.is_valid())
107115

108116

109117
# def test_match_(self):
@@ -113,7 +121,7 @@ def test_mismatch_pdisjc_trail_dim(self):
113121
# #
114122
# f = _create_parent_child(f, (), ())
115123
# f.close()
116-
# self.assert(bald.validate_hdf5(tfile))
124+
# self.assert(bald.validate_hdf5(tfile).is_valid())
117125

118126
if __name__ == '__main__':
119127
unittest.main()

0 commit comments

Comments
 (0)