Skip to content

Commit 515ebd6

Browse files
committed
nc first step
1 parent 36321f5 commit 515ebd6

File tree

6 files changed

+147
-64
lines changed

6 files changed

+147
-64
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ dist/
1515
downloads/
1616
eggs/
1717
.eggs/
18-
lib/
1918
lib64/
2019
parts/
2120
sdist/

lib/bald/__init__.py

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
import re
33

44
import h5py
5+
import netCDF4
56
import requests
67

7-
from bald.validation import ContainerValidation, DatasetValidation
8+
import bald.validation as bv
89

910

1011
class HttpCache(object):
@@ -19,18 +20,18 @@ def __getitem__(self, item):
1920
if not item.startswith('http://') or item.startswith('https://'):
2021
raise ValueError('{} is not a HTTP URI.'.format(item))
2122
if item not in self.cache:
22-
headers={'Accept':'text/turtle'}
23+
headers = {'Accept': 'text/turtle'}
2324
self.cache[item] = requests.get(item, headers=headers)
2425

2526
return self.cache[item]
26-
2727

2828
def check_uri(self, uri):
2929
result = False
3030
if self[uri].status_code == 200:
3131
result = True
3232
return result
3333

34+
3435
class Subject(object):
3536
def __init__(self, attrs=None):
3637
"""
@@ -46,32 +47,47 @@ def __init__(self, attrs=None):
4647
self._http_uri = re.compile('{}'.format(_http_p))
4748
self._http_uri_prefix = re.compile('{}/|#'.format(_http_p))
4849

49-
5050
def prefixes(self):
5151
prefixes = {}
5252
for key, value in self.attrs.iteritems():
5353
if key.endswith('__') and self._http_uri_prefix.match(value):
5454
pref = key.rstrip('__')
55-
if prefixes.has_key(pref):
56-
raise ValueError('This container has conflicting prefix definitions')
55+
if pref in prefixes:
56+
raise ValueError('This container has conflicting prefix'
57+
' definitions.')
5758
prefixes[pref] = value
5859
return prefixes
5960

60-
6161
def unpack_uri(self, astring):
6262
result = astring
6363
if self._prefix_suffix.match(astring):
6464
prefix, suffix = self._prefix_suffix.match(astring).groups()
65-
66-
if self.prefixes().has_key(prefix):
65+
if prefix in self.prefixes():
6766
if self._http_uri.match(self.prefixes()[prefix]):
68-
result = astring.replace('{}__'.format(prefix), self.prefixes()[prefix])
67+
result = astring.replace('{}__'.format(prefix),
68+
self.prefixes()[prefix])
6969
return result
7070

71+
72+
@contextlib.contextmanager
73+
def load(afilepath):
74+
if afilepath.endswith('.hdf'):
75+
loader = h5py.File
76+
else:
77+
raise ValueError('filepath suffix not supported')
78+
try:
79+
f = loader(afilepath, "r")
80+
yield f
81+
finally:
82+
f.close()
83+
84+
7185
@contextlib.contextmanager
7286
def load(afilepath):
7387
if afilepath.endswith('.hdf'):
7488
loader = h5py.File
89+
elif afilepath.endswith('.nc'):
90+
loader = netCDF4.Dataset
7591
else:
7692
raise ValueError('filepath suffix not supported')
7793
try:
@@ -81,32 +97,46 @@ def load(afilepath):
8197
f.close()
8298

8399

100+
def validate_netcdf(afilepath):
101+
"""
102+
Validate a file with respect to binary-array-linked-data.
103+
Returns a :class:`bald.validation.Validation`
104+
"""
105+
106+
with load(afilepath) as fhandle:
107+
attrs = {}
108+
for k in fhandle.ncattrs():
109+
attrs[k] = getattr(fhandle, k)
110+
root_container = Subject(attrs)
111+
root_val = bv.ContainerValidation(subject=root_container,
112+
fhandle=fhandle)
113+
return root_val
114+
115+
84116
def validate_hdf5(afilepath):
85117
"""
86118
Validate a file with respect to binary-array-linked-data.
87-
Returns a :class:`bald.validation.Validation`
119+
Returns a :class:`bald.validation.Validation`
88120
"""
89-
121+
90122
with load(afilepath) as fhandle:
91-
valid = True
123+
sval = bv.StoredValidation()
92124
cache = {}
93125
root_container = Subject(fhandle.attrs)
94-
root_val = ContainerValidation(subject=root_container, fhandle=fhandle)
95-
if not root_val.is_valid():
96-
valid = False
126+
root_val = bv.ContainerValidation(subject=root_container,
127+
fhandle=fhandle)
128+
sval.stored_exceptions += root_val.exceptions()
97129
# iterate through the datasets
98130
for name, dataset in fhandle.items():
99-
# a dataset's attribute collection inherits from and specialises it's
100-
# container's attrbiute collection
131+
# a dataset's attribute collection inherits from and
132+
# specialises its container's attribute collection
133+
# #
134+
# this only helps with prefixes, afaik
101135
sattrs = dict(fhandle.attrs).copy()
102136
sattrs.update(dataset.attrs)
103137
dset = Subject(sattrs)
104-
dset_val = DatasetValidation(name, dataset, fhandle=fhandle, subject=dset)
105-
if not dset_val.is_valid():
106-
valid = False
107-
108-
return valid
109-
110-
111-
112-
138+
dset_val = bv.DatasetValidation(name, dataset, fhandle=fhandle,
139+
subject=dset)
140+
sval.stored_exceptions += dset_val.exceptions()
141+
142+
return sval

lib/bald/tests/__init__.pyc

-6 Bytes
Binary file not shown.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import unittest
2+
3+
import h5py
4+
import netCDF4
5+
import numpy as np
6+
7+
import bald
8+
from bald.tests import BaldTestCase
9+
10+
def _fattrs(f):
11+
f.bald__ = 'http://binary-array-ld.net/experimental/'
12+
f.rdf__ = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
13+
f.rdf__type = 'bald__Container'
14+
return f
15+
16+
17+
class Test(BaldTestCase):
18+
19+
def test_valid_uri(self):
20+
with self.temp_filename('.nc') as tfile:
21+
f = netCDF4.Dataset(tfile, "w", format="NETCDF4")
22+
23+
f = _fattrs(f)
24+
f.close()
25+
validation = bald.validate_netcdf(tfile)
26+
self.assertTrue(validation.is_valid())
27+
28+
def test_invalid_uri(self):
29+
with self.temp_filename('.nc') as tfile:
30+
f = netCDF4.Dataset(tfile, "w", format="NETCDF4")
31+
32+
f = _fattrs(f)
33+
setattr(f, 'bald__turtle', 'bald__walnut')
34+
f.close()
35+
validation = bald.validate_netcdf(tfile)
36+
self.assertFalse(validation.is_valid())
37+
38+
39+
if __name__ == '__main__':
40+
unittest.main()
41+
42+
43+
44+

lib/bald/tests/integration/test_validation.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def test_valid_uri(self):
3131
f = _fattrs(f)
3232
f = _create_parent_child(f, (11, 17), (11, 17))
3333
f.close()
34-
self.assertTrue(bald.validate_hdf5(tfile))
34+
self.assertTrue(bald.validate_hdf5(tfile).is_valid())
3535

3636
def test_invalid_uri(self):
3737
with self.temp_filename('.hdf') as tfile:
@@ -40,7 +40,7 @@ def test_invalid_uri(self):
4040
f = _create_parent_child(f, (11, 17), (11, 17))
4141
f.attrs['bald__turtle'] = 'bald__walnut'
4242
f.close()
43-
self.assertFalse(bald.validate_hdf5(tfile))
43+
self.assertFalse(bald.validate_hdf5(tfile).is_valid())
4444

4545
class TestArrayReference(BaldTestCase):
4646
def test_match(self):
@@ -49,23 +49,23 @@ def test_match(self):
4949
f = _fattrs(f)
5050
f = _create_parent_child(f, (11, 17), (11, 17))
5151
f.close()
52-
self.assertTrue(bald.validate_hdf5(tfile))
52+
self.assertTrue(bald.validate_hdf5(tfile).is_valid())
5353

5454
def test_mismatch_zeroth(self):
5555
with self.temp_filename('.hdf') as tfile:
5656
f = h5py.File(tfile, "w")
5757
f = _fattrs(f)
5858
f = _create_parent_child(f, (11, 17), (11, 13))
5959
f.close()
60-
self.assertFalse(bald.validate_hdf5(tfile))
60+
self.assertFalse(bald.validate_hdf5(tfile).is_valid())
6161

6262
def test_mismatch_oneth(self):
6363
with self.temp_filename('.hdf') as tfile:
6464
f = h5py.File(tfile, "w")
6565
f = _fattrs(f)
6666
f = _create_parent_child(f, (11, 17), (13, 17))
6767
f.close()
68-
self.assertFalse(bald.validate_hdf5(tfile))
68+
self.assertFalse(bald.validate_hdf5(tfile).is_valid())
6969

7070
def test_match_plead_dim(self):
7171
with self.temp_filename('.hdf') as tfile:
@@ -74,7 +74,7 @@ def test_match_plead_dim(self):
7474
# parent has leading dimension wrt child
7575
f = _create_parent_child(f, (4, 13, 17), (13, 17))
7676
f.close()
77-
self.assertTrue(bald.validate_hdf5(tfile))
77+
self.assertTrue(bald.validate_hdf5(tfile).is_valid())
7878

7979
def test_match_clead_dim(self):
8080
with self.temp_filename('.hdf') as tfile:
@@ -83,7 +83,7 @@ def test_match_clead_dim(self):
8383
# child has leading dimension wrt parent
8484
f = _create_parent_child(f, (13, 17), (7, 13, 17))
8585
f.close()
86-
self.assertTrue(bald.validate_hdf5(tfile))
86+
self.assertTrue(bald.validate_hdf5(tfile).is_valid())
8787

8888
def test_mismatch_pdisjc_lead_dim(self):
8989
with self.temp_filename('.hdf') as tfile:
@@ -93,7 +93,7 @@ def test_mismatch_pdisjc_lead_dim(self):
9393
f = _create_parent_child(f, (4, 13, 17), (7, 13, 17))
9494

9595
f.close()
96-
self.assertFalse(bald.validate_hdf5(tfile))
96+
self.assertFalse(bald.validate_hdf5(tfile).is_valid())
9797

9898
def test_mismatch_pdisjc_trail_dim(self):
9999
with self.temp_filename('.hdf') as tfile:
@@ -102,7 +102,7 @@ def test_mismatch_pdisjc_trail_dim(self):
102102
# child and parent have disjoint trailing dimensions
103103
f = _create_parent_child(f, (13, 17, 2), (13, 17, 9))
104104
f.close()
105-
self.assertFalse(bald.validate_hdf5(tfile))
105+
self.assertFalse(bald.validate_hdf5(tfile).is_valid())
106106

107107

108108

@@ -113,7 +113,7 @@ def test_mismatch_pdisjc_trail_dim(self):
113113
# #
114114
# f = _create_parent_child(f, (), ())
115115
# f.close()
116-
# self.assert(bald.validate_hdf5(tfile))
116+
# self.assert(bald.validate_hdf5(tfile).is_valid())
117117

118118
if __name__ == '__main__':
119119
unittest.main()

0 commit comments

Comments
 (0)