Skip to content

Commit 7adc94d

Browse files
committed
aliases
1 parent e26d278 commit 7adc94d

File tree

3 files changed

+31
-11
lines changed

3 files changed

+31
-11
lines changed

lib/bald/__init__.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@ class HttpCache(object):
1515
def __init__(self):
1616
self.cache = {}
1717

18+
def is_http_uri(self, item):
19+
return item.startswith('http://') or item.startswith('https://')
20+
1821
def __getitem__(self, item):
1922

20-
if not item.startswith('http://') or item.startswith('https://'):
23+
if not self.is_http_uri(item):
2124
raise ValueError('{} is not a HTTP URI.'.format(item))
2225
if item not in self.cache:
2326
headers = {'Accept': 'text/turtle'}
@@ -33,7 +36,7 @@ def check_uri(self, uri):
3336

3437

3538
class Subject(object):
36-
def __init__(self, attrs=None, prefixes=None):
39+
def __init__(self, attrs=None, prefixes=None, aliases=None):
3740
"""
3841
A subject of metadata statements.
3942
@@ -43,7 +46,10 @@ def __init__(self, attrs=None, prefixes=None):
4346
attrs = {}
4447
if prefixes is None:
4548
prefixes = {}
49+
if aliases is None:
50+
aliases = {}
4651
self.attrs = attrs
52+
self.aliases = aliases
4753
self._prefixes = prefixes
4854
self._prefix_suffix = re.compile('(^(?:(?!__).)*)__((?!.*__).*$)')
4955
_http_p = 'http[s]?://.*'
@@ -69,6 +75,8 @@ def unpack_uri(self, astring):
6975
if self._http_uri.match(self.prefixes()[prefix]):
7076
result = astring.replace('{}__'.format(prefix),
7177
self.prefixes()[prefix])
78+
elif astring in self.aliases:
79+
result = self.aliases[astring]
7280
return result
7381

7482

@@ -147,7 +155,11 @@ def validate_hdf5(afilepath):
147155
prefixes = {}
148156
if prefix_group:
149157
prefixes = fhandle[prefix_group].attrs
150-
root_container = Subject(fhandle.attrs, prefixes=prefixes)
158+
alias_group = fhandle.attrs.get('bald__isAliasedBy')
159+
aliases = {}
160+
if alias_group:
161+
aliases = dict(fhandle[alias_group].attrs.iteritems())
162+
root_container = Subject(fhandle.attrs, prefixes=prefixes, aliases=aliases)
151163
root_val = bv.ContainerValidation(subject=root_container,
152164
fhandle=fhandle)
153165
sval.stored_exceptions += root_val.exceptions()
@@ -159,7 +171,7 @@ def validate_hdf5(afilepath):
159171
# #
160172
sattrs = dict(fhandle.attrs).copy()
161173
sattrs.update(dataset.attrs)
162-
dset = Subject(sattrs, prefixes)
174+
dset = Subject(sattrs, prefixes, aliases)
163175
dset_val = bv.ArrayValidation(name, dataset, fhandle=fhandle,
164176
subject=dset)
165177
sval.stored_exceptions += dset_val.exceptions()

lib/bald/tests/integration/test_aliases.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ def test_valid_uri(self):
3232
f = h5py.File(tfile, "w")
3333
f = _fattrs(f)
3434
group_alias = f.create_group('bald__alias_list')
35-
f.attrs['bald__aliases'] = group_alias.ref
36-
group_alias['skosPrefLabel'] = 'http://www.w3.org/2004/02/skos/core#prefLabel'
35+
f.attrs['bald__isAliasedBy'] = group_alias.ref
36+
group_alias.attrs['skosPrefLabel'] = 'http://www.w3.org/2004/02/skos/core#prefLabel'
3737
dsetp = f.create_dataset("parent_dataset", (11, 17), dtype='i')
3838
dsetp.attrs['skosPrefLabel'] = 'alabel'
3939
f.close()
@@ -46,8 +46,8 @@ def test_invalid_uri(self):
4646
f = _fattrs(f)
4747
f.attrs['bald__turtle'] = 'bald__walnut'
4848
group_alias = f.create_group('bald__alias_list')
49-
f.attrs['bald__aliases'] = group_alias.ref
50-
group_alias['skosPrefLabel'] = 'http://www.w3.org/2004/02/skos/core#notThisPrefLabel'
49+
f.attrs['bald__isAliasedBy'] = group_alias.ref
50+
group_alias.attrs['skosPrefLabel'] = 'http://www.w3.org/2004/02/skos/core#notThisPrefLabel'
5151
dsetp = f.create_dataset("parent_dataset", (11, 17), dtype='i')
5252
dsetp.attrs['skosPrefLabel'] = 'alabel'
5353
f.close()

lib/bald/validation.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,16 @@ def _check_uri(uri, exceptions):
9191
for pref, uri in self.subject.prefixes().iteritems():
9292
exceptions = _check_uri(self.subject.unpack_uri(uri),
9393
exceptions)
94+
for alias, uri in self.subject.aliases.iteritems():
95+
exceptions = _check_uri(self.subject.unpack_uri(uri),
96+
exceptions)
9497
for attr, value in self.subject.attrs.iteritems():
9598
exceptions = _check_uri(self.subject.unpack_uri(attr),
9699
exceptions)
97100
if isinstance(value, str):
98-
exceptions = _check_uri(self.subject.unpack_uri(value),
99-
exceptions)
101+
val = self.subject.unpack_uri(value)
102+
if self.cache.is_http_uri(val):
103+
exceptions = _check_uri(val, exceptions)
100104
return exceptions
101105

102106
def check_attr_domain_range(self, exceptions):
@@ -106,7 +110,11 @@ def check_attr_domain_range(self, exceptions):
106110
# thus we have a payload
107111
# go rdf
108112
g = rdflib.Graph()
109-
g.parse(data=self.cache[uri].text, format="n3")
113+
data=self.cache[uri].text
114+
try:
115+
g.parse(data=self.cache[uri].text, format="n3")
116+
except Exception, e:
117+
g.parse(data=self.cache[uri].text, format="xml")
110118
query = ('SELECT ?s \n'
111119
'(GROUP_CONCAT(?domain; SEPARATOR=" | ") AS ?domains)'
112120
' \n'

0 commit comments

Comments
 (0)