Skip to content

Commit e187465

Browse files
committed
isAliasedBy graph
1 parent de0935f commit e187465

File tree

9 files changed

+226
-204
lines changed

9 files changed

+226
-204
lines changed

lib/bald/__init__.py

Lines changed: 57 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import jinja2
77
import netCDF4
88
import numpy as np
9+
import pyparsing
910
import rdflib
1011
import requests
1112
import six
@@ -286,7 +287,12 @@ def __init__(self, baseuri, relative_id, attrs=None, prefixes=None,
286287

@property
def identity(self):
    """
    Return the full identifier of this subject.

    The identifier is ``baseuri`` and ``relative_id`` joined with '/'.
    When ``relative_id`` is empty or None, ``baseuri`` is returned on
    its own, so no separator is appended and no join is attempted on a
    missing relative part.

    """
    if self.relative_id:
        return '/'.join([self.baseuri, self.relative_id])
    return self.baseuri
290296

291297
def __str__(self):
292298
return '{}:{}: {}'.format(self.identity, type(self), self.attrs)
@@ -378,12 +384,19 @@ def unpack_rdfobject(self, astring, predicate):
378384
'?uri dct:identifier "{id}" ; '
379385
' rdf:type ?range .'
380386
'}}'.format(pred=predicate, id=astring))
381-
qres = self.alias_graph.query(rdfobj_alias_query)
382-
results = list(qres)
383-
if len(results) > 1:
384-
raise ValueError('multiple alias options')
385-
elif len(results) == 1:
386-
result = str(results[0][0])
387+
# qres = self.alias_graph.query(rdfobj_alias_query)
388+
try:
389+
qres = self.alias_graph.query(rdfobj_alias_query)
390+
# except Exception:
391+
# import pdb; pdb.set_trace()
392+
# qres = self.alias_graph.query(rdfobj_alias_query)
393+
results = list(qres)
394+
if len(results) > 1:
395+
raise ValueError('multiple alias options')
396+
elif len(results) == 1:
397+
result = str(results[0][0])
398+
except pyparsing.ParseException:
399+
pass
387400
return result
388401

389402
# def unpack_uri(self, astring):
@@ -424,24 +437,27 @@ def graph_elems(self):
424437
def _graph_elem_attrs(self, remaining_attrs):
425438
attrs = []
426439
for attr in remaining_attrs:
427-
if is_http_uri(self.unpack_uri(attr)):
440+
attr_uri = self.unpack_predicate(attr)
441+
if is_http_uri(attr_uri):
428442
kstr = self.link_template + ': '
429-
kstr = kstr.format(url=self.unpack_uri(attr), key=attr)
443+
kstr = kstr.format(url=attr_uri, key=attr)
430444
else:
431445
kstr = '{key}: '.format(key=attr)
432446
vals = remaining_attrs[attr]
433447
if isinstance(vals, six.string_types):
434-
if is_http_uri(self.unpack_uri(vals)):
448+
vuri = self.unpack_rdfobject(vals, predicate=attr_uri)
449+
if is_http_uri(vuri):
435450
vstr = self.link_template
436-
vstr = vstr.format(url=self.unpack_uri(vals), key=vals)
451+
vstr = vstr.format(url=vuri, key=vals)
437452
else:
438453
vstr = '{key}'.format(key=vals)
439454
else:
440455
vstrlist = []
441456
for val in vals:
442-
if is_http_uri(self.unpack_uri(val)):
457+
vuri = self.unpack_rdfobject(val, predicate=attr_uri)
458+
if is_http_uri(vuri):
443459
vstr = self.link_template
444-
vstr = vstr.format(url=self.unpack_uri(val), key=val)
460+
vstr = vstr.format(url=vuri, key=val)
445461
elif isinstance(val, Subject):
446462
vstr = ''
447463
else:
@@ -461,7 +477,7 @@ def _graph_elem_attrs(self, remaining_attrs):
461477
atype = self.link_template
462478
type_links = []
463479
for rdftype in self.rdf__type:
464-
type_links.append(atype.format(url=self.unpack_uri(rdftype), key=rdftype))
480+
type_links.append(atype.format(url=self.unpack_rdfobject(rdftype, 'rdf__type'), key=rdftype))
465481
type_links.sort()
466482
avar = avar.format(var=self.identity, type=', '.join(type_links), attrs=attrs)
467483

@@ -509,11 +525,6 @@ def rdfnode(self, graph):
509525
except AssertionError:
510526

511527
graph.add((selfnode, rdfpred, rdfobj))
512-
# elif is_http_uri(self.unpack_uri(obj)):
513-
# rdfobj = rdflib.URIRef(self.unpack_uri(obj))
514-
# else:
515-
# rdfobj = rdflib.Literal(obj)
516-
# graph.add((selfnode, rdflib.URIRef(self.unpack_uri(attr)), rdfobj))
517528
if isinstance(obj, Subject):
518529
obj_ref = rdflib.URIRef(obj.identity)
519530
if (obj_ref, None, None) not in graph:
@@ -683,7 +694,10 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None):
683694

684695
aliasgraph = rdflib.Graph()
685696
for alias in aliases:
686-
aliasgraph.parse(aliases[alias], format='xml')
697+
try:
698+
aliasgraph.parse(aliases[alias], format='xml')
699+
except TypeError:
700+
pass
687701
# if hasattr(fhandle, 'Conventions'):
688702
# conventions = [c.strip() for c in fhandle.Conventions.split(',')]
689703
# for conv in conventions:
@@ -803,9 +817,10 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None):
803817
refset.add(file_variables.get(dim))
804818
# Else, define a bald:childBroadcast
805819
else:
820+
# import pdb; pdb.set_trace()
806821
identity = '{}_{}_ref'.format(name, dim)
807-
if baseuri is not None:
808-
identity = baseuri + '/' + '{}_{}_ref'.format(name, dim)
822+
# if baseuri is not None:
823+
# identity = baseuri + '/' + '{}_{}_ref'.format(name, dim)
809824
rattrs = {}
810825
rattrs['rdf__type'] = 'bald__Reference'
811826
reshape = [1 for adim in var_shape]
@@ -814,7 +829,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None):
814829
reshape[cvi] = fhandle.variables[dim].size
815830
rattrs['bald__childBroadcast'] = tuple(reshape)
816831
rattrs['bald__array'] = set((file_variables.get(dim),))
817-
ref_node = Subject(baseuri, name, rattrs,
832+
ref_node = Subject(baseuri, identity, rattrs,
818833
prefixes=prefixes,
819834
aliases=aliases,
820835
alias_graph=aliasgraph)
@@ -827,23 +842,23 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None):
827842
return root_container
828843

829844

def validate_netcdf(afilepath, cache=None, baseuri=None):
    """
    Load a netCDF file and validate it with respect to
    binary-array-linked-data.

    ``baseuri`` is handed to :func:`load_netcdf`; ``cache`` is handed to
    :func:`validate`.

    Returns a :class:`bald.validation.Validation`

    """
    return validate(load_netcdf(afilepath, baseuri=baseuri), cache=cache)
838853

839854

def validate_hdf5(afilepath, cache=None, baseuri=None):
    """
    Load an HDF5 file and validate it with respect to
    binary-array-linked-data.

    ``baseuri`` is handed to :func:`load_hdf5`; ``cache`` is handed to
    :func:`validate`.

    Returns a :class:`bald.validation.Validation`

    """
    root_container = load_hdf5(afilepath, baseuri=baseuri)
    # Forward the caller's cache, matching validate_netcdf; without this
    # the ``cache`` argument was accepted but silently ignored.
    return validate(root_container, cache=cache)
848863

849864
def validate(root_container, sval=None, cache=None):
@@ -881,15 +896,20 @@ def careful_update(adict, bdict):
881896
adict.update(bdict)
882897
return adict
883898

def load_hdf5(afilepath, baseuri=None, alias_dict=None):
    """
    Open ``afilepath`` and build the root container describing it.

    afilepath: path of the file to open with :func:`load`
    baseuri: base URI passed to :func:`_hdf_group`; when None it
             defaults to ``'file://<afilepath>'``
    alias_dict: passed through unchanged to :func:`_hdf_group`

    Returns the root container produced by :func:`_hdf_group`, after
    :func:`_hdf_references` has been run over it.

    """
    # The default does not depend on the open handle, so resolve it
    # before opening the file.
    if baseuri is None:
        baseuri = 'file://{}'.format(afilepath)

    with load(afilepath) as fhandle:
        root_container, file_variables = _hdf_group(fhandle, baseuri=baseuri,
                                                    alias_dict=alias_dict)
        _hdf_references(fhandle, root_container, file_variables)
    return root_container
891910

892-
def _hdf_group(fhandle, id='root', uri=None, prefixes=None, aliases=None):
911+
def _hdf_group(fhandle, identity='root', baseuri=None, prefixes=None,
912+
aliases=None, alias_dict=None):
893913

894914
prefix_group = fhandle.attrs.get('bald__isPrefixedBy')
895915
if prefixes is None:
@@ -899,14 +919,14 @@ def _hdf_group(fhandle, id='root', uri=None, prefixes=None, aliases=None):
899919
alias_group = fhandle.attrs.get('bald__isAliasedBy')
900920
if aliases is None:
901921
aliases = {}
922+
if alias_dict is None:
923+
alias_dict = {}
902924
if alias_group:
903925
aliases = careful_update(aliases, dict(fhandle[alias_group].attrs))
904926
attrs = dict(fhandle.attrs)
905-
if uri is not None:
906-
identity = uri + id
907-
else:
908-
identity = id
909-
root_container = Container(identity, attrs, prefixes=prefixes, aliases=aliases)
927+
aliasgraph = rdflib.Graph()
928+
root_container = Container(baseuri, identity, attrs, prefixes=prefixes,
929+
aliases=aliases, alias_graph=aliasgraph)
910930

911931
root_container.attrs['bald__contains'] = []
912932

@@ -918,14 +938,14 @@ def _hdf_group(fhandle, id='root', uri=None, prefixes=None, aliases=None):
918938
(alias_group and dataset == fhandle[alias_group]))
919939
if not skip:
920940
if isinstance(dataset, h5py._hl.group.Group):
921-
new_cont, new_fvars = _hdf_group(dataset, name, uri, prefixes, aliases)
941+
new_cont, new_fvars = _hdf_group(dataset, name, baseuri, prefixes, aliases)
922942
root_container.attrs['bald__contains'].append(new_cont)
923943
file_variables = careful_update(file_variables, new_fvars)
924944
#if hasattr(dataset, 'shape'):
925945
elif isinstance(dataset, h5py._hl.dataset.Dataset):
926946
sattrs = dict(dataset.attrs)
927947
sattrs['bald__shape'] = dataset.shape
928-
dset = Array(name, sattrs, prefixes, aliases)
948+
dset = Array(baseuri, name, sattrs, prefixes, aliases, aliasgraph)
929949
root_container.attrs['bald__contains'].append(dset)
930950
file_variables[dataset.name] = dset
931951
return root_container, file_variables

lib/bald/tests/integration/CDL/array_alias.cdl

Lines changed: 0 additions & 26 deletions
This file was deleted.

lib/bald/tests/integration/CDL/ereefs_gbr4_ncld.cdl

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,9 @@ variables:
136136

137137
group: alias_list {
138138
// group attributes:
139-
:unit_id = "http://qudt.org/1.1/schema/qudt#unit";
140-
:medium_id = "http://environment.data.gov.au/def/op#matrix" ;
141-
:scaledQuantityKind_id = "http://environment.data.gov.au/def/op#propertyKind" ;
142-
:substanceOrTaxon_id = "http://environment.data.gov.au/def/op#objectOfInterest" ;
139+
:qudt = "http://qudt.org/1.1/schema/qudt";
140+
:ed_gov_au_op = "http://environment.data.gov.au/def/op" ;
141+
143142
} // group bald__alias_list
144143

145144
}

0 commit comments

Comments
 (0)