6
6
import jinja2
7
7
import netCDF4
8
8
import numpy as np
9
+ import pyparsing
9
10
import rdflib
10
11
import requests
11
12
import six
@@ -286,7 +287,12 @@ def __init__(self, baseuri, relative_id, attrs=None, prefixes=None,
286
287
287
288
@property
def identity(self):
    """Return the URI identity of this node.

    The identity is ``baseuri`` joined to ``relative_id`` with a ``/``;
    when ``relative_id`` is falsy (empty string or None) the bare
    ``baseuri`` is returned, so root nodes never gain a trailing
    separator.
    """
    # Removed the stale commented-out unconditional join left over from
    # the previous implementation.
    if self.relative_id:
        return '/'.join([self.baseuri, self.relative_id])
    return self.baseuri
290
296
291
297
def __str__ (self ):
292
298
return '{}:{}: {}' .format (self .identity , type (self ), self .attrs )
@@ -378,12 +384,19 @@ def unpack_rdfobject(self, astring, predicate):
378
384
'?uri dct:identifier "{id}" ; '
379
385
' rdf:type ?range .'
380
386
'}}' .format (pred = predicate , id = astring ))
381
- qres = self .alias_graph .query (rdfobj_alias_query )
382
- results = list (qres )
383
- if len (results ) > 1 :
384
- raise ValueError ('multiple alias options' )
385
- elif len (results ) == 1 :
386
- result = str (results [0 ][0 ])
387
+ # qres = self.alias_graph.query(rdfobj_alias_query)
388
+ try :
389
+ qres = self .alias_graph .query (rdfobj_alias_query )
390
+ # except Exception:
391
+ # import pdb; pdb.set_trace()
392
+ # qres = self.alias_graph.query(rdfobj_alias_query)
393
+ results = list (qres )
394
+ if len (results ) > 1 :
395
+ raise ValueError ('multiple alias options' )
396
+ elif len (results ) == 1 :
397
+ result = str (results [0 ][0 ])
398
+ except pyparsing .ParseException :
399
+ pass
387
400
return result
388
401
389
402
# def unpack_uri(self, astring):
@@ -424,24 +437,27 @@ def graph_elems(self):
424
437
def _graph_elem_attrs (self , remaining_attrs ):
425
438
attrs = []
426
439
for attr in remaining_attrs :
427
- if is_http_uri (self .unpack_uri (attr )):
440
+ attr_uri = self .unpack_predicate (attr )
441
+ if is_http_uri (attr_uri ):
428
442
kstr = self .link_template + ': '
429
- kstr = kstr .format (url = self . unpack_uri ( attr ) , key = attr )
443
+ kstr = kstr .format (url = attr_uri , key = attr )
430
444
else :
431
445
kstr = '{key}: ' .format (key = attr )
432
446
vals = remaining_attrs [attr ]
433
447
if isinstance (vals , six .string_types ):
434
- if is_http_uri (self .unpack_uri (vals )):
448
+ vuri = self .unpack_rdfobject (vals , predicate = attr_uri )
449
+ if is_http_uri (vuri ):
435
450
vstr = self .link_template
436
- vstr = vstr .format (url = self . unpack_uri ( vals ) , key = vals )
451
+ vstr = vstr .format (url = vuri , key = vals )
437
452
else :
438
453
vstr = '{key}' .format (key = vals )
439
454
else :
440
455
vstrlist = []
441
456
for val in vals :
442
- if is_http_uri (self .unpack_uri (val )):
457
+ vuri = self .unpack_rdfobject (val , predicate = attr_uri )
458
+ if is_http_uri (vuri ):
443
459
vstr = self .link_template
444
- vstr = vstr .format (url = self . unpack_uri ( val ) , key = val )
460
+ vstr = vstr .format (url = vuri , key = val )
445
461
elif isinstance (val , Subject ):
446
462
vstr = ''
447
463
else :
@@ -461,7 +477,7 @@ def _graph_elem_attrs(self, remaining_attrs):
461
477
atype = self .link_template
462
478
type_links = []
463
479
for rdftype in self .rdf__type :
464
- type_links .append (atype .format (url = self .unpack_uri (rdftype ), key = rdftype ))
480
+ type_links .append (atype .format (url = self .unpack_rdfobject (rdftype , 'rdf__type' ), key = rdftype ))
465
481
type_links .sort ()
466
482
avar = avar .format (var = self .identity , type = ', ' .join (type_links ), attrs = attrs )
467
483
@@ -509,11 +525,6 @@ def rdfnode(self, graph):
509
525
except AssertionError :
510
526
511
527
graph .add ((selfnode , rdfpred , rdfobj ))
512
- # elif is_http_uri(self.unpack_uri(obj)):
513
- # rdfobj = rdflib.URIRef(self.unpack_uri(obj))
514
- # else:
515
- # rdfobj = rdflib.Literal(obj)
516
- # graph.add((selfnode, rdflib.URIRef(self.unpack_uri(attr)), rdfobj))
517
528
if isinstance (obj , Subject ):
518
529
obj_ref = rdflib .URIRef (obj .identity )
519
530
if (obj_ref , None , None ) not in graph :
@@ -683,7 +694,10 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None):
683
694
684
695
aliasgraph = rdflib .Graph ()
685
696
for alias in aliases :
686
- aliasgraph .parse (aliases [alias ], format = 'xml' )
697
+ try :
698
+ aliasgraph .parse (aliases [alias ], format = 'xml' )
699
+ except TypeError :
700
+ pass
687
701
# if hasattr(fhandle, 'Conventions'):
688
702
# conventions = [c.strip() for c in fhandle.Conventions.split(',')]
689
703
# for conv in conventions:
@@ -803,9 +817,10 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None):
803
817
refset .add (file_variables .get (dim ))
804
818
# Else, define a bald:childBroadcast
805
819
else :
820
+ # import pdb; pdb.set_trace()
806
821
identity = '{}_{}_ref' .format (name , dim )
807
- if baseuri is not None :
808
- identity = baseuri + '/' + '{}_{}_ref' .format (name , dim )
822
+ # if baseuri is not None:
823
+ # identity = baseuri + '/' + '{}_{}_ref'.format(name, dim)
809
824
rattrs = {}
810
825
rattrs ['rdf__type' ] = 'bald__Reference'
811
826
reshape = [1 for adim in var_shape ]
@@ -814,7 +829,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None):
814
829
reshape [cvi ] = fhandle .variables [dim ].size
815
830
rattrs ['bald__childBroadcast' ] = tuple (reshape )
816
831
rattrs ['bald__array' ] = set ((file_variables .get (dim ),))
817
- ref_node = Subject (baseuri , name , rattrs ,
832
+ ref_node = Subject (baseuri , identity , rattrs ,
818
833
prefixes = prefixes ,
819
834
aliases = aliases ,
820
835
alias_graph = aliasgraph )
@@ -827,23 +842,23 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None):
827
842
return root_container
828
843
829
844
830
def validate_netcdf(afilepath, cache=None, baseuri=None):
    """
    Validate a netCDF file with respect to binary-array-linked-data.
    Returns a :class:`bald.validation.Validation`

    """
    # Load the file into a root Container, then delegate to validate().
    return validate(load_netcdf(afilepath, baseuri=baseuri), cache=cache)
838
853
839
854
840
def validate_hdf5(afilepath, cache=None, baseuri=None):
    """
    Validate a file with respect to binary-array-linked-data.
    Returns a :class:`bald.validation.Validation`

    """
    root_container = load_hdf5(afilepath, baseuri=baseuri)
    # Forward the cache, mirroring validate_netcdf; previously the
    # `cache` argument was accepted but silently ignored.
    return validate(root_container, cache=cache)
848
863
849
864
def validate (root_container , sval = None , cache = None ):
@@ -881,15 +896,20 @@ def careful_update(adict, bdict):
881
896
adict .update (bdict )
882
897
return adict
883
898
884
def load_hdf5(afilepath, baseuri=None, alias_dict=None):
    """Load an HDF5 file and return the root :class:`Container`.

    Opens ``afilepath`` via the module's ``load`` context manager, builds
    the group/variable graph with ``_hdf_group``, and resolves variable
    references with ``_hdf_references``.

    :param afilepath: path to the HDF5 file.
    :param baseuri: base URI for node identities; defaults to a
        ``file://`` URI derived from ``afilepath``.
    :param alias_dict: optional alias mapping passed to ``_hdf_group``.
    :return: the root container of the loaded graph.
    """
    # Removed the dead `cache = {}` local (it was never read and was
    # already flagged "# unused?").
    with load(afilepath) as fhandle:
        if baseuri is None:
            baseuri = 'file://{}'.format(afilepath)

        root_container, file_variables = _hdf_group(fhandle, baseuri=baseuri,
                                                    alias_dict=alias_dict)
        _hdf_references(fhandle, root_container, file_variables)
        return root_container
891
910
892
- def _hdf_group (fhandle , id = 'root' , uri = None , prefixes = None , aliases = None ):
911
+ def _hdf_group (fhandle , identity = 'root' , baseuri = None , prefixes = None ,
912
+ aliases = None , alias_dict = None ):
893
913
894
914
prefix_group = fhandle .attrs .get ('bald__isPrefixedBy' )
895
915
if prefixes is None :
@@ -899,14 +919,14 @@ def _hdf_group(fhandle, id='root', uri=None, prefixes=None, aliases=None):
899
919
alias_group = fhandle .attrs .get ('bald__isAliasedBy' )
900
920
if aliases is None :
901
921
aliases = {}
922
+ if alias_dict is None :
923
+ alias_dict = {}
902
924
if alias_group :
903
925
aliases = careful_update (aliases , dict (fhandle [alias_group ].attrs ))
904
926
attrs = dict (fhandle .attrs )
905
- if uri is not None :
906
- identity = uri + id
907
- else :
908
- identity = id
909
- root_container = Container (identity , attrs , prefixes = prefixes , aliases = aliases )
927
+ aliasgraph = rdflib .Graph ()
928
+ root_container = Container (baseuri , identity , attrs , prefixes = prefixes ,
929
+ aliases = aliases , alias_graph = aliasgraph )
910
930
911
931
root_container .attrs ['bald__contains' ] = []
912
932
@@ -918,14 +938,14 @@ def _hdf_group(fhandle, id='root', uri=None, prefixes=None, aliases=None):
918
938
(alias_group and dataset == fhandle [alias_group ]))
919
939
if not skip :
920
940
if isinstance (dataset , h5py ._hl .group .Group ):
921
- new_cont , new_fvars = _hdf_group (dataset , name , uri , prefixes , aliases )
941
+ new_cont , new_fvars = _hdf_group (dataset , name , baseuri , prefixes , aliases )
922
942
root_container .attrs ['bald__contains' ].append (new_cont )
923
943
file_variables = careful_update (file_variables , new_fvars )
924
944
#if hasattr(dataset, 'shape'):
925
945
elif isinstance (dataset , h5py ._hl .dataset .Dataset ):
926
946
sattrs = dict (dataset .attrs )
927
947
sattrs ['bald__shape' ] = dataset .shape
928
- dset = Array (name , sattrs , prefixes , aliases )
948
+ dset = Array (baseuri , name , sattrs , prefixes , aliases , aliasgraph )
929
949
root_container .attrs ['bald__contains' ].append (dset )
930
950
file_variables [dataset .name ] = dset
931
951
return root_container , file_variables
0 commit comments