1
1
import contextlib
2
+ import copy
2
3
import re
3
4
4
5
import h5py
@@ -252,19 +253,23 @@ def check_uri(self, uri):
252
253
253
254
class Subject (object ):
254
255
_rdftype = 'bald__Subject'
255
- def __init__ (self , identity , attrs = None , prefixes = None , aliases = None ):
256
+ def __init__ (self , baseuri , relative_id , attrs = None , prefixes = None ,
257
+ aliases = None , alias_graph = None ):
256
258
"""
257
259
A subject of metadata statements.
258
260
259
261
attrs: an dictionary of key value pair attributes
260
262
"""
261
- self .identity = identity
263
+ self .baseuri = baseuri
264
+ self .relative_id = relative_id
265
+
262
266
if attrs is None :
263
267
attrs = {}
264
268
if prefixes is None :
265
269
prefixes = {}
266
270
if aliases is None :
267
271
aliases = {}
272
+
268
273
self .attrs = attrs
269
274
270
275
self .rdf__type = self ._rdftype
@@ -275,6 +280,13 @@ def __init__(self, identity, attrs=None, prefixes=None, aliases=None):
275
280
_http_p = 'http[s]?://.*'
276
281
self ._http_uri = re .compile ('{}' .format (_http_p ))
277
282
self ._http_uri_prefix = re .compile ('{}/|#' .format (_http_p ))
283
+ if alias_graph is None :
284
+ alias_graph = rdflib .Graph ()
285
+ self .alias_graph = alias_graph
286
+
287
@property
def identity(self):
    """
    The full identifier of this subject.

    Built by joining ``baseuri`` and ``relative_id`` with a single '/';
    an empty ``relative_id`` therefore yields ``baseuri`` followed by a
    trailing '/'.

    """
    parts = (self.baseuri, self.relative_id)
    return '/'.join(parts)
278
290
279
291
def __str__(self):
    """
    Return a text summary: the identity, the instance's type and the
    attrs mapping, rendered as ``identity:type: attrs``.

    """
    template = '{}:{}: {}'
    return template.format(self.identity, type(self), self.attrs)
@@ -283,8 +295,9 @@ def __repr__(self):
283
295
return str (self )
284
296
285
297
def __setattr__ (self , attr , value ):
286
- reserved_attrs = ['identity' , 'prefixes' , '_prefixes' , '_prefix_suffix' ,
287
- '_http_uri_prefix' , '_http_uri' , 'aliases' , 'attrs' , '_rdftype' ]
298
+ reserved_attrs = ['baseuri' , 'relative_id' , 'prefixes' , '_prefixes' ,
299
+ '_prefix_suffix' , '_http_uri_prefix' , '_http_uri' ,
300
+ 'aliases' , 'alias_graph' , 'attrs' , '_rdftype' ]
288
301
if attr in reserved_attrs :
289
302
object .__setattr__ (self , attr , value )
290
303
else :
@@ -307,6 +320,7 @@ def __getattr__(self, attr):
307
320
raise AttributeError (msg )
308
321
return self .attrs [attr ]
309
322
323
+ #@property
310
324
def prefixes (self ):
311
325
prefixes = {}
312
326
for key , value in self ._prefixes .items ():
@@ -318,23 +332,77 @@ def prefixes(self):
318
332
prefixes [pref ] = value
319
333
return prefixes
320
334
321
- def unpack_uri (self , astring ):
322
- """
323
- Return a URI for the given input string, or return the astring unchanged if
324
- none is available.
335
def unpack_predicate(self, astring):
    """
    Return the string to use as the RDF predicate for the name astring.

    Resolution order:

    * a string matching the prefix/suffix pattern whose prefix is a known
      prefix mapped to an http(s) URI has its ``<prefix>__`` part replaced
      by that URI;
    * any other string is looked up in ``self.alias_graph`` as the
      ``dct:identifier`` of a resource typed ``rdf:Property`` or
      ``owl:ObjectProperty``; a single hit supplies the URI;
    * if neither step changed the value, it is treated as local to this
      file and returned as ``self.baseuri + '/' + astring``.

    Raises ValueError if the alias graph holds more than one candidate.

    """
    result = astring
    if isinstance(astring, six.string_types):
        match = self._prefix_suffix.match(astring)
        if match:
            prefix, _suffix = match.groups()
            # prefixes() builds a new dict per call: evaluate it once.
            prefixes = self.prefixes()
            if prefix in prefixes and self._http_uri.match(prefixes[prefix]):
                result = astring.replace('{}__'.format(prefix),
                                         prefixes[prefix])
        else:
            # The alias is supplied through initBindings rather than being
            # formatted into the query text, so quotes or backslashes in
            # an attribute name cannot break or alter the query.
            predicate_alias_query = (
                'prefix dct: <http://purl.org/dc/terms/> '
                'prefix owl: <http://www.w3.org/2002/07/owl#> '
                'prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> '
                'select ?uri where '
                '{?uri dct:identifier ?identifier ; '
                '      rdf:type ?type. '
                'FILTER(?type in (rdf:Property, owl:ObjectProperty) ) '
                '}')
            bindings = {'identifier': rdflib.Literal(astring)}
            qres = self.alias_graph.query(predicate_alias_query,
                                          initBindings=bindings)
            results = list(qres)
            if len(results) > 1:
                raise ValueError('multiple alias options')
            elif len(results) == 1:
                result = str(results[0][0])
    if result == astring:
        # Nothing resolved: scope the name to this file's base URI.
        result = self.baseuri + '/' + result
    return result
325
361
326
- """
362
def unpack_rdfobject(self, astring, predicate):
    """
    Return the URI for the attribute value astring, or astring unchanged
    if none is available.

    * a string matching the prefix/suffix pattern whose prefix is a known
      prefix mapped to an http(s) URI has its ``<prefix>__`` part replaced
      by that URI;
    * any other string is looked up in ``self.alias_graph``: the result is
      the resource whose ``dct:identifier`` equals astring and whose
      ``rdf:type`` is an ``rdfs:range`` of predicate;
    * non-string values are returned untouched.

    predicate: the URI of the predicate this value is the object of
               (not required to be http; it can be a file URI too).

    Raises ValueError if the alias graph holds more than one candidate.

    """
    result = astring
    if not isinstance(astring, six.string_types):
        return result

    match = self._prefix_suffix.match(astring)
    if match:
        prefix, _suffix = match.groups()
        # prefixes() builds a new dict per call: evaluate it once.
        prefixes = self.prefixes()
        if prefix in prefixes and self._http_uri.match(prefixes[prefix]):
            result = astring.replace('{}__'.format(prefix),
                                     prefixes[prefix])
        return result

    # Both the predicate and the alias are supplied through initBindings
    # rather than being formatted into the query text, so characters such
    # as quotes, backslashes or '>' cannot break or alter the query.
    rdfobj_alias_query = (
        'prefix dct: <http://purl.org/dc/terms/> '
        'prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> '
        'prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> '
        'select ?uri where '
        '{ ?pred rdfs:range ?range . '
        '?uri dct:identifier ?identifier ; '
        '     rdf:type ?range .'
        '}')
    bindings = {'pred': rdflib.URIRef(six.text_type(predicate)),
                'identifier': rdflib.Literal(astring)}
    qres = self.alias_graph.query(rdfobj_alias_query,
                                  initBindings=bindings)
    results = list(qres)
    if len(results) > 1:
        raise ValueError('multiple alias options')
    elif len(results) == 1:
        result = str(results[0][0])
    return result
337
388
389
+ # def unpack_uri(self, astring):
390
+ # """
391
+ # Return a URI for the given input string, or return the astring unchanged if
392
+ # none is available.
393
+
394
+ # """
395
+ # result = astring
396
+ # if isinstance(astring, six.string_types) and self._prefix_suffix.match(astring):
397
+ # prefix, suffix = self._prefix_suffix.match(astring).groups()
398
+ # if prefix in self.prefixes():
399
+ # if self._http_uri.match(self.prefixes()[prefix]):
400
+ # result = astring.replace('{}__'.format(prefix),
401
+ # self.prefixes()[prefix])
402
+ # elif isinstance(astring, six.string_types) and astring in self.aliases:
403
+ # result = self.aliases[astring]
404
+ # return result
405
+
338
406
@property
def link_template(self):
    """
    Markup template for a hyperlink; it carries ``{url}`` and ``{key}``
    placeholders for the caller to fill with ``str.format``.

    """
    template = '<a xlink:href="{url}" xlink:show=new text-decoration="underline">{key}</a>'
    return template
@@ -425,13 +493,27 @@ def rdfnode(self, graph):
425
493
if not (isinstance (objs , set ) or isinstance (objs , list )):
426
494
objs = set ([objs ])
427
495
for obj in objs :
496
+ rdfpred = self .unpack_predicate (attr )
428
497
if isinstance (obj , Subject ):
429
498
rdfobj = rdflib .URIRef (obj .identity )
430
- elif is_http_uri (self .unpack_uri (obj )):
431
- rdfobj = rdflib .URIRef (self .unpack_uri (obj ))
432
499
else :
433
- rdfobj = rdflib .Literal (obj )
434
- graph .add ((selfnode , rdflib .URIRef (self .unpack_uri (attr )), rdfobj ))
500
+ rdfobj = self .unpack_rdfobject (obj , rdfpred )
501
+ if is_http_uri (rdfobj ):
502
+ rdfobj = rdflib .URIRef (rdfobj )
503
+ else :
504
+ rdfobj = rdflib .Literal (rdfobj )
505
+ rdfpred = rdflib .URIRef (rdfpred )
506
+ try :
507
+ graph .add ((selfnode , rdfpred , rdfobj ))
508
+
509
+ except AssertionError :
510
+
511
+ graph .add ((selfnode , rdfpred , rdfobj ))
512
+ # elif is_http_uri(self.unpack_uri(obj)):
513
+ # rdfobj = rdflib.URIRef(self.unpack_uri(obj))
514
+ # else:
515
+ # rdfobj = rdflib.Literal(obj)
516
+ # graph.add((selfnode, rdflib.URIRef(self.unpack_uri(attr)), rdfobj))
435
517
if isinstance (obj , Subject ):
436
518
obj_ref = rdflib .URIRef (obj .identity )
437
519
if (obj_ref , None , None ) not in graph :
@@ -446,11 +528,23 @@ def rdfgraph(self):
446
528
"""
447
529
graph = rdflib .Graph ()
448
530
graph .bind ('bald' , 'http://binary-array-ld.net/latest/' )
449
- for prefix_name in self ._prefixes :
450
- #strip the double underscore suffix
451
- new_name = prefix_name [:- 2 ]
531
+ for prefix_name in self .prefixes ():
532
+
533
+ #strip the double underscore suffix
534
+
535
+ # new_name = prefix_name[:-2]
452
536
453
- graph .bind (new_name , self ._prefixes [prefix_name ])
537
+ graph .bind (prefix_name , self .prefixes ()[prefix_name ])
538
+
539
+ for alias_name in self .aliases :
540
+ # hack :S
541
+ uri = self .aliases [alias_name ]
542
+ if '?_format' in uri :
543
+ uri = uri .split ('?' )[0 ]
544
+ if not (uri .endswith ('#' ) or uri .endswith ('/' )):
545
+ uri = uri + '/'
546
+ graph .bind (alias_name , uri )
547
+
454
548
graph = self .rdfnode (graph )
455
549
456
550
return graph
@@ -532,12 +626,13 @@ def load(afilepath):
532
626
finally :
533
627
f .close ()
534
628
535
- def load_netcdf (afilepath , baseuri = None ):
629
+ def load_netcdf (afilepath , baseuri = None , alias_dict = None ):
536
630
"""
537
631
Load a file with respect to binary-array-linked-data.
538
632
Returns a :class:`bald.Collection`
539
633
"""
540
-
634
+ if alias_dict == None :
635
+ alias_dict = {}
541
636
with load (afilepath ) as fhandle :
542
637
if baseuri is None :
543
638
baseuri = 'file://{}' .format (afilepath )
@@ -581,36 +676,39 @@ def load_netcdf(afilepath, baseuri=None):
581
676
if isinstance (alias_var , netCDF4 ._netCDF4 .Variable ):
582
677
skipped_variables .append (alias_var .name )
583
678
679
+ aliases = careful_update (aliases , alias_dict )
584
680
attrs = {}
585
681
for k in fhandle .ncattrs ():
586
682
attrs [k ] = getattr (fhandle , k )
587
- # process Conventions
588
- # Conventions = "CF-1.6, ACDD-1.3"
683
+
589
684
aliasgraph = rdflib .Graph ()
590
- if hasattr (fhandle , 'Conventions' ):
591
- conventions = [c .strip () for c in fhandle .Conventions .split (',' )]
592
- for conv in conventions :
593
- if conv .startswith ('CF-' ):
594
- uri = 'http://def.scitools.org.uk/CFTerms?_format=ttl'
595
- result = aliasgraph .parse (uri )
596
- qstr = ('select ?alias ?uri where '
597
- '{?uri dct:identifier ?alias .}' )
598
- qres = aliasgraph .query (qstr )
599
-
600
- new_aliases = [(str (q [0 ]), str (q [1 ])) for q in list (qres )]
601
- na_keys = [n [0 ] for n in new_aliases ]
602
- if len (set (na_keys )) != len (na_keys ):
603
- raise ValueError ('duplicate aliases' )
604
- aliases = careful_update (aliases , dict (new_aliases ))
605
-
685
+ for alias in aliases :
686
+ aliasgraph .parse (aliases [alias ], format = 'xml' )
687
+ # if hasattr(fhandle, 'Conventions'):
688
+ # conventions = [c.strip() for c in fhandle.Conventions.split(',')]
689
+ # for conv in conventions:
690
+ # if conv.startswith('CF-'):
691
+ # uri = 'http://def.scitools.org.uk/CFTerms?_format=ttl'
692
+ # aliasgraph.parse(uri)
693
+ # uri = 'http://vocab.nerc.ac.uk/standard_name/'
694
+ # aliasgraph.parse(uri, format='xml')
695
+ # qstr = ('select ?alias ?uri where '
696
+ # '{?uri dct:identifier ?alias .}')
697
+ # qres = aliasgraph.query(qstr)
698
+
699
+ # new_aliases = [(str(q[0]), str(q[1])) for q in list(qres)]
700
+ # na_keys = [n[0] for n in new_aliases]
701
+ # if len(set(na_keys)) != len(na_keys):
702
+ # raise ValueError('duplicate aliases')
703
+ # aliases = careful_update(aliases, dict(new_aliases))
606
704
607
- root_container = Container (identity , attrs , prefixes = prefixes ,
608
- aliases = aliases )
705
+ root_container = Container (baseuri , '' , attrs , prefixes = prefixes ,
706
+ aliases = aliases , alias_graph = aliasgraph )
609
707
610
708
root_container .attrs ['bald__contains' ] = []
611
709
file_variables = {}
612
710
for name in fhandle .variables :
613
- if name == prefix_var_name or name == alias_var_name :
711
+ if name == prefix_var_name :
614
712
continue
615
713
616
714
sattrs = fhandle .variables [name ].__dict__ .copy ()
@@ -627,21 +725,25 @@ def load_netcdf(afilepath, baseuri=None):
627
725
628
726
if fhandle .variables [name ].shape :
629
727
sattrs ['bald__shape' ] = fhandle .variables [name ].shape
630
- var = Array (identity , sattrs , prefixes = prefixes , aliases = aliases )
728
+ var = Array (baseuri , name , sattrs , prefixes = prefixes ,
729
+ aliases = aliases , alias_graph = aliasgraph )
631
730
else :
632
- var = Subject (identity , sattrs , prefixes = prefixes , aliases = aliases )
731
+ var = Subject (baseuri , name , sattrs , prefixes = prefixes ,
732
+ aliases = aliases , alias_graph = aliasgraph )
633
733
root_container .attrs ['bald__contains' ].append (var )
634
734
file_variables [name ] = var
635
735
636
736
637
737
reference_prefixes = dict ()
638
- reference_graph = aliasgraph
738
+ reference_graph = copy . copy ( aliasgraph )
639
739
reference_graph .parse ('http://binary-array-ld.net/latest?_format=ttl' )
640
740
qstr = ('prefix bald: <http://binary-array-ld.net/latest/> '
741
+ 'prefix skos: <http://www.w3.org/2004/02/skos/core#> '
641
742
'select ?s '
642
743
'where { '
643
744
' ?s rdfs:range ?type . '
644
- 'filter(?type != rdfs:Literal)'
745
+ 'filter(?type != rdfs:Literal) '
746
+ 'filter(?type != skos:Concept) '
645
747
'}' )
646
748
refs = reference_graph .query (qstr )
647
749
@@ -650,7 +752,7 @@ def load_netcdf(afilepath, baseuri=None):
650
752
651
753
# cycle again and find references
652
754
for name in fhandle .variables :
653
- if name == prefix_var_name or name == alias_var_name :
755
+ if name == prefix_var_name :
654
756
continue
655
757
656
758
var = file_variables [name ]
@@ -662,7 +764,7 @@ def load_netcdf(afilepath, baseuri=None):
662
764
663
765
# for sattr in sattrs:
664
766
for sattr in (sattr for sattr in sattrs if
665
- root_container .unpack_uri (sattr ) in ref_prefs ):
767
+ root_container .unpack_predicate (sattr ) in ref_prefs ):
666
768
# if sattr == 'coordinates':
667
769
# import pdb; pdb.set_trace()
668
770
@@ -712,7 +814,10 @@ def load_netcdf(afilepath, baseuri=None):
712
814
reshape [cvi ] = fhandle .variables [dim ].size
713
815
rattrs ['bald__childBroadcast' ] = tuple (reshape )
714
816
rattrs ['bald__array' ] = set ((file_variables .get (dim ),))
715
- ref_node = Subject (identity , rattrs , prefixes = prefixes , aliases = aliases )
817
+ ref_node = Subject (baseuri , name , rattrs ,
818
+ prefixes = prefixes ,
819
+ aliases = aliases ,
820
+ alias_graph = aliasgraph )
716
821
root_container .attrs ['bald__contains' ].append (ref_node )
717
822
file_variables [name ] = ref_node
718
823
refset .add (ref_node )
0 commit comments