Skip to content

Commit b25b2ec

Browse files
authored
Merge pull request #71 from jyucsiro/load-tests-triple-store
Load tests triple store
2 parents a667797 + 751beb4 commit b25b2ec

File tree

11 files changed

+278
-39
lines changed

11 files changed

+278
-39
lines changed

lib/bald/__init__.py

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,9 @@ def rdfnode(self, graph):
414414
selfnode = rdflib.URIRef(self.identity)
415415
for attr in self.attrs:
416416
objs = self.attrs[attr]
417+
if(isinstance(objs, np.ndarray)):
418+
#try to convert np.ndarray to a list
419+
objs = objs.tolist()
417420
if not (isinstance(objs, set) or isinstance(objs, list)):
418421
objs = set([objs])
419422
for obj in objs:
@@ -438,6 +441,11 @@ def rdfgraph(self):
438441
"""
439442
graph = rdflib.Graph()
440443
graph.bind('bald', 'http://binary-array-ld.net/latest/')
444+
for prefix_name in self._prefixes:
445+
#strip the double underscore suffix
446+
new_name = prefix_name[:-2]
447+
448+
graph.bind(new_name, self._prefixes[prefix_name])
441449
graph = self.rdfnode(graph)
442450

443451
return graph
@@ -530,75 +538,96 @@ def load(afilepath):
530538
finally:
531539
f.close()
532540

533-
def load_netcdf(afilepath, uri=None):
541+
def load_netcdf(afilepath, baseuri=None):
534542
"""
535543
Validate a file with respect to binary-array-linked-data.
536544
Returns a :class:`bald.validation.Validation`
537545
"""
538546

539547
with load(afilepath) as fhandle:
540-
prefix_group = (fhandle[fhandle.bald__isPrefixedBy] if
548+
prefix_var_name = None
549+
if hasattr(fhandle, 'bald__isPrefixedBy'):
550+
prefix_var_name = fhandle.bald__isPrefixedBy
551+
552+
prefix_var = (fhandle[fhandle.bald__isPrefixedBy] if
541553
hasattr(fhandle, 'bald__isPrefixedBy') else {})
542554
prefixes = {}
543555

544556
skipped_variables = []
545-
if prefix_group != {}:
546-
prefixes = (dict([(prefix, getattr(prefix_group, prefix)) for
547-
prefix in prefix_group.ncattrs()]))
548-
if isinstance(prefix_group, netCDF4._netCDF4.Variable):
549-
skipped_variables.append(prefix_group.name)
557+
if prefix_var != {}:
558+
prefixes = (dict([(prefix, getattr(prefix_var, prefix)) for
559+
prefix in prefix_var.ncattrs()]))
560+
if isinstance(prefix_var, netCDF4._netCDF4.Variable):
561+
skipped_variables.append(prefix_var.name)
550562
else:
551563
for k in fhandle.ncattrs():
552564
if k.endswith('__'):
553565
prefixes[k] = getattr(fhandle, k)
554-
alias_group = (fhandle[fhandle.bald__isAliasedBy]
566+
567+
# check that default set is handled, i.e. bald__ and rdf__
568+
if 'bald__' not in prefixes:
569+
prefixes['bald__'] = "http://binary-array-ld.net/latest/"
570+
571+
if 'rdf__' not in prefixes:
572+
prefixes['rdf__'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
573+
574+
alias_var_name = None
575+
if hasattr(fhandle, 'bald__isAliasedBy'):
576+
alias_var_name = fhandle.bald__isAliasedBy
577+
578+
alias_var = (fhandle[fhandle.bald__isAliasedBy]
555579
if hasattr(fhandle, 'bald__isAliasedBy') else {})
556580
aliases = {}
557-
if alias_group != {}:
558-
aliases = (dict([(alias, getattr(alias_group, alias))
559-
for alias in alias_group.ncattrs()]))
560-
if isinstance(alias_group, netCDF4._netCDF4.Variable):
561-
skipped_variables.append(alias_group.name)
581+
if alias_var != {}:
582+
aliases = (dict([(alias, getattr(alias_var, alias))
583+
for alias in alias_var.ncattrs()]))
584+
if isinstance(alias_var, netCDF4._netCDF4.Variable):
585+
skipped_variables.append(alias_var.name)
562586

563587
attrs = {}
564588
for k in fhandle.ncattrs():
565589
attrs[k] = getattr(fhandle, k)
566590
# It would be nice to use the URI of the file if it is known.
567-
if uri is not None:
568-
identity = uri
591+
if baseuri is not None:
592+
identity = baseuri
569593
else:
570594
identity = 'root'
571595
root_container = Container(identity, attrs, prefixes=prefixes,
572596
aliases=aliases)
573597
root_container.attrs['bald__contains'] = []
574598
file_variables = {}
575599
for name in fhandle.variables:
600+
if name == prefix_var_name or name == alias_var_name:
601+
continue
576602

577603
sattrs = fhandle.variables[name].__dict__.copy()
578604
# inconsistent use of '/'; fix it
579605
identity = name
606+
if baseuri is not None:
607+
identity = baseuri + "/" + name
580608

581609
# netCDF coordinate variable special case
582610
if (len(fhandle.variables[name].dimensions) == 1 and
583611
fhandle.variables[name].dimensions[0] == name):
584-
sattrs['bald__array'] = name
612+
#sattrs['bald__array'] = name
613+
sattrs['bald__array'] = identity
585614
sattrs['rdf__type'] = 'bald__Reference'
615+
586616
if fhandle.variables[name].shape:
587617
sattrs['bald__shape'] = fhandle.variables[name].shape
588618
var = Array(identity, sattrs, prefixes=prefixes, aliases=aliases)
589619
else:
590620
var = Subject(identity, sattrs, prefixes=prefixes, aliases=aliases)
591-
if name not in skipped_variables:
592-
# Don't include skipped variables, such as prefix or alias
593-
# variables, within the containment relation.
594-
root_container.attrs['bald__contains'].append(var)
595-
621+
root_container.attrs['bald__contains'].append(var)
596622
file_variables[name] = var
597623

598624

599625

600626
# cycle again and find references
601627
for name in fhandle.variables:
628+
if name == prefix_var_name or name == alias_var_name:
629+
continue
630+
602631
var = file_variables[name]
603632
# reverse lookup based on type to be added
604633
lookups = ['bald__references', 'bald__array']
@@ -624,6 +653,8 @@ def load_netcdf(afilepath, uri=None):
624653
# Else, define a bald:childBroadcast
625654
else:
626655
identity = '{}_{}_ref'.format(name, dim)
656+
if baseuri is not None:
657+
identity = baseuri + '/' + '{}_{}_ref'.format(name, dim)
627658
rattrs = {}
628659
rattrs['rdf__type'] = 'bald__Reference'
629660
reshape = [1 for adim in var_shape]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
netcdf tmpMwXy8U {
2+
dimensions:
3+
pdim0 = 11 ;
4+
pdim1 = 17 ;
5+
variables:
6+
int prefix_list ;
7+
prefix_list:bald__ = "http://binary-array-ld.net/latest/" ;
8+
prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
9+
prefix_list:rdfs__ = "http://www.w3.org/2000/01/rdf-schema#" ;
10+
prefix_list:cf__ = "http://def.scitools.org.uk/CFTerms/" ;
11+
prefix_list:geo__ = "http://www.opengis.net/ont/geosparql#" ;
12+
13+
int temp(pdim0, pdim1) ;
14+
temp:cf__standard_name = "air_temperature" ;
15+
temp:cf__long_name = "Air temperature obs example at point" ;
16+
temp:rdfs__label = "Air temperature obs example at point" ;
17+
temp:geo__asWKT = "POINT(-77.03524 38.889468)" ;
18+
19+
int pressure(pdim0, pdim1) ;
20+
pressure:cf__standard_name = "air_pressure" ;
21+
pressure:cf__long_name = "Air pressure at UCAR Centre Green" ;
22+
pressure:rdfs__label = "Air pressure at UCAR Centre Green" ;
23+
pressure:geo__asWKT = "POINT(-105.24584700000003 40.0315278)" ;
24+
25+
// global attributes:
26+
:_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ;
27+
:rdf__type = "bald__Container" ;
28+
:bald__isPrefixedBy = "prefix_list" ;
29+
data:
30+
31+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
netcdf tmpMwXy8U {
2+
dimensions:
3+
pdim0 = 11 ;
4+
pdim1 = 17 ;
5+
variables:
6+
int prefix_list ;
7+
prefix_list:bald__ = "http://binary-array-ld.net/latest/" ;
8+
prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
9+
prefix_list:rdfs__ = "http://www.w3.org/2000/01/rdf-schema#" ;
10+
prefix_list:cf__ = "http://def.scitools.org.uk/CFTerms/" ;
11+
prefix_list:geo__ = "http://www.opengis.net/ont/geosparql#" ;
12+
13+
int temp(pdim0, pdim1) ;
14+
temp:cf__standard_name = "air_temperature" ;
15+
temp:cf__long_name = "Air temperature obs example at point" ;
16+
temp:rdfs__label = "Air temperature obs example at point" ;
17+
temp:geo__asWKT = "POINT(-77.03524 38.889468)" ;
18+
19+
int pressure(pdim0, pdim1) ;
20+
pressure:rdf__type = "geo__Geometry" ;
21+
pressure:cf__standard_name = "air_pressure" ;
22+
pressure:cf__long_name = "Air pressure at UCAR Centre Green" ;
23+
pressure:rdfs__label = "Air pressure at UCAR Centre Green" ;
24+
pressure:geo__asWKT = "POINT(-105.24584700000003 40.0315278)" ;
25+
26+
// global attributes:
27+
:_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ;
28+
:rdf__type = "bald__Container" ;
29+
:bald__isPrefixedBy = "prefix_list" ;
30+
data:
31+
32+
}

lib/bald/tests/integration/CDL/multi_array_reference.cdl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ dimensions:
33
pdim0 = 11 ;
44
pdim1 = 17 ;
55
variables:
6+
int prefix_list(pdim0, pdim1) ;
7+
prefix_list:bald__ = "http://binary-array-ld.net/latest/" ;
8+
prefix_list:metce__ = "http://codes.wmo.int/common/observation-type/METCE/2013/" ;
9+
prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
10+
611
int variable1(pdim0, pdim1) ;
712
variable1:bald__references = "location_variable" ;
813
variable1:long_name = "Gerald";
@@ -31,11 +36,4 @@ variables:
3136
:_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ;
3237
:bald__isPrefixedBy = "prefix_list" ;
3338

34-
group: prefix_list {
35-
36-
// group attributes:
37-
:bald__ = "http://binary-array-ld.net/latest/" ;
38-
:metce__ = "http://codes.wmo.int/common/observation-type/METCE/2013/" ;
39-
:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
40-
} // group bald__prefix_list
4139
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
@prefix bald: <http://binary-array-ld.net/latest/> .
2+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
3+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
4+
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
5+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
6+
7+
<http://example.org/base> a bald:Container ;
8+
bald:contains <http://example.org/base/child_variable>,
9+
<http://example.org/base/parent_variable> ;
10+
bald:isPrefixedBy "prefix_list" .
11+
12+
<http://example.org/base/parent_variable> a bald:Array ;
13+
bald:references <http://example.org/base/child_variable> ;
14+
bald:shape "(11, 17)" .
15+
16+
<http://example.org/base/child_variable> a bald:Array,
17+
bald:Reference ;
18+
bald:array <http://example.org/base/child_variable> ;
19+
bald:shape "(11, 17)" .
20+

lib/bald/tests/integration/TTL/multi_array_reference.ttl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
@prefix bald: <http://binary-array-ld.net/latest/> .
2+
@prefix metce: <http://codes.wmo.int/common/observation-type/METCE/2013/> .
23
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
34
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
45
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@@ -21,15 +22,15 @@
2122
<variable1_pdim0_ref> ;
2223
bald:shape "(11, 17)" ;
2324
<long_name> "Gerald" ;
24-
<obtype> <http://codes.wmo.int/common/observation-type/METCE/2013/SamplingObservation> .
25+
<obtype> metce:SamplingObservation .
2526

2627
<variable2> a bald:Array ;
2728
bald:references <location_variable>,
2829
<pdim1>,
2930
<variable2_pdim0_ref> ;
3031
bald:shape "(11, 17)" ;
3132
<long_name> "Imelda" ;
32-
<obtype> <http://codes.wmo.int/common/observation-type/METCE/2013/SamplingObservation> .
33+
<obtype> metce:SamplingObservation .
3334

3435
<variable1_pdim0_ref> a bald:Reference,
3536
bald:Subject ;

lib/bald/tests/integration/test_cdl.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,9 @@ def test_grid_OISST_GHRSST(self):
6464
validation = bald.validate_netcdf(tfile)
6565
exns = validation.exceptions()
6666
exns.sort()
67-
expected = ['http://www.ncdc.noaa.gov/sst is not resolving as a resource (404).',
68-
'http://www.ncdc.noaa.gov/sst/ is not resolving as a resource (404).']
67+
expected = [ 'http://doi.org/10.7289/V5SQ8XB5 is not resolving as a resource (404).',
68+
'http://www.ncdc.noaa.gov/sst is not resolving as a resource (404).',
69+
'http://www.ncdc.noaa.gov/sst/ is not resolving as a resource (404).']
6970
expected.sort()
7071
self.assertTrue(not validation.is_valid() and exns == expected,
7172
msg='{} \n!= \n{}'.format(exns, expected))

lib/bald/tests/integration/test_cdl_rdfgraph.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,24 @@ def test_array_reference(self):
1919
subprocess.check_call(['ncgen', '-o', tfile, cdl_file])
2020
root_container = bald.load_netcdf(tfile)
2121
ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8")
22-
# with open(os.path.join(self.ttl_path, 'array_reference.ttl'), 'w') as sf:
22+
# with open(os.path.join(self.ttl_path, 'array_reference2.ttl'), 'w') as sf:
2323
# sf.write(ttl)
2424
with open(os.path.join(self.ttl_path, 'array_reference.ttl'), 'r') as sf:
2525
expected_ttl = sf.read()
2626
self.assertEqual(expected_ttl, ttl)
2727

28+
def test_array_reference_with_baseuri(self):
29+
with self.temp_filename('.nc') as tfile:
30+
cdl_file = os.path.join(self.cdl_path, 'array_reference.cdl')
31+
subprocess.check_call(['ncgen', '-o', tfile, cdl_file])
32+
root_container = bald.load_netcdf(tfile, baseuri='http://example.org/base')
33+
ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8")
34+
#with open(os.path.join(self.ttl_path, 'array_reference_withbase.ttl'), 'w') as sf:
35+
# sf.write(ttl)
36+
with open(os.path.join(self.ttl_path, 'array_reference_withbase.ttl'), 'r') as sf:
37+
expected_ttl = sf.read()
38+
self.assertEqual(expected_ttl, ttl)
39+
2840
def test_multi_array_reference(self):
2941
with self.temp_filename('.nc') as tfile:
3042
cdl_file = os.path.join(self.cdl_path, 'multi_array_reference.cdl')
@@ -36,3 +48,15 @@ def test_multi_array_reference(self):
3648
with open(os.path.join(self.ttl_path, 'multi_array_reference.ttl'), 'r') as sf:
3749
expected_ttl = sf.read()
3850
self.assertEqual(expected_ttl, ttl)
51+
52+
def test_ereefs(self):
53+
with self.temp_filename('.nc') as tfile:
54+
cdl_file = os.path.join(self.cdl_path, 'ereefs_gbr4_ncld.cdl')
55+
subprocess.check_call(['ncgen', '-o', tfile, cdl_file])
56+
root_container = bald.load_netcdf(tfile)
57+
try:
58+
g = root_container.rdfgraph()
59+
ttl = g.serialize(format='n3').decode("utf-8")
60+
except TypeError:
61+
self.fail("Test case could not convert ereefs CDL to RDF")
62+

0 commit comments

Comments
 (0)