Skip to content

Commit 040edac

Browse files
authored
Merge pull request #77 from marqh/refListSet
Ref list set
2 parents 06ef959 + d23d51c commit 040edac

9 files changed

+402
-104
lines changed

lib/bald/__init__.py

Lines changed: 53 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import numpy as np
1111
import pyparsing
1212
import rdflib
13+
import rdflib.collection
1314
import requests
1415
import six
1516

@@ -258,11 +259,11 @@ def __getitem__(self, item):
258259
# print('trying: {}'.format(item))
259260

260261
headers = {'Accept': 'application/rdf+xml'}
261-
self.cache[item] = requests.get(item, headers=headers, timeout=7)
262+
self.cache[item] = requests.get(item, headers=headers, timeout=11)
262263
except Exception:
263264
# print('retrying: {}'.format(item))
264265
headers = {'Accept': 'text/html'}
265-
self.cache[item] = requests.get(item, headers=headers, timeout=7)
266+
self.cache[item] = requests.get(item, headers=headers, timeout=11)
266267

267268
# print('in {} seconds'.format(time.time() - then))
268269
return self.cache[item]
@@ -524,10 +525,12 @@ def viewgraph(self):
524525
def rdfnode(self, graph):
525526
selfnode = rdflib.URIRef(self.identity)
526527
for attr in self.attrs:
528+
list_items = []
527529
objs = self.attrs[attr]
528530
if(isinstance(objs, np.ndarray)):
529531
#try to convert np.ndarray to a list
530532
objs = objs.tolist()
533+
531534
if not (isinstance(objs, set) or isinstance(objs, list)):
532535
objs = set([objs])
533536
for obj in objs:
@@ -546,16 +549,24 @@ def rdfnode(self, graph):
546549
else:
547550
rdfobj = rdflib.Literal(rdfobj)
548551
rdfpred = rdflib.URIRef(rdfpred)
549-
try:
550-
graph.add((selfnode, rdfpred, rdfobj))
551-
552-
except AssertionError:
553-
554-
graph.add((selfnode, rdfpred, rdfobj))
552+
if isinstance(objs, set):
553+
try:
554+
graph.add((selfnode, rdfpred, rdfobj))
555+
556+
except AssertionError:
557+
pass
558+
#graph.add((selfnode, rdfpred, rdfobj))
559+
elif isinstance(objs, list):
560+
list_items.append(rdfobj)
555561
if isinstance(obj, Subject):
556562
obj_ref = rdflib.URIRef(obj.identity)
557563
if (obj_ref, None, None) not in graph:
558564
graph = obj.rdfnode(graph)
565+
if list_items:
566+
list_name = rdflib.BNode()
567+
col = rdflib.collection.Collection(graph, list_name, list_items)
568+
569+
graph.add((selfnode, rdfpred, list_name))
559570

560571
return graph
561572

@@ -763,7 +774,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
763774
root_container = Container(baseuri, '', attrs, prefixes=prefixes,
764775
aliases=aliases, alias_graph=aliasgraph)
765776

766-
root_container.attrs['bald__contains'] = []
777+
root_container.attrs['bald__contains'] = set()
767778
file_variables = {}
768779
for name in fhandle.variables:
769780
if name == prefix_var_name:
@@ -855,7 +866,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
855866
else:
856867
var = Subject(baseuri, name, sattrs, prefixes=prefixes,
857868
aliases=aliases, alias_graph=aliasgraph)
858-
root_container.attrs['bald__contains'].append(var)
869+
root_container.attrs['bald__contains'].add(var)
859870
file_variables[name] = var
860871

861872

@@ -887,10 +898,18 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
887898
'filter(?rtype = owl:Class) '
888899
'}')
889900

901+
qstr = ('prefix bald: <http://binary-array-ld.net/latest/> '
902+
'prefix skos: <http://www.w3.org/2004/02/skos/core#> '
903+
'prefix owl: <http://www.w3.org/2002/07/owl#> '
904+
'select ?s '
905+
'where { '
906+
' ?s rdfs:range ?type . '
907+
'filter(?type in (rdfs:Literal, skos:Concept)) '
908+
'}')
909+
890910
refs = reference_graph.query(qstr)
891911

892-
ref_prefs = [str(ref[0]) for ref in list(refs)]
893-
912+
non_ref_prefs = [str(ref[0]) for ref in list(refs)]
894913

895914
# cycle again and find references
896915
for name in fhandle.variables:
@@ -906,30 +925,29 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
906925

907926
# for sattr in sattrs:
908927
for sattr in (sattr for sattr in sattrs if
909-
root_container.unpack_predicate(sattr) in ref_prefs):
910-
# if sattr == 'coordinates':
911-
# import pdb; pdb.set_trace()
928+
root_container.unpack_predicate(sattr) not in non_ref_prefs):
912929

913930
if (isinstance(sattrs[sattr], six.string_types) and
914931
file_variables.get(sattrs[sattr])):
915932
# TODO: remove all use of set; everything should be a dict or OrderedDict
916933
var.attrs[sattr] = set((file_variables.get(sattrs[sattr]),))
917934
elif isinstance(sattrs[sattr], six.string_types):
918-
potrefs_list = sattrs[sattr].split(',')
919-
potrefs_set = sattrs[sattr].split(' ')
920-
if len(potrefs_list) > 1:
921-
refs = np.array([file_variables.get(pref) is not None
922-
for pref in potrefs_list])
923-
if np.all(refs):
924-
var.attrs[sattr] = [file_variables.get(pref)
925-
for pref in potrefs_list]
926-
927-
elif len(potrefs_set) > 1:
928-
refs = np.array([file_variables.get(pref) is not None
929-
for pref in potrefs_set])
930-
if np.all(refs):
931-
var.attrs[sattr] = set([file_variables.get(pref)
932-
for pref in potrefs_set])
935+
if sattrs[sattr].startswith('(') and sattrs[sattr].endswith(')'):
936+
potrefs_list = sattrs[sattr].lstrip('( ').rstrip(' )').split(' ')
937+
if len(potrefs_list) > 1:
938+
refs = np.array([file_variables.get(pref) is not None
939+
for pref in potrefs_list])
940+
if np.all(refs):
941+
var.attrs[sattr] = [file_variables.get(pref)
942+
for pref in potrefs_list]
943+
else:
944+
potrefs_set = sattrs[sattr].split(' ')
945+
if len(potrefs_set) > 1:
946+
refs = np.array([file_variables.get(pref) is not None
947+
for pref in potrefs_set])
948+
if np.all(refs):
949+
var.attrs[sattr] = set([file_variables.get(pref)
950+
for pref in potrefs_set])
933951

934952
# coordinate variables are bald__references except for
935953
# variables that already declare themselves as bald__Reference
@@ -961,7 +979,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
961979
prefixes=prefixes,
962980
aliases=aliases,
963981
alias_graph=aliasgraph)
964-
root_container.attrs['bald__contains'].append(ref_node)
982+
root_container.attrs['bald__contains'].add(ref_node)
965983
file_variables[name] = ref_node
966984
refset.add(ref_node)
967985
var.attrs['bald__references'] = refset
@@ -1000,7 +1018,7 @@ def validate(root_container, sval=None, cache=None):
10001018

10011019
root_val = bv.ContainerValidation(subject=root_container, httpcache=cache)
10021020
sval.stored_exceptions += root_val.exceptions()
1003-
for subject in root_container.attrs.get('bald__contains', []):
1021+
for subject in root_container.attrs.get('bald__contains', set()):
10041022
if isinstance(subject, Array):
10051023
array_val = bv.ArrayValidation(subject, httpcache=cache)
10061024
sval.stored_exceptions += array_val.exceptions()
@@ -1060,7 +1078,7 @@ def _hdf_group(fhandle, identity='root', baseuri=None, prefixes=None,
10601078
root_container = Container(baseuri, identity, attrs, prefixes=prefixes,
10611079
aliases=aliases, alias_graph=aliasgraph)
10621080

1063-
root_container.attrs['bald__contains'] = []
1081+
root_container.attrs['bald__contains'] = set()
10641082

10651083
file_variables = {}
10661084
# iterate through the datasets and groups
@@ -1071,14 +1089,14 @@ def _hdf_group(fhandle, identity='root', baseuri=None, prefixes=None,
10711089
if not skip:
10721090
if isinstance(dataset, h5py._hl.group.Group):
10731091
new_cont, new_fvars = _hdf_group(dataset, name, baseuri, prefixes, aliases)
1074-
root_container.attrs['bald__contains'].append(new_cont)
1092+
root_container.attrs['bald__contains'].add(new_cont)
10751093
file_variables = careful_update(file_variables, new_fvars)
10761094
#if hasattr(dataset, 'shape'):
10771095
elif isinstance(dataset, h5py._hl.dataset.Dataset):
10781096
sattrs = dict(dataset.attrs)
10791097
sattrs['bald__shape'] = dataset.shape
10801098
dset = Array(baseuri, name, sattrs, prefixes, aliases, aliasgraph)
1081-
root_container.attrs['bald__contains'].append(dset)
1099+
root_container.attrs['bald__contains'].add(dset)
10821100
file_variables[dataset.name] = dset
10831101
return root_container, file_variables
10841102

lib/bald/tests/integration/CDL/ereefs_gbr4_ncld.cdl

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -126,19 +126,11 @@ variables:
126126
:metadata_link = "http://marlin.csiro.au/geonetwork/srv/eng/search?&uuid=72020224-f086-434a-bbe9-a222c8e5cf0d" ;
127127
:rdf__type = "bald__Container" ;
128128
:bald__isPrefixedBy = "prefix_list" ;
129-
:bald__isAliasedBy = "alias_list" ;
130129

131130
group: prefix_list {
132131
// group attributes:
133132
:bald__ = "http://binary-array-ld.net/latest/" ;
134133
:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
135134
} // group prefix_list
136-
137-
group: alias_list {
138-
// group attributes:
139-
:qudt = "http://qudt.org/1.1/schema/qudt";
140-
:ed_gov_au_op = "http://environment.data.gov.au/def/op" ;
141-
142-
} // group bald__alias_list
143135

144136
}

lib/bald/tests/integration/CDL/multi_array_reference.cdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ variables:
3636
set_collection:bald__references = "data_variable1 data_variable2" ;
3737

3838
int list_collection ;
39-
list_collection:bald__references = "data_variable1,data_variable2" ;
39+
list_collection:bald__references = "( data_variable1 data_variable2 )" ;
4040

4141

4242
// global attributes:
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
@prefix CFTerms: <http://def.scitools.org.uk/CFTerms/> .
2+
@prefix DA: <https://codes.nws.noaa.gov/DataAssimilation> .
3+
@prefix NWP: <https://codes.nws.noaa.gov/NumericalWeatherPrediction> .
4+
@prefix StatPP: <https://codes.nws.noaa.gov/StatisticalPostProcessing> .
5+
@prefix bald: <http://binary-array-ld.net/latest/> .
6+
@prefix cf_sname: <http://vocab.nerc.ac.uk/standard_name/> .
7+
@prefix ns1: <file://CDL/ProcessChain0300.cdl/> .
8+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
9+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
10+
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
11+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
12+
13+
<file://CDL/ProcessChain0300.cdl> a bald:Container ;
14+
ns1:process_chain "gfsmos_process_chain" ;
15+
bald:contains ns1:gfsmos_process_chain,
16+
ns1:step1,
17+
ns1:step2 ;
18+
bald:isPrefixedBy "prefix_list" .
19+
20+
ns1:gfsmos_process_chain a bald:Subject ;
21+
ns1:OM_Process ( ns1:step1 ns1:step2 ) .
22+
23+
ns1:step1 a bald:Subject ;
24+
ns1:LE_ProcessStep <https://codes.nws.noaa.gov/NumericalWeatherPrediction/Models/GFS13> ;
25+
ns1:LE_Source <https://codes.nws.noaa.gov/DataAssimilation/Methods/GDAS13> .
26+
27+
ns1:step2 a bald:Subject ;
28+
ns1:LE_ProcessStep <https://codes.nws.noaa.gov/StatisticalPostProcessing/Methods/GFSMOS05> ;
29+
ns1:LE_Source <https://codes.nws.noaa.gov/NumericalWeatherPrediction/Models/GFS13> .
30+

0 commit comments

Comments
 (0)