Skip to content

Commit 4770f1c

Browse files
authored
Merge pull request #90 from marqh/shapeRDFlist
Shape rdf list
2 parents 47faa71 + 38e8aa4 commit 4770f1c

10 files changed

+341
-153
lines changed

lib/bald/__init__.py

Lines changed: 75 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -805,14 +805,14 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
805805
sattrs['bald__first_value'] = fhandle.variables[name][0]
806806
if np.issubdtype(sattrs['bald__first_value'], np.integer):
807807
sattrs['bald__first_value'] = int(sattrs['bald__first_value'])
808-
elif np.issubdtype(sattrs['bald__first_value'], np.float):
808+
elif np.issubdtype(sattrs['bald__first_value'], np.floating):
809809
sattrs['bald__first_value'] = float(sattrs['bald__first_value'])
810810
if (len(fhandle.variables[name]) > 1 and
811811
not isinstance(fhandle.variables[name][-1], np.ma.core.MaskedConstant)):
812812
sattrs['bald__last_value'] = fhandle.variables[name][-1]
813813
if np.issubdtype(sattrs['bald__last_value'], np.integer):
814814
sattrs['bald__last_value'] = int(sattrs['bald__last_value'])
815-
elif np.issubdtype(sattrs['bald__last_value'], np.float):
815+
elif np.issubdtype(sattrs['bald__last_value'], np.floating):
816816
sattrs['bald__last_value'] = float(sattrs['bald__last_value'])
817817

818818
# datetime special case
@@ -868,7 +868,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
868868

869869

870870
if fhandle.variables[name].shape:
871-
sattrs['bald__shape'] = fhandle.variables[name].shape
871+
sattrs['bald__shape'] = list(fhandle.variables[name].shape)
872872
var = Array(baseuri, name, sattrs, prefixes=prefixes,
873873
aliases=aliases, alias_graph=aliasgraph)
874874
else:
@@ -950,66 +950,86 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
950950
for sattr in (sattr for sattr in sattrs if
951951
root_container.unpack_predicate(sattr) in ref_prefs):
952952

953-
if (isinstance(sattrs[sattr], six.string_types) and
954-
file_variables.get(sattrs[sattr])):
955-
# next: remove all use of set, everything is dict or orderedDict
956-
var.attrs[sattr] = set((file_variables.get(sattrs[sattr]),))
957-
elif isinstance(sattrs[sattr], six.string_types):
953+
if isinstance(sattrs[sattr], six.string_types):
954+
958955
if sattrs[sattr].startswith('(') and sattrs[sattr].endswith(')'):
959956
potrefs_list = sattrs[sattr].lstrip('( ').rstrip(' )').split(' ')
960-
if len(potrefs_list) > 1:
961-
refs = np.array([file_variables.get(pref) is not None
962-
for pref in potrefs_list])
963-
if np.all(refs):
964-
var.attrs[sattr] = [file_variables.get(pref)
965-
for pref in potrefs_list]
957+
refs = np.array([file_variables.get(pref) is not None
958+
for pref in potrefs_list])
959+
if np.all(refs):
960+
var.attrs[sattr] = [file_variables.get(pref)
961+
for pref in potrefs_list]
962+
for pref in potrefs_list:
963+
_make_ref_entities(var, fhandle,
964+
pref, name, baseuri,
965+
root_container,
966+
file_variables, prefixes,
967+
aliases, aliasgraph)
968+
966969
else:
967970
potrefs_set = sattrs[sattr].split(' ')
968-
if len(potrefs_set) > 1:
969-
refs = np.array([file_variables.get(pref) is not None
970-
for pref in potrefs_set])
971-
if np.all(refs):
972-
var.attrs[sattr] = set([file_variables.get(pref)
973-
for pref in potrefs_set])
974-
975-
# coordinate variables are bald__references except for
976-
# variables that already declare themselves as bald__Reference
971+
refs = np.array([file_variables.get(pref) is not None
972+
for pref in potrefs_set])
973+
if np.all(refs):
974+
var.attrs[sattr] = set([file_variables.get(pref)
975+
for pref in potrefs_set])
976+
for pref in potrefs_set:
977+
_make_ref_entities(var, fhandle,
978+
pref, name, baseuri,
979+
root_container,
980+
file_variables, prefixes,
981+
aliases, aliasgraph)
982+
983+
984+
# coordinate variables are bald__references too
977985
if 'bald__Reference' not in var.rdf__type:
978986
for dim in fhandle.variables[name].dimensions:
979-
if file_variables.get(dim):
980-
cv_shape = fhandle.variables[dim].shape
981-
var_shape = fhandle.variables[name].shape
982-
refset = var.attrs.get('bald__references', set())
983-
# Only the dimension defining the last dimension will
984-
# broadcast correctly
985-
if var_shape[-1] == cv_shape[0]:
986-
refset.add(file_variables.get(dim))
987-
# Else, define a bald:childBroadcast
988-
else:
989-
# import pdb; pdb.set_trace()
990-
identity = '{}_{}_ref'.format(name, dim)
991-
# if baseuri is not None:
992-
# identity = baseuri + '/' + '{}_{}_ref'.format(name, dim)
993-
rattrs = {}
994-
rattrs['rdf__type'] = 'bald__Reference'
995-
reshape = [1 for adim in var_shape]
996-
997-
cvi = fhandle.variables[name].dimensions.index(dim)
998-
reshape[cvi] = fhandle.variables[dim].size
999-
rattrs['bald__childBroadcast'] = tuple(reshape)
1000-
rattrs['bald__array'] = set((file_variables.get(dim),))
1001-
ref_node = Subject(baseuri, identity, rattrs,
1002-
prefixes=prefixes,
1003-
aliases=aliases,
1004-
alias_graph=aliasgraph)
1005-
root_container.attrs['bald__contains'].add(ref_node)
1006-
file_variables[name] = ref_node
1007-
refset.add(ref_node)
1008-
var.attrs['bald__references'] = refset
1009-
987+
if file_variables.get(dim) and name != dim:
988+
_make_ref_entities(var, fhandle, dim, name,
989+
baseuri, root_container,
990+
file_variables, prefixes,
991+
aliases, aliasgraph)
1010992

1011993
return root_container
1012994

995+
def _make_ref_entities(var, fhandle, pref, name, baseuri,
996+
root_container, file_variables,
997+
prefixes, aliases, aliasgraph):
998+
shapematch = (fhandle.variables[name].shape ==
999+
fhandle.variables[pref].shape)
1000+
1001+
if (fhandle.variables[name].shape and not shapematch and
1002+
fhandle.variables[pref].shape):
1003+
try:
1004+
refset = var.attrs.get('bald__references', set())
1005+
cv_shape = fhandle.variables[pref].shape
1006+
var_shape = fhandle.variables[name].shape
1007+
identity = '{}_{}_ref'.format(name, pref)
1008+
rattrs = {}
1009+
rattrs['rdf__type'] = 'bald__Reference'
1010+
reshape = [1 for adim in var_shape]
1011+
1012+
dims = fhandle.variables[pref].dimensions
1013+
for dim in dims:
1014+
cvi = fhandle.variables[name].dimensions.index(dim)
1015+
reshape[cvi] = int(fhandle.dimensions[dim].size)
1016+
rattrs['bald__childBroadcast'] = reshape
1017+
rattrs['bald__array'] = set((file_variables.get(pref),))
1018+
ref_node = Subject(baseuri, identity, rattrs,
1019+
prefixes=prefixes,
1020+
aliases=aliases,
1021+
alias_graph=aliasgraph)
1022+
root_container.attrs['bald__contains'].add(ref_node)
1023+
file_variables[identity] = ref_node
1024+
refset.add(ref_node)
1025+
var.attrs['bald__references'] = refset
1026+
except ValueError:
1027+
# Indexing and dimension identification can fail, especially
1028+
# with unexpectedly formatted files. Fail silently on load, so
1029+
# that a partial graph may be returned. Issues like this are
1030+
# deferred to validation.
1031+
pass
1032+
10131033

10141034
def validate_netcdf(afilepath, baseuri=None, cache=None, uris_resolve=False):
10151035
"""
@@ -1121,7 +1141,7 @@ def _hdf_group(fhandle, identity='root', baseuri=None, prefixes=None,
11211141
#if hasattr(dataset, 'shape'):
11221142
elif isinstance(dataset, h5py._hl.dataset.Dataset):
11231143
sattrs = dict(dataset.attrs)
1124-
sattrs['bald__shape'] = dataset.shape
1144+
sattrs['bald__shape'] = list(dataset.shape)
11251145
dset = Array(baseuri, name, sattrs, prefixes, aliases, aliasgraph)
11261146
root_container.attrs['bald__contains'].add(dset)
11271147
file_variables[dataset.name] = dset

lib/bald/tests/integration/CDL/multi_array_reference.cdl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ dimensions:
33
pdim0 = 11 ;
44
pdim1 = 17 ;
55
variables:
6-
int prefix_list(pdim0, pdim1) ;
6+
int prefix_list ;
77
prefix_list:bald__ = "http://binary-array-ld.net/latest/" ;
88
prefix_list:metce__ = "http://codes.wmo.int/common/observation-type/METCE/2013/" ;
99
prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
@@ -23,13 +23,9 @@ variables:
2323
int pdim1(pdim1) ;
2424

2525
int location_variable(pdim0, pdim1) ;
26-
location_variable:rdf__type = "bald__Reference";
27-
location_variable:bald__array = "location_variable" ;
2826
location_variable:bald__references = "location_reference_system" ;
2927

3028
int location_reference_system;
31-
location_variable:rdf__type = "bald__Reference";
32-
location_reference_system:bald__array = "location_reference_system";
3329
location_reference_system:pcode = "4897";
3430

3531
int set_collection ;

0 commit comments

Comments
 (0)