Skip to content

Commit c889e42

Browse files
committed
reimplement array reference
1 parent 5fc06ef commit c889e42

File tree

5 files changed

+568
-18
lines changed

5 files changed

+568
-18
lines changed

lib/bald/__init__.py

Lines changed: 109 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
from collections import OrderedDict
12
import contextlib
23
import copy
4+
from difflib import SequenceMatcher
5+
import operator
36
import os
47
import re
58
import time
@@ -667,6 +670,78 @@ def graph_elems(self):
667670
return instances, links
668671

669672

673+
def _merge_sequences(seq1,seq2):
674+
sm=SequenceMatcher(a=seq1,b=seq2)
675+
res = []
676+
for (op, start1, end1, start2, end2) in sm.get_opcodes():
677+
if op == 'equal' or op=='delete':
678+
#This range appears in both sequences, or only in the first one.
679+
res += seq1[start1:end1]
680+
elif op == 'insert':
681+
#This range appears in only the second sequence.
682+
res += seq2[start2:end2]
683+
elif op == 'replace':
684+
#There are different ranges in each sequence - add both.
685+
res += seq1[start1:end1]
686+
res += seq2[start2:end2]
687+
return res
688+
689+
def netcdf_shared_dimensions(source_var, target_var):
    """Compute reshapes that put two variables on a shared dimension list.

    The dimension names of both variables are merged into one ordered list
    (see ``_merge_sequences``). For each variable a reshape is returned that
    has one entry per merged dimension: the variable's own size for
    dimensions it has, and 1 for dimensions it lacks.

    Args:
        source_var: object exposing ``dimensions`` (sequence of names) and
            ``shape`` (sequence of sizes, same length).
        target_var: object exposing the same two attributes.

    Returns:
        An OrderedDict with the keys 'sourceReshape' and 'targetReshape',
        each mapping to an OrderedDict of dimension name -> size.

    Raises:
        ValueError: if the product of a reshape's sizes differs from the
            product of the variable's original sizes, which happens when a
            dimension name shared by both variables has different sizes.
    """
    # functools.reduce exists on both Python 2 and 3, so no compatibility
    # shim is needed for it.
    from functools import reduce

    source_dims = OrderedDict(zip(source_var.dimensions, source_var.shape))
    target_dims = OrderedDict(zip(target_var.dimensions, target_var.shape))
    initial = OrderedDict((('sourceReshape', source_dims),
                           ('targetReshape', target_dims)))

    # Deterministic union of the dimension sizes; on a name clash the
    # target size wins and the consistency check below reports the clash.
    combined_dims = dict(source_dims)
    combined_dims.update(target_dims)
    dim_order = _merge_sequences(source_var.dimensions, target_var.dimensions)

    result = OrderedDict()
    for key, own_dims in initial.items():
        # Dimensions the variable does not have are given length 1.
        result[key] = OrderedDict(
            (dim, combined_dims[dim] if dim in own_dims else 1)
            for dim in dim_order)

    # Check that the overall number of values is unchanged by the reshape.
    for key in result:
        reshaped_count = reduce(operator.mul, result[key].values(), 1)
        initial_count = reduce(operator.mul, initial[key].values(), 1)
        if reshaped_count != initial_count:
            raise ValueError('Reshape lists must have the same count for the multiplication of elements')
    return result
712+
713+
714+
715+
# def netcdf_shared_dimensions(source_var, target_var):
716+
# result = OrderedDict((('sourceReshape', OrderedDict()),
717+
# ('targetReshape', OrderedDict())))
718+
# source_dims = OrderedDict(zip(source_var.dimensions, source_var.shape))
719+
# target_dims = OrderedDict(zip(target_var.dimensions, target_var.shape))
720+
# initial = OrderedDict((('sourceReshape', source_dims),
721+
# ('targetReshape', target_dims)))
722+
723+
# if list(target_dims.keys())[0] not in source_dims.keys():
724+
# result['sourceReshape'] = OrderedDict(list(target_dims.items()) +
725+
# list(source_dims.items()))
726+
# result['targetReshape'] = OrderedDict(list(target_dims.items()) +
727+
# list(source_dims.items()))
728+
# else:
729+
# result['sourceReshape'] = OrderedDict(list(source_dims.items()) +
730+
# list(target_dims.items()))
731+
# result['targetReshape'] = OrderedDict(list(source_dims.items()) +
732+
# list(target_dims.items()))
733+
# for k in result:
734+
# for rk in result[k]:
735+
# if rk not in initial[k]:
736+
# result[k][rk] = 1
737+
# # check overall nValues is consistent
738+
# # is this validation?
739+
# # or, can this only be a code bug, given nc dims???
740+
# for k in result:
741+
# if six.moves.reduce(operator.mul, [i[1] for i in result[k].items()], 1) != six.moves.reduce(operator.mul, [i[1] for i in initial[k].items()], 1):
742+
# raise ValueError('Reshape lists must have the same count for the multiplication of elements')
743+
# return result
744+
670745
@contextlib.contextmanager
671746
def load(afilepath):
672747
if afilepath.endswith('.hdf'):
@@ -831,12 +906,13 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
831906
dtype = '{}{}'.format(fhandle.variables[name].dtype.kind,
832907
fhandle.variables[name].dtype.itemsize)
833908
fv = netCDF4.default_fillvals.get(dtype)
909+
first = None
834910
if fhandle.variables[name][0] == fv:
835911
first = np.ma.MaskedArray(fhandle.variables[name][0],
836912
mask=True)
837913
else:
838914
first = fhandle.variables[name][0]
839-
if first:
915+
if first is not None:
840916
try:
841917
first = int(first)
842918
except Exception:
@@ -1002,19 +1078,40 @@ def _make_ref_entities(var, fhandle, pref, name, baseuri,
10021078
fhandle.variables[pref].shape):
10031079
try:
10041080
refset = var.attrs.get('bald__references', set())
1005-
cv_shape = fhandle.variables[pref].shape
1006-
var_shape = fhandle.variables[name].shape
1081+
# cv_shape = fhandle.variables[pref].shape
1082+
# var_shape = fhandle.variables[name].shape
10071083
identity = '{}_{}_ref'.format(name, pref)
10081084
rattrs = {}
10091085
rattrs['rdf__type'] = 'bald__Reference'
1010-
reshape = [1 for adim in var_shape]
1011-
1012-
dims = fhandle.variables[pref].dimensions
1013-
for dim in dims:
1014-
cvi = fhandle.variables[name].dimensions.index(dim)
1015-
reshape[cvi] = int(fhandle.dimensions[dim].size)
1016-
rattrs['bald__childBroadcast'] = reshape
1017-
rattrs['bald__array'] = set((file_variables.get(pref),))
1086+
# reshape = [1 for adim in var_shape]
1087+
# reshape_name = [1 for adim in set(fhandle.variables[name].dimensions).union(set(fhandle.variables[pref].dimensions))]
1088+
# reshape_pref = [1 for adim in set(fhandle.variables[name].dimensions).union(set(fhandle.variables[pref].dimensions))]
1089+
# name_dims = OrderedDict(zip(fhandle.variables[name].dimensions, var_shape))
1090+
# pref_dims = OrderedDict(zip(fhandle.variables[pref].dimensions, cv_shape))
1091+
# combined_dims = OrderedDict(list(pref_dims.items())+list(name_dims.items()))
1092+
# reverse_combined_dims = OrderedDict(list(name_dims.items())+list(pref_dims.items()))
1093+
# # also
1094+
# # name_dims.update(pref_dims)
1095+
# # pref_dims.update(name_dims)
1096+
1097+
# dims = fhandle.variables[pref].dimensions
1098+
# for dim in dims:
1099+
# cvi = fhandle.variables[name].dimensions.index(dim)
1100+
# reshape[cvi] = int(fhandle.dimensions[dim].size)
1101+
# rattrs['bald__childBroadcast'] = reshape
1102+
# rattrs['bald__array'] = set((file_variables.get(pref),))
1103+
1104+
reshapes = netcdf_shared_dimensions(fhandle.variables[name],
1105+
fhandle.variables[pref])
1106+
rattrs['bald__sourceShape'] = list(fhandle.variables[name].shape)
1107+
rattrs['bald__targetShape'] = list(fhandle.variables[pref].shape)
1108+
sourceReshape = [i[1] for i in reshapes['sourceReshape'].items()]
1109+
if sourceReshape != list(fhandle.variables[name].shape):
1110+
rattrs['bald__sourceReshape'] = sourceReshape
1111+
targetReshape = [i[1] for i in reshapes['targetReshape'].items()]
1112+
if targetReshape != list(fhandle.variables[pref].shape):
1113+
rattrs['bald__targetReshape'] = targetReshape
1114+
rattrs['bald__target'] = set((file_variables.get(pref),))
10181115
ref_node = Subject(baseuri, identity, rattrs,
10191116
prefixes=prefixes,
10201117
aliases=aliases,
@@ -1024,6 +1121,7 @@ def _make_ref_entities(var, fhandle, pref, name, baseuri,
10241121
refset.add(ref_node)
10251122
var.attrs['bald__references'] = refset
10261123
except ValueError:
1124+
import pdb; pdb.set_trace()
10271125
# Indexing and dimension identification can fail, especially
10281126
# with unexpectedly formatted files. Fail silently on load, so
10291127
# that a partial graph may be returned. Issues like this are
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
netcdf orca2_votemper {
2+
dimensions:
3+
dim0 = 148 ;
4+
dim1 = 180 ;
5+
bnds = 2 ;
6+
bnds_4 = 4 ;
7+
time = 3 ;
8+
deptht = 4 ;
9+
percentiles = 19 ;
10+
variables:
11+
float votemper(time, deptht, dim0, dim1) ;
12+
votemper:_FillValue = 9.96921e+36f ;
13+
votemper:standard_name = "sea_water_potential_temperature" ;
14+
votemper:long_name = "Temperature" ;
15+
votemper:units = "degC" ;
16+
votemper:cell_methods = "time: mean" ;
17+
votemper:coordinates = "deptht nav_lat nav_lon time" ;
18+
votemper:ancillary_variables = "votemper_pdf" ;
19+
float votemper_pdf(percentiles, time, deptht, dim0, dim1) ;
20+
votemper_pdf:_FillValue = 9.96921e+36f ;
21+
votemper_pdf:standard_name = "sea_water_potential_temperature" ;
22+
votemper_pdf:long_name = "Temperature" ;
23+
votemper_pdf:units = "degC" ;
24+
votemper_pdf:coordinates = "deptht nav_lat nav_lon time" ;
25+
float percentiles(percentiles) ;
26+
percentiles:units = "1" ;
27+
percentiles:long_name = "percentile" ;
28+
float deptht(deptht) ;
29+
deptht:bounds = "deptht_bnds" ;
30+
deptht:units = "m" ;
31+
deptht:standard_name = "depth" ;
32+
deptht:long_name = "Vertical T levels" ;
33+
deptht:positive = "down" ;
34+
deptht:title = "deptht" ;
35+
double deptht_bnds(deptht, bnds) ;
36+
float nav_lat(dim0, dim1) ;
37+
nav_lat:bounds = "nav_lat_bnds" ;
38+
nav_lat:units = "degrees" ;
39+
nav_lat:standard_name = "latitude" ;
40+
nav_lat:long_name = "Latitude" ;
41+
nav_lat:nav_model = "Default grid" ;
42+
double nav_lat_bnds(dim0, dim1, bnds_4) ;
43+
float nav_lon(dim0, dim1) ;
44+
nav_lon:bounds = "nav_lon_bnds" ;
45+
nav_lon:units = "degrees" ;
46+
nav_lon:standard_name = "longitude" ;
47+
nav_lon:long_name = "Longitude" ;
48+
nav_lon:nav_model = "Default grid" ;
49+
double nav_lon_bnds(dim0, dim1, bnds_4) ;
50+
int time(time) ;
51+
time:units = "hours since 2001-01-01 00:00:00" ;
52+
time:standard_name = "time" ;
53+
time:long_name = "Time axis" ;
54+
time:calendar = "360_day" ;
55+
time:time_origin = "2001-JAN-01 00:00:00" ;
56+
time:title = "Time" ;
57+
58+
// global attributes:
59+
:Conventions = "CF-1.5" ;
60+
61+
data:
62+
time = 0, 24, 48 ;
63+
deptht = 0., 10., 100., 1000. ;
64+
percentiles = 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95 ;
65+
66+
}

0 commit comments

Comments
 (0)