1
+ from collections import OrderedDict
1
2
import contextlib
2
3
import copy
4
+ from difflib import SequenceMatcher
5
+ import operator
3
6
import os
4
7
import re
5
8
import time
@@ -667,6 +670,78 @@ def graph_elems(self):
667
670
return instances , links
668
671
669
672
673
+ def _merge_sequences (seq1 ,seq2 ):
674
+ sm = SequenceMatcher (a = seq1 ,b = seq2 )
675
+ res = []
676
+ for (op , start1 , end1 , start2 , end2 ) in sm .get_opcodes ():
677
+ if op == 'equal' or op == 'delete' :
678
+ #This range appears in both sequences, or only in the first one.
679
+ res += seq1 [start1 :end1 ]
680
+ elif op == 'insert' :
681
+ #This range appears in only the second sequence.
682
+ res += seq2 [start2 :end2 ]
683
+ elif op == 'replace' :
684
+ #There are different ranges in each sequence - add both.
685
+ res += seq1 [start1 :end1 ]
686
+ res += seq2 [start2 :end2 ]
687
+ return res
688
+
689
def netcdf_shared_dimensions(source_var, target_var):
    """
    Build reshape mappings placing two variables on a shared dimension list.

    The dimension names of both variables are merged into one ordered
    list (see _merge_sequences).  For each variable a mapping of
    dimension name to size is produced over that merged list: a
    dimension the variable has gets a size, any other dimension gets
    size 1.

    Args:
        source_var: object exposing `dimensions` (sequence of names) and
            `shape` (sequence of sizes), e.g. a netCDF variable.
        target_var: object with the same two attributes.

    Returns:
        OrderedDict with the keys 'sourceReshape' and 'targetReshape',
        each an OrderedDict of dimension name -> size in merged order.

    Raises:
        ValueError: if the product of the sizes in a reshape differs
            from the product of that variable's own shape.  This can
            happen when a dimension name carries different sizes in the
            two variables.

    """
    source_dims = OrderedDict(zip(source_var.dimensions, source_var.shape))
    target_dims = OrderedDict(zip(target_var.dimensions, target_var.shape))
    initial = OrderedDict((('sourceReshape', source_dims),
                           ('targetReshape', target_dims)))

    # Name -> size over both variables.  Built with copy + update rather
    # than a set union of the items: `items() | items()` fails on
    # Python 2 (items() is a list there) and has hash-dependent ordering
    # on Python 3.  On a size conflict the target's size is used; the
    # consistency check below then reports the mismatch.
    combined_dims = OrderedDict(source_dims)
    combined_dims.update(target_dims)

    dim_order = _merge_sequences(source_var.dimensions, target_var.dimensions)

    result = OrderedDict()
    for key, own_dims in initial.items():
        # Dimensions the variable does not have are padded with size 1.
        result[key] = OrderedDict(
            (dim, combined_dims[dim] if dim in own_dims else 1)
            for dim in dim_order)

    # Check that the overall number of values is unchanged by the reshape.
    for key in result:
        reshaped_count = six.moves.reduce(operator.mul,
                                          result[key].values(), 1)
        initial_count = six.moves.reduce(operator.mul,
                                         initial[key].values(), 1)
        if reshaped_count != initial_count:
            raise ValueError('Reshape lists must have the same count for the multiplication of elements')
    return result
712
+
713
+
714
+
715
+ # def netcdf_shared_dimensions(source_var, target_var):
716
+ # result = OrderedDict((('sourceReshape', OrderedDict()),
717
+ # ('targetReshape', OrderedDict())))
718
+ # source_dims = OrderedDict(zip(source_var.dimensions, source_var.shape))
719
+ # target_dims = OrderedDict(zip(target_var.dimensions, target_var.shape))
720
+ # initial = OrderedDict((('sourceReshape', source_dims),
721
+ # ('targetReshape', target_dims)))
722
+
723
+ # if list(target_dims.keys())[0] not in source_dims.keys():
724
+ # result['sourceReshape'] = OrderedDict(list(target_dims.items()) +
725
+ # list(source_dims.items()))
726
+ # result['targetReshape'] = OrderedDict(list(target_dims.items()) +
727
+ # list(source_dims.items()))
728
+ # else:
729
+ # result['sourceReshape'] = OrderedDict(list(source_dims.items()) +
730
+ # list(target_dims.items()))
731
+ # result['targetReshape'] = OrderedDict(list(source_dims.items()) +
732
+ # list(target_dims.items()))
733
+ # for k in result:
734
+ # for rk in result[k]:
735
+ # if rk not in initial[k]:
736
+ # result[k][rk] = 1
737
+ # # check overall nValues is consistent
738
+ # # is this validation?
739
+ # # or, can this only be a code bug, given nc dims???
740
+ # for k in result:
741
+ # if six.moves.reduce(operator.mul, [i[1] for i in result[k].items()], 1) != six.moves.reduce(operator.mul, [i[1] for i in initial[k].items()], 1):
742
+ # raise ValueError('Reshape lists must have the same count for the multiplication of elements')
743
+ # return result
744
+
670
745
@contextlib .contextmanager
671
746
def load (afilepath ):
672
747
if afilepath .endswith ('.hdf' ):
@@ -831,12 +906,13 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
831
906
dtype = '{}{}' .format (fhandle .variables [name ].dtype .kind ,
832
907
fhandle .variables [name ].dtype .itemsize )
833
908
fv = netCDF4 .default_fillvals .get (dtype )
909
+ first = None
834
910
if fhandle .variables [name ][0 ] == fv :
835
911
first = np .ma .MaskedArray (fhandle .variables [name ][0 ],
836
912
mask = True )
837
913
else :
838
914
first = fhandle .variables [name ][0 ]
839
- if first :
915
+ if first is not None :
840
916
try :
841
917
first = int (first )
842
918
except Exception :
@@ -1002,19 +1078,40 @@ def _make_ref_entities(var, fhandle, pref, name, baseuri,
1002
1078
fhandle .variables [pref ].shape ):
1003
1079
try :
1004
1080
refset = var .attrs .get ('bald__references' , set ())
1005
- cv_shape = fhandle .variables [pref ].shape
1006
- var_shape = fhandle .variables [name ].shape
1081
+ # cv_shape = fhandle.variables[pref].shape
1082
+ # var_shape = fhandle.variables[name].shape
1007
1083
identity = '{}_{}_ref' .format (name , pref )
1008
1084
rattrs = {}
1009
1085
rattrs ['rdf__type' ] = 'bald__Reference'
1010
- reshape = [1 for adim in var_shape ]
1011
-
1012
- dims = fhandle .variables [pref ].dimensions
1013
- for dim in dims :
1014
- cvi = fhandle .variables [name ].dimensions .index (dim )
1015
- reshape [cvi ] = int (fhandle .dimensions [dim ].size )
1016
- rattrs ['bald__childBroadcast' ] = reshape
1017
- rattrs ['bald__array' ] = set ((file_variables .get (pref ),))
1086
+ # reshape = [1 for adim in var_shape]
1087
+ # reshape_name = [1 for adim in set(fhandle.variables[name].dimensions).union(set(fhandle.variables[pref].dimensions))]
1088
+ # reshape_pref = [1 for adim in set(fhandle.variables[name].dimensions).union(set(fhandle.variables[pref].dimensions))]
1089
+ # name_dims = OrderedDict(zip(fhandle.variables[name].dimensions, var_shape))
1090
+ # pref_dims = OrderedDict(zip(fhandle.variables[pref].dimensions, cv_shape))
1091
+ # combined_dims = OrderedDict(list(pref_dims.items())+list(name_dims.items()))
1092
+ # reverse_combined_dims = OrderedDict(list(name_dims.items())+list(pref_dims.items()))
1093
+ # # also
1094
+ # # name_dims.update(pref_dims)
1095
+ # # pref_dims.update(name_dims)
1096
+
1097
+ # dims = fhandle.variables[pref].dimensions
1098
+ # for dim in dims:
1099
+ # cvi = fhandle.variables[name].dimensions.index(dim)
1100
+ # reshape[cvi] = int(fhandle.dimensions[dim].size)
1101
+ # rattrs['bald__childBroadcast'] = reshape
1102
+ # rattrs['bald__array'] = set((file_variables.get(pref),))
1103
+
1104
+ reshapes = netcdf_shared_dimensions (fhandle .variables [name ],
1105
+ fhandle .variables [pref ])
1106
+ rattrs ['bald__sourceShape' ] = list (fhandle .variables [name ].shape )
1107
+ rattrs ['bald__targetShape' ] = list (fhandle .variables [pref ].shape )
1108
+ sourceReshape = [i [1 ] for i in reshapes ['sourceReshape' ].items ()]
1109
+ if sourceReshape != list (fhandle .variables [name ].shape ):
1110
+ rattrs ['bald__sourceReshape' ] = sourceReshape
1111
+ targetReshape = [i [1 ] for i in reshapes ['targetReshape' ].items ()]
1112
+ if targetReshape != list (fhandle .variables [pref ].shape ):
1113
+ rattrs ['bald__targetReshape' ] = targetReshape
1114
+ rattrs ['bald__target' ] = set ((file_variables .get (pref ),))
1018
1115
ref_node = Subject (baseuri , identity , rattrs ,
1019
1116
prefixes = prefixes ,
1020
1117
aliases = aliases ,
@@ -1024,6 +1121,7 @@ def _make_ref_entities(var, fhandle, pref, name, baseuri,
1024
1121
refset .add (ref_node )
1025
1122
var .attrs ['bald__references' ] = refset
1026
1123
except ValueError :
1124
+ import pdb ; pdb .set_trace ()
1027
1125
# Indexing and dimension identification can fail, especially
1028
1126
# with unexpectedy formated files. Fail silently on load, to
1029
1127
# that a partial graph may be returned. Issues like this are
0 commit comments