@@ -13,11 +13,36 @@
 from larray.core.metadata import Metadata
 from larray.util.misc import LHDFStore
 from larray.inout.session import register_file_handler
-from larray.inout.common import FileHandler, _supported_scalar_types
+from larray.inout.common import FileHandler, _supported_larray_types, _supported_scalars_types
 from larray.inout.pandas import df_asarray
 from larray.example import get_example_filepath


+_hdf_supported_types = _supported_larray_types + _supported_scalars_types
+
+
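+# helper wrapping a plain Python scalar so it can be dumped to (and read back from) HDF5:
+# the value is stored as a length-1 pandas Series and its type name is kept in the storer attrs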
+class ScalarHDF(object):
+    def __init__(self, value):
+        _type = type(value).__name__
+        if not isinstance(value, _supported_scalars_types):
+            raise TypeError("Type {} is not currently supported by the HDF5 format".format(_type))
+        self.value = value
+        self._type = _type
+
+    def to_hdf(self, filepath, key):
+        key = _translate_group_key_hdf(key)
+        s = pd.Series(data=self.value)
+        with LHDFStore(filepath) as store:
+            store.put(key, s)
+            store.get_storer(key).attrs.type = self._type
+
+
+# for backward compatibility (larray < 0.29) but any object read from an hdf file should have
+# an attribute 'type'
+def _get_type_from_attrs(attrs):
+    return attrs.type if 'type' in attrs else 'Array'
+
+
 def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, sort_columns=False,
              name=None, **kwargs):
     r"""Reads a scalar or an axis or group or array named key from a HDF5 file in filepath (path+name)
@@ -73,53 +98,51 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s
     with LHDFStore(filepath_or_buffer) as store:
         try:
             pd_obj = store.get(key)
-            attrs = store.get_storer(key).attrs
-            writer = attrs.writer if 'writer' in attrs else None
-            # for backward compatibility but any object read from an hdf file should have an attribute 'type'
-            _type = attrs.type if 'type' in attrs else 'Array'
-            _meta = attrs.metadata if 'metadata' in attrs else None
-            if _type == 'Array':
-                # cartesian product is not necessary if the array was written by LArray
-                cartesian_prod = writer != 'LArray'
-                res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value,
-                                 parse_header=False, cartesian_prod=cartesian_prod)
-                if _meta is not None:
-                    res.meta = _meta
-            elif _type == 'Axis':
-                if name is None:
-                    name = str(pd_obj.name)
-                if name == 'None':
-                    name = None
-                labels = pd_obj.values
-                if 'dtype_kind' in attrs and attrs['dtype_kind'] == 'U':
-                    # this check is there because there are cases where dtype_kind is 'U' but pandas returns
-                    # an array with object dtype containing bytes instead of a string array, and in that case
-                    # np.char.decode does not work
-                    # this is at least the case for Python2 + Pandas 0.24.2 combination
-                    if labels.dtype.kind == 'O':
-                        labels = np.array([l.decode('utf-8') for l in labels], dtype='U')
-                    else:
-                        labels = np.char.decode(labels, 'utf-8')
-                res = Axis(labels=labels, name=name)
-                res._iswildcard = attrs['wildcard']
-            elif _type == 'Group':
-                if name is None:
-                    name = str(pd_obj.name)
-                if name == 'None':
-                    name = None
-                key = pd_obj.values
-                if 'dtype_kind' in attrs and attrs['dtype_kind'] == 'U':
-                    key = np.char.decode(key, 'utf-8')
-                axis = read_hdf(filepath_or_buffer, attrs['axis_key'])
-                res = LGroup(key=key, name=name, axis=axis)
-            elif _type == 'scalar':
-                res = pd_obj.values
-                # XXX: assert len(res) == 1?
-                if len(res) == 1:
-                    res = res[0]
         except KeyError:
             filepath = filepath_or_buffer if isinstance(filepath_or_buffer, HDFStore) else store.filename
             raise KeyError('No item with name {} has been found in file {}'.format(key, filepath))
+        attrs = store.get_storer(key).attrs
+        writer = attrs.writer if 'writer' in attrs else None
+        _type = _get_type_from_attrs(attrs)
+        _meta = attrs.metadata if 'metadata' in attrs else None
+        if _type == 'Array':
+            # cartesian product is not necessary if the array was written by LArray
+            cartesian_prod = writer != 'LArray'
+            res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value,
+                             parse_header=False, cartesian_prod=cartesian_prod)
+            if _meta is not None:
+                res.meta = _meta
+        elif _type == 'Axis':
+            if name is None:
+                name = str(pd_obj.name)
+            if name == 'None':
+                name = None
+            labels = pd_obj.values
+            if 'dtype_kind' in attrs and attrs['dtype_kind'] == 'U':
+                # this check is there because there are cases where dtype_kind is 'U' but pandas returns
+                # an array with object dtype containing bytes instead of a string array, and in that case
+                # np.char.decode does not work
+                # this is at least the case for Python2 + Pandas 0.24.2 combination
+                if labels.dtype.kind == 'O':
+                    labels = np.array([l.decode('utf-8') for l in labels], dtype='U')
+                else:
+                    labels = np.char.decode(labels, 'utf-8')
+            res = Axis(labels=labels, name=name)
+            res._iswildcard = attrs['wildcard']
+        elif _type == 'Group':
+            if name is None:
+                name = str(pd_obj.name)
+            if name == 'None':
+                name = None
+            key = pd_obj.values
+            if 'dtype_kind' in attrs and attrs['dtype_kind'] == 'U':
+                key = np.char.decode(key, 'utf-8')
+            axis = read_hdf(filepath_or_buffer, attrs['axis_key'])
+            res = LGroup(key=key, name=name, axis=axis)
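+        # scalars are stored as a length-1 Series whose 'type' attr holds the scalar type's name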
+        elif _type in {cls.__name__ for cls in _supported_scalars_types}:
+            res = pd_obj.values
+            assert len(res) == 1
+            res = res[0]
     return res

@@ -136,46 +159,34 @@ def _open_for_write(self):

     def list_items(self):
         keys = [key.strip('/') for key in self.handle.keys()]
-        items = []
-        # scalars
-        items += [(key.split('/')[-1], 'scalar') for key in keys if '__scalars__' in key]
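+        # top-level keys store their type in attrs, so the object kind can be read back directly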
+        items = [(key, _get_type_from_attrs(self.handle.get_storer(key).attrs)) for key in keys if '/' not in key]
+        # ---- for backward compatibility (LArray < 0.33) ----
         # axes
-        items += [(key.split('/')[-1], 'Axis') for key in keys if '__axes__' in key]
+        items += [(key.split('/')[-1], 'Axis_Backward_Comp') for key in keys if '__axes__' in key]
         # groups
-        items += [(key.split('/')[-1], 'Group') for key in keys if '__groups__' in key]
-        # arrays
-        items += [(key, 'Array') for key in keys if '/' not in key]
+        items += [(key.split('/')[-1], 'Group_Backward_Comp') for key in keys if '__groups__' in key]
         return items

     def _read_item(self, key, type, *args, **kwargs):
-        if type == 'Array':
+        if type in {cls.__name__ for cls in _hdf_supported_types}:
             hdf_key = '/' + key
-        elif type == 'Axis':
+        # ---- for backward compatibility (LArray < 0.33) ----
+        elif type == 'Axis_Backward_Comp':
             hdf_key = '__axes__/' + key
-        elif type == 'Group':
+        elif type == 'Group_Backward_Comp':
             hdf_key = '__groups__/' + key
-        elif type == 'scalar':
-            hdf_key = '__scalars__/' + key
         else:
             raise TypeError()
         return read_hdf(self.handle, hdf_key, *args, **kwargs)

     def _dump_item(self, key, value, *args, **kwargs):
-        if isinstance(value, Array):
+        if isinstance(value, _supported_scalars_types):
+            value = ScalarHDF(value)
+        elif isinstance(value, Group):
+            kwargs['axis_key'] = '/' + value.axis.name
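+        # Array, Axis, Group and ScalarHDF objects all know how to dump themselves via to_hdf()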
+        if hasattr(value, 'to_hdf'):
             hdf_key = '/' + key
             value.to_hdf(self.handle, hdf_key, *args, **kwargs)
-        elif isinstance(value, Axis):
-            hdf_key = '__axes__/' + key
-            value.to_hdf(self.handle, hdf_key, *args, **kwargs)
-        elif isinstance(value, Group):
-            hdf_key = '__groups__/' + key
-            hdf_axis_key = '__axes__/' + value.axis.name
-            value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs)
-        elif isinstance(value, _supported_scalar_types):
-            hdf_key = '__scalars__/' + key
-            s = pd.Series(value)
-            self.handle.put(hdf_key, s)
-            self.handle.get_storer(hdf_key).attrs.type = 'scalar'
         else:
             raise TypeError()
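
With this change a scalar round-trips through HDF5 like any other larray object. A minimal
usage sketch, assuming the patch applies to larray/inout/hdf.py and that int is among
_supported_scalars_types (the file name 'test.h5' and key 'answer' are hypothetical):

    from larray.inout.hdf import ScalarHDF, read_hdf

    ScalarHDF(42).to_hdf('test.h5', 'answer')    # stored as a length-1 Series, attrs.type = 'int'
    assert read_hdf('test.h5', 'answer') == 42   # dispatched on the stored type name, then unwrapped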