diff --git a/larray/core/axis.py b/larray/core/axis.py index 2d8f81baa..5ae80ab03 100644 --- a/larray/core/axis.py +++ b/larray/core/axis.py @@ -1349,10 +1349,13 @@ def to_hdf(self, filepath, key=None): raise ValueError("Argument key must be provided explicitly in case of anonymous axis") key = self.name key = _translate_group_key_hdf(key) - s = pd.Series(data=self.labels, name=self.name) + dtype_kind = self.labels.dtype.kind + data = np.char.encode(self.labels, 'utf-8') if dtype_kind == 'U' else self.labels + s = pd.Series(data=data, name=self.name) with LHDFStore(filepath) as store: store.put(key, s) store.get_storer(key).attrs.type = 'Axis' + store.get_storer(key).attrs.dtype_kind = dtype_kind store.get_storer(key).attrs.wildcard = self.iswildcard @property diff --git a/larray/core/group.py b/larray/core/group.py index 52030d474..6d93c9e9f 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -1462,10 +1462,15 @@ def to_hdf(self, filepath, key=None, axis_key=None): if self.axis.name is None: raise ValueError("Argument axis_key must be provided explicitly if the associated axis is anonymous") axis_key = self.axis.name - s = pd.Series(data=self.eval(), name=self.name) + data = self.eval() + dtype_kind = data.dtype.kind if isinstance(data, np.ndarray) else '' + if dtype_kind == 'U': + data = np.char.encode(data, 'utf-8') + s = pd.Series(data=data, name=self.name) with LHDFStore(filepath) as store: store.put(key, s) store.get_storer(key).attrs.type = 'Group' + store.get_storer(key).attrs.dtype_kind = dtype_kind if axis_key not in store: self.axis.to_hdf(store, key=axis_key) store.get_storer(key).attrs.axis_key = axis_key diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 025498add..09ea6d0c7 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -88,15 +88,21 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s name = str(pd_obj.name) if name == 'None': name = None - res = Axis(labels=pd_obj.values, name=name) + labels = pd_obj.values + if 'dtype_kind' in attrs and attrs['dtype_kind'] == 'U': + labels = np.char.decode(labels, 'utf-8') + res = Axis(labels=labels, name=name) res._iswildcard = attrs['wildcard'] elif _type == 'Group': if name is None: name = str(pd_obj.name) if name == 'None': name = None + key = pd_obj.values + if 'dtype_kind' in attrs and attrs['dtype_kind'] == 'U': + key = np.char.decode(key, 'utf-8') axis = read_hdf(filepath_or_buffer, attrs['axis_key']) - res = LGroup(key=pd_obj.values, name=name, axis=axis) + res = LGroup(key=key, name=name, axis=axis) return res diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 4d669d2ca..268e73f72 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -1,3 +1,4 @@ +# -*- coding: utf8 -*- from __future__ import absolute_import, division, print_function import os diff --git a/larray/tests/test_axis.py b/larray/tests/test_axis.py index 87ed1de75..1410e9e26 100644 --- a/larray/tests/test_axis.py +++ b/larray/tests/test_axis.py @@ -1,4 +1,6 @@ +# -*- coding: utf8 -*- from __future__ import absolute_import, division, print_function + import pytest import os.path import numpy as np @@ -391,6 +393,7 @@ def test_h5_io(tmpdir): lipro = Axis('lipro=P01..P05') anonymous = Axis(range(3)) wildcard = Axis(3, 'wildcard') + string_axis = Axis(['@!àéè&%µ$~', '/*-+_§()><', 'another label'], 'string_axis') fpath = os.path.join(str(tmpdir), 'axes.h5') # ---- default behavior ---- @@ -410,6 +413,10 @@ def test_h5_io(tmpdir): wildcard2 = read_hdf(fpath, key=wildcard.name) assert wildcard2.iswildcard assert wildcard.equals(wildcard2) + # string axis + string_axis.to_hdf(fpath) + string_axis2 = read_hdf(fpath, string_axis.name) + assert string_axis.equals(string_axis2) # ---- specific key ---- # int axis diff --git a/larray/tests/test_group.py b/larray/tests/test_group.py index fba0c5ad6..9fd582bab 100644 --- a/larray/tests/test_group.py +++ b/larray/tests/test_group.py @@ -1,4 +1,6 @@ +# -*- coding: utf8 -*- from __future__ import absolute_import, division, print_function + import pytest import os.path import numpy as np @@ -192,6 +194,7 @@ def test_h5_io_lgroup(tmpdir): named_axis_not_in_file = lipro['P01,P03,P05'] >> 'P_odd' anonymous = age[':5'] wildcard = age_wildcard[':5'] >> 'age_w_05' + string_group = Axis(['@!àéè&%µ$~', '/*-+_§()><', 'another label'], 'string_axis')[:] >> 'string_group' # ---- default behavior ---- # named group @@ -209,6 +212,10 @@ def test_h5_io_lgroup(tmpdir): named_axis_not_in_file.to_hdf(fpath) named2 = read_hdf(fpath, key=named_axis_not_in_file.name) assert all(named_axis_not_in_file == named2) + # string group + string_group.to_hdf(fpath) + string_group2 = read_hdf(fpath, key=string_group.name) + assert all(string_group == string_group2) # ---- specific hdf group + key ---- hdf_group = 'my_groups'