From 26ab2bae9bdfa6d32d44a5e5a78cc69c234b5fd2 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Wed, 28 Aug 2019 15:45:11 +0200 Subject: [PATCH 1/8] fix #803 : - included key associated with Axis and Group objects when exporting Session objects to CSV or Excel files - removed useless kwargs['name'] = key in PandasHDFHandler._read_item() --- doc/source/changes/version_0_32.rst.inc | 3 +- larray/inout/csv.py | 10 +++--- larray/inout/excel.py | 20 ++++++------ larray/inout/hdf.py | 2 -- larray/inout/pandas.py | 41 ++++++++++++++----------- 5 files changed, 40 insertions(+), 36 deletions(-) diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index bbb2f6750..b2e125b7e 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -55,4 +55,5 @@ Miscellaneous improvements Fixes ^^^^^ -* fixed something (closes :issue:`1`). +* fixed reading/exporting sessions containing two or more axes/groups + with the same name (or anonymous) from/to CSV, Excel and HDF files (closes :issue:`803`). diff --git a/larray/inout/csv.py b/larray/inout/csv.py index 440771afe..621f44075 100644 --- a/larray/inout/csv.py +++ b/larray/inout/csv.py @@ -329,15 +329,15 @@ def list_items(self): pass try: fnames.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] + items = [(key, 'Axis') for key in sorted(self.axes.keys())] except: pass try: fnames.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] + items += [(key, 'Group') for key in sorted(self.groups.keys())] except: pass - items += [(name, 'Array') for name in fnames] + items += [(key, 'Array') for key in fnames] return items def _read_item(self, key, type, *args, **kwargs): @@ -375,10 +375,10 @@ def _dump_metadata(self, metadata): def save(self): if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) + df = _axes_to_df(self.axes) df.to_csv(self._to_filepath('__axes__'), sep=self.sep, index=False) if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) + df = _groups_to_df(self.groups) df.to_csv(self._to_filepath('__groups__'), sep=self.sep, index=False) def close(self): diff --git a/larray/inout/excel.py b/larray/inout/excel.py index d7d7cfff8..c9df21b3b 100644 --- a/larray/inout/excel.py +++ b/larray/inout/excel.py @@ -268,15 +268,15 @@ def list_items(self): pass try: sheet_names.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] + items = [(key, 'Axis') for key in sorted(self.axes.keys())] except: pass try: sheet_names.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] + items += [(key, 'Group') for key in sorted(self.groups.keys())] except: pass - items += [(name, 'Array') for name in sheet_names] + items += [(key, 'Array') for key in sheet_names] return items def _read_item(self, key, type, *args, **kwargs): @@ -316,10 +316,10 @@ def _dump_metadata(self, metadata): def save(self): if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) + df = _axes_to_df(self.axes) df.to_excel(self.handle, '__axes__', index=False, engine='xlsxwriter') if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) + df = _groups_to_df(self.groups) df.to_excel(self.handle, '__groups__', index=False, engine='xlsxwriter') def close(self): @@ -373,15 +373,15 @@ def list_items(self): pass try: sheet_names.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] + items = [(key, 'Axis') for key in 
sorted(self.axes.keys())] except: pass try: sheet_names.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] + items += [(key, 'Group') for key in sorted(self.groups.keys())] except: pass - items += [(name, 'Array') for name in sheet_names] + items += [(key, 'Array') for key in sheet_names] return items def _read_item(self, key, type, *args, **kwargs): @@ -419,11 +419,11 @@ def _dump_metadata(self, metadata): def save(self): if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) + df = _axes_to_df(self.axes) self.handle['__axes__'] = '' self.handle['__axes__'][:].options(pd.DataFrame, index=False).value = df if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) + df = _groups_to_df(self.groups) self.handle['__groups__'] = '' self.handle['__groups__'][:].options(pd.DataFrame, index=False).value = df self.handle.save() diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 92bbc7516..71c39d5e9 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -132,10 +132,8 @@ def _read_item(self, key, type, *args, **kwargs): hdf_key = '/' + key elif type == 'Axis': hdf_key = '__axes__/' + key - kwargs['name'] = key elif type == 'Group': hdf_key = '__groups__/' + key - kwargs['name'] = key else: raise TypeError() return read_hdf(self.handle, hdf_key, *args, **kwargs) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index a9732f4c0..57093f888 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -338,25 +338,35 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header # SERIES <--> AXIS, GROUP, META # # #################################### # -def _axis_to_series(axis, dtype=None): - return pd.Series(data=axis.labels, name=str(axis), dtype=dtype) +def _axis_to_series(key, axis, dtype=None): + name = '{}:{}'.format(key, axis.name) + return pd.Series(data=axis.labels, name=name, dtype=dtype) def _series_to_axis(series): - return Axis(labels=series.values, name=series.name) + name = str(series.name) + if ':' in name: + key, axis_name = name.split(':') + else: + # for backward compatibility + key = axis_name = name + return key, Axis(labels=series.values, name=axis_name) -def _group_to_series(group, dtype=None): - name = group.name if group.name is not None else '{?}' +def _group_to_series(key, group, dtype=None): if group.axis.name is None: raise ValueError("Cannot save a group with an anonymous associated axis") - name += '@{}'.format(group.axis.name) + name = '{}:{}@{}'.format(key, group.name, group.axis.name) return pd.Series(data=group.eval(), name=name, dtype=dtype) -def _series_to_group(series, axis): - name = series.name.split('@')[0] - return LGroup(key=series.values, name=name, axis=axis) +def _series_to_group(series, axes): + key, name = str(series.name).split(':') + group_name, axis_name = name.split('@') + if group_name == 'None': + group_name = None + axis = axes[axis_name] + return key, LGroup(key=series.values, name=group_name, axis=axis) # ######################################## # @@ -364,25 +374,20 @@ def _series_to_group(series, axis): # ######################################## # def _df_to_axes(df): - return OrderedDict([(col_name, _series_to_axis(df[col_name])) for col_name in df.columns.values]) + return OrderedDict([_series_to_axis(df[col_name]) for col_name in df.columns.values]) def _axes_to_df(axes): # set dtype to np.object otherwise pd.concat below may convert an int row/column as float # if trailing NaN need to be added - return 
pd.concat([_axis_to_series(axis, dtype=np.object) for axis in axes], axis=1) + return pd.concat([_axis_to_series(key, axis, dtype=np.object) for key, axis in axes.items()], axis=1) def _df_to_groups(df, axes): - groups = OrderedDict() - for name, values in df.iteritems(): - group_name, axis_name = name.split('@') - axis = axes[axis_name] - groups[group_name] = _series_to_group(values, axis) - return groups + return OrderedDict([_series_to_group(df[col_name], axes) for col_name in df.columns.values]) def _groups_to_df(groups): # set dtype to np.object otherwise pd.concat below may convert an int row/column as float # if trailing NaN need to be added - return pd.concat([_group_to_series(group, dtype=np.object) for group in groups], axis=1) + return pd.concat([_group_to_series(key, group, dtype=np.object) for key, group in groups.items()], axis=1) From f9ce292007cd468a32cd8aa0dc7b2ab849972c47 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Thu, 29 Aug 2019 08:22:35 +0200 Subject: [PATCH 2/8] fix #804 : removed trailing NaNs and None values when extracting Axis and Group objects from __axes__ and __groups__ special sheets/csv files --- doc/source/changes/version_0_32.rst.inc | 3 +++ larray/inout/pandas.py | 24 ++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index b2e125b7e..6982b703e 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -57,3 +57,6 @@ Fixes * fixed reading/exporting sessions containing two or more axes/groups with the same name (or anonymous) from/to CSV, Excel and HDF files (closes :issue:`803`). + +* fixed NaNs and None labels appearing in axes and groups when reading/exporting sessions + from/to CSV and Excel files (closes :issue:`804`). 
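The serialization convention behind these two fixes is easy to see outside the library: each Axis is dumped to the special __axes__ sheet/file as a pandas Series whose name is 'key:axis_name', so two axes sharing a name no longer collide, and the trailing NaNs that pd.concat pads onto shorter axes are trimmed on the way back. The following is a minimal standalone sketch of that scheme, not the library code itself (dump_axis/load_axis are hypothetical helpers):

    import pandas as pd

    def dump_axis(key, name, labels):
        # the Session key and the axis name both survive the round trip
        return pd.Series(data=labels, name='{}:{}'.format(key, name), dtype=object)

    def load_axis(series):
        key, axis_name = str(series.name).split(':')
        # axes of different lengths share one DataFrame -> trim trailing NaNs
        labels = list(series.loc[:series.last_valid_index()].values)
        return key, axis_name, labels

    df = pd.concat([dump_axis('a', 'a', ['a0', 'a1', 'a2']),
                    dump_axis('a2', 'a', ['a0', 'a1', 'a2', 'a3', 'a4'])], axis=1)
    print([load_axis(df[col]) for col in df.columns])
    # [('a', 'a', ['a0', 'a1', 'a2']), ('a2', 'a', ['a0', 'a1', 'a2', 'a3', 'a4'])]
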
diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index 57093f888..94552a7ce 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -338,6 +338,24 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header # SERIES <--> AXIS, GROUP, META # # #################################### # +def _extract_labels_from_series(series): + # remove trailing NaN or None values + # (multiple Axis or Group objects of different lengths + # are stored in the same DataFrame leading to trailing + # NaNs or None values when split into series) + series = series.loc[:series.last_valid_index()] + + labels = np.asarray(series.values) + # integer labels of axes or groups may have been converted to float values + # because of trailing NaNs + if labels.dtype.kind == 'f' and all([label.is_integer() for label in labels]): + labels = labels.astype(int) + # if dtype is still object, we assume values are strings + if labels.dtype.kind == 'O': + labels = labels.astype(str) + return labels + + def _axis_to_series(key, axis, dtype=None): name = '{}:{}'.format(key, axis.name) return pd.Series(data=axis.labels, name=name, dtype=dtype) @@ -345,12 +363,13 @@ def _axis_to_series(key, axis, dtype=None): def _series_to_axis(series): name = str(series.name) + labels = _extract_labels_from_series(series) if ':' in name: key, axis_name = name.split(':') else: # for backward compatibility key = axis_name = name - return key, Axis(labels=series.values, name=axis_name) + return key, Axis(labels=labels, name=axis_name) def _group_to_series(key, group, dtype=None): @@ -366,7 +385,8 @@ def _series_to_group(series, axes): if group_name == 'None': group_name = None axis = axes[axis_name] - return key, LGroup(key=series.values, name=group_name, axis=axis) + group_key = _extract_labels_from_series(series) + return key, LGroup(key=group_key, name=group_name, axis=axis) # ######################################## # From 8cf1fc9933d164cd98cdba090717eaa787dd8105 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Fri, 30 Aug 2019 15:57:51 +0200 Subject: [PATCH 3/8] (issue 805): anonymous and/or wildcard axes are handled correctly when using Session.save() and Session.load() --- larray/inout/pandas.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index 94552a7ce..88afa37d2 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, print_function +import re from itertools import product from collections import OrderedDict @@ -140,6 +141,9 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs): return LArray(s.values, Axis(s.index.values, name), meta=meta) +_anonymous_axis_pattern = re.compile(r'\{(\d+|\??)\}\*?') + + def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfold_last_axis_name=False, fill_value=nan, meta=None, cartesian_prod=True, **kwargs): r""" @@ -357,8 +361,9 @@ def _extract_labels_from_series(series): def _axis_to_series(key, axis, dtype=None): - name = '{}:{}'.format(key, axis.name) - return pd.Series(data=axis.labels, name=name, dtype=dtype) + name = '{}:{}'.format(key, str(axis)) + labels = len(axis) if axis.iswildcard else axis.labels + return pd.Series(data=labels, name=name, dtype=dtype) def _series_to_axis(series): @@ -366,6 +371,10 @@ def _series_to_axis(series): labels = _extract_labels_from_series(series) if ':' in name: key, axis_name = name.split(':') + if axis_name[-1] == '*': + labels = 
labels[0]
+        if _anonymous_axis_pattern.match(axis_name):
+            axis_name = None
     else:
         # for backward compatibility
         key = axis_name = name

From 234825749f4eb7101669de00c673f8757f924af2 Mon Sep 17 00:00:00 2001
From: Alix Damman
Date: Fri, 30 Aug 2019 11:28:23 +0200
Subject: [PATCH 4/8] (issue 805) updated LArray.to_frame(): call
 AxisCollection.display_names property at the beginning to make
 LArray.to_frame() consistent with LArray.dump() (and then to make it
 possible to handle anonymous and/or wildcard axes when dealing with CSV and
 Excel formats)

---
 larray/core/array.py       | 15 +++++++--------
 larray/tests/test_array.py | 11 +++++++++++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 17612fa91..a16ee22ab 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -1185,22 +1185,21 @@ def to_frame(self, fold_last_axis_name=False, dropna=None):
         b1    6   7
         """
         columns = pd.Index(self.axes[-1].labels)
+        axes_names = self.axes.display_names[:]
         if not fold_last_axis_name:
-            columns.name = self.axes[-1].name
+            columns.name = axes_names[-1]
         if self.ndim > 1:
-            axes_names = self.axes.names[:-1]
+            _axes_names = axes_names[:-1]
             if fold_last_axis_name:
-                tmp = axes_names[-1] if axes_names[-1] is not None else ''
-                if self.axes[-1].name:
-                    axes_names[-1] = "{}\\{}".format(tmp, self.axes[-1].name)
+                _axes_names[-1] = "{}\\{}".format(_axes_names[-1], axes_names[-1])
             if self.ndim == 2:
-                index = pd.Index(data=self.axes[0].labels, name=axes_names[0])
+                index = pd.Index(data=self.axes[0].labels, name=_axes_names[0])
             else:
-                index = pd.MultiIndex.from_product(self.axes.labels[:-1], names=axes_names)
+                index = pd.MultiIndex.from_product(self.axes.labels[:-1], names=_axes_names)
         else:
             index = pd.Index([''])
             if fold_last_axis_name:
-                index.name = self.axes.names[-1]
+                index.name = axes_names[-1]
         data = np.asarray(self).reshape(len(index), len(columns))
         df = pd.DataFrame(data, index, columns)
         if dropna is not None:
diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py
index 0615f987e..2a42cea24 100644
--- a/larray/tests/test_array.py
+++ b/larray/tests/test_array.py
@@ -3454,6 +3454,17 @@ def test_from_series():
     assert_array_equal(res, expected)
 
 
+def test_to_frame():
+    # array containing anonymous axes
+    arr = ndtest((Axis(2), Axis(2), Axis(2)))
+    df = arr.to_frame()
+    assert df.index.name is None
+    assert df.index.names == ['{0}*', '{1}*']
+    assert df.columns.name == '{2}*'
+    assert list(df.index.values) == [(0, 0), (0, 1), (1, 0), (1, 1)]
+    assert list(df.columns.values) == [0, 1]
+
+
 def test_from_frame():
     # 1) data = scalar
     # ================

From 787f9ca63e0401f0c8178ff2d246a9f5beb7184b Mon Sep 17 00:00:00 2001
From: Alix Damman
Date: Fri, 30 Aug 2019 14:52:14 +0200
Subject: [PATCH 5/8] (issue 805) updated from_frame(): parse each axis name
 to check it represents an anonymous and/or wildcard axis

---
 larray/inout/pandas.py     | 11 ++++++++++-
 larray/tests/test_array.py | 12 ++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py
index 88afa37d2..43fc88a3a 100644
--- a/larray/inout/pandas.py
+++ b/larray/inout/pandas.py
@@ -251,7 +251,16 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
     axes_names = [str(name) if name is not None else name
                   for name in axes_names]
 
-    axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)])
+    def _to_axis(labels, name):
+        if name is not None:
+            if name[-1] == '*':
+                
labels = len(labels) + name = name[:-1] + if _anonymous_axis_pattern.match(name): + name = None + return Axis(labels, name) + + axes = AxisCollection([_to_axis(labels, name) for labels, name in zip(axes_labels, axes_names)]) data = df.values.reshape(axes.shape) return LArray(data, axes, meta=meta) diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 2a42cea24..4df916138 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -3827,6 +3827,18 @@ def test_from_frame(): assert la.axes.names == ['age', 'sex', 'time'] assert_array_equal(la[0, 'F', :], [3722, 3395, 3347]) + # 3C) 3 anonymous axes + # ==================== + arr = ndtest((Axis(2), Axis(2), Axis(2))) + df = arr.to_frame() + + la = from_frame(df) + assert la.ndim == 3 + assert la.shape == (2, 2, 2) + for axis in la.axes: + assert axis.name is None + assert axis.iswildcard + # 4) test sort_rows and sort_columns arguments # ============================================ age = Axis('age=2,0,1,3') From 3c6b5e5bca5c7bbdbaa7ea1e74e544f287cc5836 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Fri, 30 Aug 2019 16:19:51 +0200 Subject: [PATCH 6/8] (issue 805): added changelog --- doc/source/changes/version_0_32.rst.inc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index 6982b703e..0c4668359 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -60,3 +60,6 @@ Fixes * fixed NaNs and None labels appearing in axes and groups when reading/exporting sessions from/to CSV and Excel files (closes :issue:`804`). + +* fixed importing/exporting anonymous and/or wildcard axes to CSV and Excel files + (closes :issue:`805`). From 830388cf93880aa32a49571eb73293188b870124 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Wed, 28 Aug 2019 15:07:18 +0200 Subject: [PATCH 7/8] improved unit tests for Session objects --- larray/tests/test_session.py | 326 +++++++++++++++-------------------- 1 file changed, 139 insertions(+), 187 deletions(-) diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index e62f29e33..a8c34ea65 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -29,27 +29,30 @@ def assertObjListEqual(got, expected): a = Axis('a=a0..a2') +a2 = Axis('a=a0..a4') +anonymous = Axis(4) a01 = a['a0,a1'] >> 'a01' -b = Axis('b=b0..b2') -b12 = b['b1,b2'] >> 'b12' +ano01 = a['a0,a1'] +b = Axis('b=0..4') +b024 = b[[0, 2, 4]] >> 'b024' c = 'c' d = {} -e = ndtest([(2, 'a0'), (3, 'a1')]) +e = ndtest([(2, 'a'), (3, 'b')]) _e = ndtest((3, 3)) -e2 = ndtest(('a=a0..a2', 'b=b0..b2')) -f = ndtest([(3, 'a0'), (2, 'a1')]) -g = ndtest([(2, 'a0'), (4, 'a1')]) +f = ndtest((Axis(3), Axis(2))) +g = ndtest([(2, 'a'), (4, 'b')]) +h = ndtest(('a=a0..a2', 'b=b0..b4')) @pytest.fixture() def session(): - return Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('c', c), ('d', d), ('e', e), ('g', g), ('f', f)]) + return Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('c', c), ('d', d), ('e', e), ('g', g), ('f', f), ('h', h)]) def test_init_session(meta): - s = Session(b, b12, a, a01, c=c, d=d, e=e, f=f, g=g) - assert s.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g'] + s = Session(b, b024, a, a01, a2=a2, anonymous=anonymous, ano01=ano01, c=c, d=d, e=e, f=f, g=g, h=h) + assert s.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024', 'c', 'd', 'e', 'f', 'g', 'h'] s = 
Session(inputpath('test_session.h5')) assert s.names == ['e', 'f', 'g'] @@ -63,24 +66,31 @@ def test_init_session(meta): # assertEqual(s.names, ['e', 'f', 'g']) # metadata - s = Session(b, b12, a, a01, c=c, d=d, e=e, f=f, g=g, meta=meta) + s = Session(b, b024, a, a01, a2=a2, anonymous=anonymous, ano01=ano01, c=c, d=d, e=e, f=f, g=g, h=h, meta=meta) assert s.meta == meta def test_getitem(session): assert session['a'] is a + assert session['a2'] is a2 + assert session['anonymous'] is anonymous assert session['b'] is b assert session['a01'] is a01 - assert session['b12'] is b12 + assert session['ano01'] is ano01 + assert session['b024'] is b024 assert session['c'] == 'c' assert session['d'] == {} + assert equal(session['e'], e) + assert equal(session['h'], h) def test_getitem_list(session): assert list(session[[]]) == [] assert list(session[['b', 'a']]) == [b, a] assert list(session[['a', 'b']]) == [a, b] - assert list(session[['b12', 'a']]) == [b12, a] + assert list(session[['a', 'a2']]) == [a, a2] + assert list(session[['anonymous', 'ano01']]) == [anonymous, ano01] + assert list(session[['b024', 'a']]) == [b024, a] assert list(session[['e', 'a01']]) == [e, a01] assert list(session[['a', 'e', 'g']]) == [a, e, g] assert list(session[['g', 'a', 'e']]) == [g, a, e] @@ -92,7 +102,7 @@ def test_getitem_larray(session): res_eq = s1[s1.element_equals(s2)] res_neq = s1[~(s1.element_equals(s2))] assert list(res_eq) == [f] - assert list(res_neq) == [e, g] + assert list(res_neq) == [e, g, h] def test_setitem(session): @@ -103,173 +113,138 @@ def test_setitem(session): def test_getattr(session): assert session.a is a + assert session.a2 is a2 + assert session.anonymous is anonymous assert session.b is b assert session.a01 is a01 - assert session.b12 is b12 + assert session.ano01 is ano01 + assert session.b024 is b024 assert session.c == 'c' assert session.d == {} def test_setattr(session): s = session.copy() - s.h = 'h' - assert s.h == 'h' + s.i = 'i' + assert s.i == 'i' def test_add(session): - h = Axis('h=h0..h2') - h01 = h['h0,h1'] >> 'h01' - session.add(h, h01, i='i') - assert h.equals(session.h) - assert h01 == session.h01 - assert session.i == 'i' + i = Axis('i=i0..i2') + i01 = i['i0,i1'] >> 'i01' + session.add(i, i01, j='j') + assert i.equals(session.i) + assert i01 == session.i01 + assert session.j == 'j' def test_iter(session): - expected = [b, b12, a, a01, c, d, e, g, f] + expected = [b, b024, a, a2, anonymous, a01, ano01, c, d, e, g, f, h] assertObjListEqual(session, expected) def test_filter(session): session.ax = 'ax' - assertObjListEqual(session.filter(), [b, b12, a, a01, 'c', {}, e, g, f, 'ax']) - assertObjListEqual(session.filter('a*'), [a, a01, 'ax']) + assertObjListEqual(session.filter(), [b, b024, a, a2, anonymous, a01, ano01, 'c', {}, e, g, f, h, 'ax']) + assertObjListEqual(session.filter('a*'), [a, a2, anonymous, a01, ano01, 'ax']) assert list(session.filter('a*', dict)) == [] assert list(session.filter('a*', str)) == ['ax'] - assert list(session.filter('a*', Axis)) == [a] - assert list(session.filter(kind=Axis)) == [b, a] + assert list(session.filter('a*', Axis)) == [a, a2, anonymous] + assert list(session.filter(kind=Axis)) == [b, a, a2, anonymous] assert list(session.filter('a01', Group)) == [a01] - assert list(session.filter(kind=Group)) == [b12, a01] - assertObjListEqual(session.filter(kind=LArray), [e, g, f]) + assert list(session.filter(kind=Group)) == [b024, a01, ano01] + assertObjListEqual(session.filter(kind=LArray), [e, g, f, h]) assert list(session.filter(kind=dict)) == 
[{}]
-    assert list(session.filter(kind=(Axis, Group))) == [b, b12, a, a01]
+    assert list(session.filter(kind=(Axis, Group))) == [b, b024, a, a2, anonymous, a01, ano01]
 
 
 def test_names(session):
-    assert session.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g']
+    assert session.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024',
+                             'c', 'd', 'e', 'f', 'g', 'h']
     # add them in the "wrong" order
     session.add(i='i')
-    session.add(h='h')
-    assert session.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
+    session.add(j='j')
+    assert session.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024',
+                             'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
 
 
-def test_h5_io(tmpdir, session, meta):
-    fpath = tmp_path(tmpdir, 'test_session.h5')
+def _test_io(fpath, session, meta, engine='auto'):
     session.meta = meta
-    session.save(fpath)
 
+    names = session.filter(kind=(Axis, Group, LArray)).names
+
+    # save and load
+    session.save(fpath, engine=engine)
     s = Session()
-    s.load(fpath)
-    # HDF does *not* keep ordering (ie, keys are always sorted +
-    # read Axis objects, then Groups objects and finally LArray objects)
-    assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
-    assert s.meta == meta
+    s.load(fpath, engine=engine)
+    # use Session.names instead of Session.keys because CSV, Excel and HDF do *not* keep ordering
+    assert s.names == names
+    assert s.equals(session)
+    for key in s.filter(kind=(Axis, LArray)).keys():
+        assert s[key].dtype.kind == session[key].dtype.kind
+    if engine != 'pandas_excel':
+        assert s.meta == meta
 
     # update a Group + an Axis + an array (overwrite=False)
-    a2 = Axis('a=0..2')
-    a2_01 = a2['0,1'] >> 'a01'
-    e2 = ndtest((a2, 'b=b0..b2'))
-    Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False)
+    a3 = Axis('a=0..3')
+    a3_01 = a3['0,1'] >> 'a01'
+    e2 = ndtest((a3, 'b=b0..b2'))
+    Session(a=a3, a01=a3_01, e=e2).save(fpath, overwrite=False, engine=engine)
     s = Session()
-    s.load(fpath)
-    assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
-    assert s['a'].equals(a2)
-    assert all(s['a01'] == a2_01)
+    s.load(fpath, engine=engine)
+    if 'csv' in fpath:
+        # Session.to_csv() always overwrites the __axes__.csv and __groups__.csv files
+        new_names = ['a', 'a01', 'e', 'f', 'g', 'h']
+    elif engine == 'pandas_excel':
+        # Session.save() via engine='pandas_excel' always overwrites the output Excel files
+        new_names = ['a', 'a01', 'e']
+    else:
+        new_names = names
+    assert s.names == new_names
+    assert s['a'].equals(a3)
+    if 'pkl' in fpath:
+        assert s['a01'].eval() == a3_01.eval()
+    else:
+        assert all(s['a01'] == a3_01)
     assert_array_nan_equal(s['e'], e2)
-    assert s.meta == meta
+    if engine != 'pandas_excel':
+        assert s.meta == meta
 
     # load only some objects
+    session.save(fpath, engine=engine)
     s = Session()
-    s.load(fpath, names=['a', 'a01', 'e', 'f'])
-    assert list(s.keys()) == ['a', 'a01', 'e', 'f']
-    assert s.meta == meta
+    s.load(fpath, names=['a', 'a2', 'anonymous', 'a01', 'e', 'f'], engine=engine)
+    assert s.names == ['a', 'a01', 'a2', 'anonymous', 'e', 'f']
+    if engine != 'pandas_excel':
+        assert s.meta == meta
 
 
-def test_xlsx_pandas_io(tmpdir, session, meta):
-    fpath = tmp_path(tmpdir, 'test_session.xlsx')
-    session.meta = meta
-    session.save(fpath, engine='pandas_excel')
-
-    s = Session()
-    s.load(fpath, engine='pandas_excel')
-    assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f']
-    assert s.meta == meta
+def test_h5_io(tmpdir, session, meta):
+    fpath = tmp_path(tmpdir, 'test_session.h5')
+    _test_io(fpath, session, meta)
 
-    # update a 
Group + an Axis + an array - # XXX: overwrite is not taken into account by the pandas_excel engine - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2, meta=meta).save(fpath, engine='pandas_excel') - s = Session() - s.load(fpath, engine='pandas_excel') - assert list(s.keys()) == ['a', 'a01', 'e'] - assert s['a'].equals(a2) - assert all(s['a01'] == a2_01) - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - # load only some objects - session.save(fpath, engine='pandas_excel') - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='pandas_excel') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta +def test_xlsx_pandas_io(tmpdir, session, meta): + fpath = tmp_path(tmpdir, 'test_session.xlsx') + _test_io(fpath, session, meta, engine='pandas_excel') @needs_xlwings def test_xlsx_xlwings_io(tmpdir, session, meta): - fpath = tmp_path(tmpdir, 'test_session_xw.xlsx') - session.meta = meta - # test save when Excel file does not exist - session.save(fpath, engine='xlwings_excel') - - s = Session() - s.load(fpath, engine='xlwings_excel') - # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f'] - assert s.meta == meta - - # update a Group + an Axis + an array (overwrite=False) - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2).save(fpath, engine='xlwings_excel', overwrite=False) - s = Session() - s.load(fpath, engine='xlwings_excel') - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f'] - assert s['a'].equals(a2) - assert all(s['a01'] == a2_01) - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='xlwings_excel') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta + fpath = tmp_path(tmpdir, 'test_session.xlsx') + _test_io(fpath, session, meta, engine='xlwings_excel') def test_csv_io(tmpdir, session, meta): + fpath = tmp_path(tmpdir, 'test_session_csv') try: - fpath = tmp_path(tmpdir, 'test_session_csv') - session.meta = meta - session.to_csv(fpath) + _test_io(fpath, session, meta) - # test loading a directory - s = Session() - s.load(fpath, engine='pandas_csv') - # CSV cannot keep ordering (so we always sort keys) - # Also, Axis objects are read first, then Groups objects and finally LArray objects - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] - assert s.meta == meta + names = session.filter(kind=(Axis, Group, LArray)).names # test loading with a pattern pattern = os.path.join(fpath, '*.csv') s = Session(pattern) - # s = Session() - # s.load(pattern) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] + assert s.names == names assert s.meta == meta # create an invalid .csv file @@ -284,13 +259,7 @@ def test_csv_io(tmpdir, session, meta): # test loading a pattern, ignoring invalid/unsupported files s = Session() s.load(pattern, ignore_exceptions=True) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f']) - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] + assert s.names == names assert s.meta == meta finally: shutil.rmtree(fpath) @@ -298,34 +267,7 @@ def test_csv_io(tmpdir, session, meta): def test_pickle_io(tmpdir, 
session, meta): fpath = tmp_path(tmpdir, 'test_session.pkl') - session.meta = meta - session.save(fpath) - - s = Session() - s.load(fpath, engine='pickle') - assert list(s.keys()) == ['b', 'a', 'b12', 'a01', 'e', 'g', 'f'] - assert s.meta == meta - - # update a Group + an Axis + an array (overwrite=False) - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False) - s = Session() - s.load(fpath, engine='pickle') - assert list(s.keys()) == ['b', 'a', 'b12', 'a01', 'e', 'g', 'f'] - assert s['a'].equals(a2) - assert isinstance(a2_01, Group) - assert isinstance(s['a01'], Group) - assert s['a01'].eval() == a2_01.eval() - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='pickle') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta + _test_io(fpath, session, meta) def test_to_globals(session): @@ -362,66 +304,76 @@ def test_to_globals(session): def test_element_equals(session): sess = session.filter(kind=(Axis, Group, LArray)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('g', g), ('f', f), ('h', h)]) assert all(sess.element_equals(expected)) - other = Session({'a': a, 'a01': a01, 'e': e, 'f': f}) + other = Session([('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess.element_equals(other) assert res.ndim == 1 assert res.axes.names == ['name'] - assert np.array_equal(res.axes.labels[0], ['b', 'b12', 'a', 'a01', 'e', 'g', 'f']) - assert list(res) == [False, False, True, True, True, False, True] + assert np.array_equal(res.axes.labels[0], ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h']) + assert list(res) == [False, False, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session({'a': a, 'a01': a01, 'e': e2, 'f': f}) + other = Session([('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess.element_equals(other) assert res.axes.names == ['name'] - assert np.array_equal(res.axes.labels[0], ['b', 'b12', 'a', 'a01', 'e', 'g', 'f']) - assert list(res) == [False, False, True, True, False, False, True] + assert np.array_equal(res.axes.labels[0], ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h']) + assert list(res) == [False, False, True, True, True, True, True, False, False, True, True] def test_eq(session): sess = session.filter(kind=(Axis, Group, LArray)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('g', g), ('f', f), ('h', h)]) assert all([item.all() if isinstance(item, LArray) else item for item in (sess == expected).values()]) - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess == other - assert list(res.keys()) == ['b', 'b12', 'a', 'a01', 'e', 'g', 'f'] + assert 
list(res.keys()) == ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h'] assert [item.all() if isinstance(item, LArray) else item - for item in res.values()] == [True, True, True, True, True, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e2), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess == other assert [item.all() if isinstance(item, LArray) else item - for item in res.values()] == [True, True, True, True, False, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, False, False, True, True] def test_ne(session): sess = session.filter(kind=(Axis, Group, LArray)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('g', g), ('f', f), ('h', h)]) assert ([(~item).all() if isinstance(item, LArray) else not item for item in (sess != expected).values()]) - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess != other - assert list(res.keys()) == ['b', 'b12', 'a', 'a01', 'e', 'g', 'f'] + assert list(res.keys()) == ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h'] assert [(~item).all() if isinstance(item, LArray) else not item - for item in res.values()] == [True, True, True, True, True, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e2), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess != other assert [(~item).all() if isinstance(item, LArray) else not item - for item in res.values()] == [True, True, True, True, False, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, False, False, True, True] def test_sub(session): @@ -548,27 +500,27 @@ def test_local_arrays(): def test_global_arrays(): # exclude private global arrays s = global_arrays() - s_expected = Session([('e', e), ('e2', e2), ('f', f), ('g', g)]) + s_expected = Session([('e', e), ('f', f), ('g', g), ('h', h)]) assert s.equals(s_expected) # all global arrays s = global_arrays(include_private=True) - s_expected = Session([('e', e), ('_e', _e), ('e2', e2), ('f', f), ('g', g)]) + s_expected = Session([('e', e), ('_e', _e), ('f', f), ('g', g), ('h', h)]) assert s.equals(s_expected) def test_arrays(): - h = ndtest(2) - _h = ndtest(3) + i = ndtest(2) + _i = ndtest(3) # exclude private arrays s = arrays() - s_expected = Session([('e', e), ('e2', e2), ('f', f), ('g', g), ('h', h)]) + s_expected = Session([('e', e), ('f', f), ('g', g), ('h', h), ('i', i)]) assert s.equals(s_expected) # all arrays s = arrays(include_private=True) - s_expected = Session([('_e', _e), ('_h', _h), ('e', e), ('e2', e2), ('f', f), ('g', g), ('h', h)]) + s_expected = 
Session([('_e', _e), ('_i', _i), ('e', e), ('f', f), ('g', g), ('h', h), ('i', i)]) assert s.equals(s_expected) From 0844710998576fd81526a3d20f3a1166ae396177 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Mon, 2 Sep 2019 16:11:28 +0200 Subject: [PATCH 8/8] skip testing dtype in _test_io() if Python 2.7 --- larray/tests/test_session.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index a8c34ea65..bdf1e437e 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -10,7 +10,7 @@ from larray.tests.common import assert_array_nan_equal, inputpath, tmp_path, meta, needs_xlwings from larray import (Session, Axis, LArray, Group, isnan, zeros_like, ndtest, ones_like, local_arrays, global_arrays, arrays) -from larray.util.misc import pickle +from larray.util.misc import pickle, PY2 def equal(o1, o2): @@ -179,8 +179,9 @@ def _test_io(fpath, session, meta, engine='auto'): # use Session.names instead of Session.keys because CSV, Excel and HDF do *not* keep ordering assert s.names == names assert s.equals(session) - for key in s.filter(kind=(Axis, LArray)).keys(): - assert s[key].dtype.kind == session[key].dtype.kind + if not PY2: + for key in s.filter(kind=(Axis, LArray)).keys(): + assert s[key].dtype.kind == session[key].dtype.kind if engine != 'pandas_excel': assert s.meta == meta
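Taken together, patches 3 to 5 pin down a header convention that can be checked end to end: an anonymous axis is displayed as '{i}', a wildcard axis gets a trailing '*', and from_frame() parses both back. A quick sketch of that round trip, assuming this patch series is applied (it merely re-exercises the behavior asserted in test_to_frame/test_from_frame above):

    from larray import Axis, from_frame, ndtest

    # an array with two anonymous wildcard axes
    arr = ndtest((Axis(2), Axis(3)))
    df = arr.to_frame()
    assert df.index.name == '{0}*' and df.columns.name == '{1}*'

    # the '{i}*' headers are parsed back into anonymous wildcard axes
    arr2 = from_frame(df)
    assert arr2.shape == (2, 3)
    for axis in arr2.axes:
        assert axis.name is None and axis.iswildcard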