From 26ab2bae9bdfa6d32d44a5e5a78cc69c234b5fd2 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Wed, 28 Aug 2019 15:45:11 +0200 Subject: [PATCH 1/8] fix #803 : - included key associated with Axis and Group objects when exporting Session objects to CSV or Excel files - removed useless kwargs['name'] = key in PandasHDFHandler._read_item() --- doc/source/changes/version_0_32.rst.inc | 3 +- larray/inout/csv.py | 10 +++--- larray/inout/excel.py | 20 ++++++------ larray/inout/hdf.py | 2 -- larray/inout/pandas.py | 41 ++++++++++++++----------- 5 files changed, 40 insertions(+), 36 deletions(-) diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index bbb2f6750..b2e125b7e 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -55,4 +55,5 @@ Miscellaneous improvements Fixes ^^^^^ -* fixed something (closes :issue:`1`). +* fixed reading/exporting sessions containing two or more axes/groups + with the same name (or anonymous) from/to CSV, Excel and HDF files (closes :issue:`803`). diff --git a/larray/inout/csv.py b/larray/inout/csv.py index 440771afe..621f44075 100644 --- a/larray/inout/csv.py +++ b/larray/inout/csv.py @@ -329,15 +329,15 @@ def list_items(self): pass try: fnames.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] + items = [(key, 'Axis') for key in sorted(self.axes.keys())] except: pass try: fnames.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] + items += [(key, 'Group') for key in sorted(self.groups.keys())] except: pass - items += [(name, 'Array') for name in fnames] + items += [(key, 'Array') for key in fnames] return items def _read_item(self, key, type, *args, **kwargs): @@ -375,10 +375,10 @@ def _dump_metadata(self, metadata): def save(self): if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) + df = _axes_to_df(self.axes) df.to_csv(self._to_filepath('__axes__'), sep=self.sep, index=False) if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) + df = _groups_to_df(self.groups) df.to_csv(self._to_filepath('__groups__'), sep=self.sep, index=False) def close(self): diff --git a/larray/inout/excel.py b/larray/inout/excel.py index d7d7cfff8..c9df21b3b 100644 --- a/larray/inout/excel.py +++ b/larray/inout/excel.py @@ -268,15 +268,15 @@ def list_items(self): pass try: sheet_names.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] + items = [(key, 'Axis') for key in sorted(self.axes.keys())] except: pass try: sheet_names.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] + items += [(key, 'Group') for key in sorted(self.groups.keys())] except: pass - items += [(name, 'Array') for name in sheet_names] + items += [(key, 'Array') for key in sheet_names] return items def _read_item(self, key, type, *args, **kwargs): @@ -316,10 +316,10 @@ def _dump_metadata(self, metadata): def save(self): if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) + df = _axes_to_df(self.axes) df.to_excel(self.handle, '__axes__', index=False, engine='xlsxwriter') if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) + df = _groups_to_df(self.groups) df.to_excel(self.handle, '__groups__', index=False, engine='xlsxwriter') def close(self): @@ -373,15 +373,15 @@ def list_items(self): pass try: sheet_names.remove('__axes__') - items = [(name, 'Axis') for name in sorted(self.axes.keys())] + items = [(key, 'Axis') for key in 
sorted(self.axes.keys())] except: pass try: sheet_names.remove('__groups__') - items += [(name, 'Group') for name in sorted(self.groups.keys())] + items += [(key, 'Group') for key in sorted(self.groups.keys())] except: pass - items += [(name, 'Array') for name in sheet_names] + items += [(key, 'Array') for key in sheet_names] return items def _read_item(self, key, type, *args, **kwargs): @@ -419,11 +419,11 @@ def _dump_metadata(self, metadata): def save(self): if len(self.axes) > 0: - df = _axes_to_df(self.axes.values()) + df = _axes_to_df(self.axes) self.handle['__axes__'] = '' self.handle['__axes__'][:].options(pd.DataFrame, index=False).value = df if len(self.groups) > 0: - df = _groups_to_df(self.groups.values()) + df = _groups_to_df(self.groups) self.handle['__groups__'] = '' self.handle['__groups__'][:].options(pd.DataFrame, index=False).value = df self.handle.save() diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 92bbc7516..71c39d5e9 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -132,10 +132,8 @@ def _read_item(self, key, type, *args, **kwargs): hdf_key = '/' + key elif type == 'Axis': hdf_key = '__axes__/' + key - kwargs['name'] = key elif type == 'Group': hdf_key = '__groups__/' + key - kwargs['name'] = key else: raise TypeError() return read_hdf(self.handle, hdf_key, *args, **kwargs) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index a9732f4c0..57093f888 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -338,25 +338,35 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header # SERIES <--> AXIS, GROUP, META # # #################################### # -def _axis_to_series(axis, dtype=None): - return pd.Series(data=axis.labels, name=str(axis), dtype=dtype) +def _axis_to_series(key, axis, dtype=None): + name = '{}:{}'.format(key, axis.name) + return pd.Series(data=axis.labels, name=name, dtype=dtype) def _series_to_axis(series): - return Axis(labels=series.values, name=series.name) + name = str(series.name) + if ':' in name: + key, axis_name = name.split(':') + else: + # for backward compatibility + key = axis_name = name + return key, Axis(labels=series.values, name=axis_name) -def _group_to_series(group, dtype=None): - name = group.name if group.name is not None else '{?}' +def _group_to_series(key, group, dtype=None): if group.axis.name is None: raise ValueError("Cannot save a group with an anonymous associated axis") - name += '@{}'.format(group.axis.name) + name = '{}:{}@{}'.format(key, group.name, group.axis.name) return pd.Series(data=group.eval(), name=name, dtype=dtype) -def _series_to_group(series, axis): - name = series.name.split('@')[0] - return LGroup(key=series.values, name=name, axis=axis) +def _series_to_group(series, axes): + key, name = str(series.name).split(':') + group_name, axis_name = name.split('@') + if group_name == 'None': + group_name = None + axis = axes[axis_name] + return key, LGroup(key=series.values, name=group_name, axis=axis) # ######################################## # @@ -364,25 +374,20 @@ def _series_to_group(series, axis): # ######################################## # def _df_to_axes(df): - return OrderedDict([(col_name, _series_to_axis(df[col_name])) for col_name in df.columns.values]) + return OrderedDict([_series_to_axis(df[col_name]) for col_name in df.columns.values]) def _axes_to_df(axes): # set dtype to np.object otherwise pd.concat below may convert an int row/column as float # if trailing NaN need to be added - return 
pd.concat([_axis_to_series(axis, dtype=np.object) for axis in axes], axis=1) + return pd.concat([_axis_to_series(key, axis, dtype=np.object) for key, axis in axes.items()], axis=1) def _df_to_groups(df, axes): - groups = OrderedDict() - for name, values in df.iteritems(): - group_name, axis_name = name.split('@') - axis = axes[axis_name] - groups[group_name] = _series_to_group(values, axis) - return groups + return OrderedDict([_series_to_group(df[col_name], axes) for col_name in df.columns.values]) def _groups_to_df(groups): # set dtype to np.object otherwise pd.concat below may convert an int row/column as float # if trailing NaN need to be added - return pd.concat([_group_to_series(group, dtype=np.object) for group in groups], axis=1) + return pd.concat([_group_to_series(key, group, dtype=np.object) for key, group in groups.items()], axis=1) From f9ce292007cd468a32cd8aa0dc7b2ab849972c47 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Thu, 29 Aug 2019 08:22:35 +0200 Subject: [PATCH 2/8] fix #804 : removed trailing NaNs and None values when extracting Axis and Group objects from __axes__ and __groups__ special sheets/csv files --- doc/source/changes/version_0_32.rst.inc | 3 +++ larray/inout/pandas.py | 24 ++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index b2e125b7e..6982b703e 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -57,3 +57,6 @@ Fixes * fixed reading/exporting sessions containing two or more axes/groups with the same name (or anonymous) from/to CSV, Excel and HDF files (closes :issue:`803`). + +* fixed NaNs and None labels appearing in axes and groups when reading/exporting sessions + from/to CSV and Excel files (closes :issue:`804`). 
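The serialization convention behind these two fixes is easy to see outside the library: each Axis is dumped to the special __axes__ sheet/file as a pandas Series whose name is 'key:axis_name', so two axes sharing a name no longer collide, and the trailing NaNs that pd.concat pads onto shorter axes are trimmed on the way back. The following is a minimal standalone sketch of that scheme, not the library code itself (dump_axis/load_axis are hypothetical helpers):

    import pandas as pd

    def dump_axis(key, name, labels):
        # the Session key and the axis name both survive the round trip
        return pd.Series(data=labels, name='{}:{}'.format(key, name), dtype=object)

    def load_axis(series):
        key, axis_name = str(series.name).split(':')
        # axes of different lengths share one DataFrame -> trim trailing NaNs
        labels = list(series.loc[:series.last_valid_index()].values)
        return key, axis_name, labels

    df = pd.concat([dump_axis('a', 'a', ['a0', 'a1', 'a2']),
                    dump_axis('a2', 'a', ['a0', 'a1', 'a2', 'a3', 'a4'])], axis=1)
    print([load_axis(df[col]) for col in df.columns])
    # [('a', 'a', ['a0', 'a1', 'a2']), ('a2', 'a', ['a0', 'a1', 'a2', 'a3', 'a4'])]
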
diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index 57093f888..94552a7ce 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -338,6 +338,24 @@ def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header # SERIES <--> AXIS, GROUP, META # # #################################### # +def _extract_labels_from_series(series): + # remove trailing NaN or None values + # (multiple Axis or Group objects of different lengths + # are stored in the same DataFrame leading to trailing + # NaNs or None values when split into series) + series = series.loc[:series.last_valid_index()] + + labels = np.asarray(series.values) + # integer labels of axes or groups may have been converted to float values + # because of trailing NaNs + if labels.dtype.kind == 'f' and all([label.is_integer() for label in labels]): + labels = labels.astype(int) + # if dtype is still object, we assume values are strings + if labels.dtype.kind == 'O': + labels = labels.astype(str) + return labels + + def _axis_to_series(key, axis, dtype=None): name = '{}:{}'.format(key, axis.name) return pd.Series(data=axis.labels, name=name, dtype=dtype) @@ -345,12 +363,13 @@ def _axis_to_series(key, axis, dtype=None): def _series_to_axis(series): name = str(series.name) + labels = _extract_labels_from_series(series) if ':' in name: key, axis_name = name.split(':') else: # for backward compatibility key = axis_name = name - return key, Axis(labels=series.values, name=axis_name) + return key, Axis(labels=labels, name=axis_name) def _group_to_series(key, group, dtype=None): @@ -366,7 +385,8 @@ def _series_to_group(series, axes): if group_name == 'None': group_name = None axis = axes[axis_name] - return key, LGroup(key=series.values, name=group_name, axis=axis) + group_key = _extract_labels_from_series(series) + return key, LGroup(key=group_key, name=group_name, axis=axis) # ######################################## # From 8cf1fc9933d164cd98cdba090717eaa787dd8105 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Fri, 30 Aug 2019 15:57:51 +0200 Subject: [PATCH 3/8] (issue 805): anonymous and/or wildcard axes are handled correctly when using Session.save() and Session.load() --- larray/inout/pandas.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py index 94552a7ce..88afa37d2 100644 --- a/larray/inout/pandas.py +++ b/larray/inout/pandas.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, print_function +import re from itertools import product from collections import OrderedDict @@ -140,6 +141,9 @@ def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs): return LArray(s.values, Axis(s.index.values, name), meta=meta) +_anonymous_axis_pattern = re.compile(r'\{(\d+|\??)\}\*?') + + def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfold_last_axis_name=False, fill_value=nan, meta=None, cartesian_prod=True, **kwargs): r""" @@ -357,8 +361,9 @@ def _extract_labels_from_series(series): def _axis_to_series(key, axis, dtype=None): - name = '{}:{}'.format(key, axis.name) - return pd.Series(data=axis.labels, name=name, dtype=dtype) + name = '{}:{}'.format(key, str(axis)) + labels = len(axis) if axis.iswildcard else axis.labels + return pd.Series(data=labels, name=name, dtype=dtype) def _series_to_axis(series): @@ -366,6 +371,10 @@ def _series_to_axis(series): labels = _extract_labels_from_series(series) if ':' in name: key, axis_name = name.split(':') + if axis_name[-1] == '*': + labels = 
labels[0]
+        if _anonymous_axis_pattern.match(axis_name):
+            axis_name = None
     else:
         # for backward compatibility
         key = axis_name = name

From 234825749f4eb7101669de00c673f8757f924af2 Mon Sep 17 00:00:00 2001
From: Alix Damman
Date: Fri, 30 Aug 2019 11:28:23 +0200
Subject: [PATCH 4/8] (issue 805) updated LArray.to_frame(): call
 AxisCollection.display_names property at the beginning to make
 LArray.to_frame() consistent with LArray.dump() (and then to make it
 possible to handle anonymous and/or wildcard axes when dealing with CSV and
 Excel formats)

---
 larray/core/array.py       | 15 +++++++--------
 larray/tests/test_array.py | 11 +++++++++++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/larray/core/array.py b/larray/core/array.py
index 17612fa91..a16ee22ab 100644
--- a/larray/core/array.py
+++ b/larray/core/array.py
@@ -1185,22 +1185,21 @@ def to_frame(self, fold_last_axis_name=False, dropna=None):
         b1    6   7
         """
         columns = pd.Index(self.axes[-1].labels)
+        axes_names = self.axes.display_names[:]
         if not fold_last_axis_name:
-            columns.name = self.axes[-1].name
+            columns.name = axes_names[-1]
         if self.ndim > 1:
-            axes_names = self.axes.names[:-1]
+            _axes_names = axes_names[:-1]
             if fold_last_axis_name:
-                tmp = axes_names[-1] if axes_names[-1] is not None else ''
-                if self.axes[-1].name:
-                    axes_names[-1] = "{}\\{}".format(tmp, self.axes[-1].name)
+                _axes_names[-1] = "{}\\{}".format(_axes_names[-1], axes_names[-1])
             if self.ndim == 2:
-                index = pd.Index(data=self.axes[0].labels, name=axes_names[0])
+                index = pd.Index(data=self.axes[0].labels, name=_axes_names[0])
             else:
-                index = pd.MultiIndex.from_product(self.axes.labels[:-1], names=axes_names)
+                index = pd.MultiIndex.from_product(self.axes.labels[:-1], names=_axes_names)
         else:
             index = pd.Index([''])
             if fold_last_axis_name:
-                index.name = self.axes.names[-1]
+                index.name = axes_names[-1]
         data = np.asarray(self).reshape(len(index), len(columns))
         df = pd.DataFrame(data, index, columns)
         if dropna is not None:
diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py
index 0615f987e..2a42cea24 100644
--- a/larray/tests/test_array.py
+++ b/larray/tests/test_array.py
@@ -3454,6 +3454,17 @@ def test_from_series():
     assert_array_equal(res, expected)
 
 
+def test_to_frame():
+    # array containing anonymous axes
+    arr = ndtest((Axis(2), Axis(2), Axis(2)))
+    df = arr.to_frame()
+    assert df.index.name is None
+    assert df.index.names == ['{0}*', '{1}*']
+    assert df.columns.name == '{2}*'
+    assert list(df.index.values) == [(0, 0), (0, 1), (1, 0), (1, 1)]
+    assert list(df.columns.values) == [0, 1]
+
+
 def test_from_frame():
     # 1) data = scalar
     # ================

From 787f9ca63e0401f0c8178ff2d246a9f5beb7184b Mon Sep 17 00:00:00 2001
From: Alix Damman
Date: Fri, 30 Aug 2019 14:52:14 +0200
Subject: [PATCH 5/8] (issue 805) updated from_frame(): parse each axis name
 to check it represents an anonymous and/or wildcard axis

---
 larray/inout/pandas.py     | 11 ++++++++++-
 larray/tests/test_array.py | 12 ++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py
index 88afa37d2..43fc88a3a 100644
--- a/larray/inout/pandas.py
+++ b/larray/inout/pandas.py
@@ -251,7 +251,16 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
     axes_names = [str(name) if name is not None else name
                   for name in axes_names]
 
-    axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)])
+    def _to_axis(labels, name):
+        if name is not None:
+            if name[-1] == '*':
+                
labels = len(labels) + name = name[:-1] + if _anonymous_axis_pattern.match(name): + name = None + return Axis(labels, name) + + axes = AxisCollection([_to_axis(labels, name) for labels, name in zip(axes_labels, axes_names)]) data = df.values.reshape(axes.shape) return LArray(data, axes, meta=meta) diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 2a42cea24..4df916138 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -3827,6 +3827,18 @@ def test_from_frame(): assert la.axes.names == ['age', 'sex', 'time'] assert_array_equal(la[0, 'F', :], [3722, 3395, 3347]) + # 3C) 3 anonymous axes + # ==================== + arr = ndtest((Axis(2), Axis(2), Axis(2))) + df = arr.to_frame() + + la = from_frame(df) + assert la.ndim == 3 + assert la.shape == (2, 2, 2) + for axis in la.axes: + assert axis.name is None + assert axis.iswildcard + # 4) test sort_rows and sort_columns arguments # ============================================ age = Axis('age=2,0,1,3') From 3c6b5e5bca5c7bbdbaa7ea1e74e544f287cc5836 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Fri, 30 Aug 2019 16:19:51 +0200 Subject: [PATCH 6/8] (issue 805): added changelog --- doc/source/changes/version_0_32.rst.inc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc index 6982b703e..0c4668359 100644 --- a/doc/source/changes/version_0_32.rst.inc +++ b/doc/source/changes/version_0_32.rst.inc @@ -60,3 +60,6 @@ Fixes * fixed NaNs and None labels appearing in axes and groups when reading/exporting sessions from/to CSV and Excel files (closes :issue:`804`). + +* fixed importing/exporting anonymous and/or wildcard axes to CSV and Excel files + (closes :issue:`805`). From 830388cf93880aa32a49571eb73293188b870124 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Wed, 28 Aug 2019 15:07:18 +0200 Subject: [PATCH 7/8] improved unit tests for Session objects --- larray/tests/test_session.py | 326 +++++++++++++++-------------------- 1 file changed, 139 insertions(+), 187 deletions(-) diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index e62f29e33..a8c34ea65 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -29,27 +29,30 @@ def assertObjListEqual(got, expected): a = Axis('a=a0..a2') +a2 = Axis('a=a0..a4') +anonymous = Axis(4) a01 = a['a0,a1'] >> 'a01' -b = Axis('b=b0..b2') -b12 = b['b1,b2'] >> 'b12' +ano01 = a['a0,a1'] +b = Axis('b=0..4') +b024 = b[[0, 2, 4]] >> 'b024' c = 'c' d = {} -e = ndtest([(2, 'a0'), (3, 'a1')]) +e = ndtest([(2, 'a'), (3, 'b')]) _e = ndtest((3, 3)) -e2 = ndtest(('a=a0..a2', 'b=b0..b2')) -f = ndtest([(3, 'a0'), (2, 'a1')]) -g = ndtest([(2, 'a0'), (4, 'a1')]) +f = ndtest((Axis(3), Axis(2))) +g = ndtest([(2, 'a'), (4, 'b')]) +h = ndtest(('a=a0..a2', 'b=b0..b4')) @pytest.fixture() def session(): - return Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('c', c), ('d', d), ('e', e), ('g', g), ('f', f)]) + return Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('c', c), ('d', d), ('e', e), ('g', g), ('f', f), ('h', h)]) def test_init_session(meta): - s = Session(b, b12, a, a01, c=c, d=d, e=e, f=f, g=g) - assert s.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g'] + s = Session(b, b024, a, a01, a2=a2, anonymous=anonymous, ano01=ano01, c=c, d=d, e=e, f=f, g=g, h=h) + assert s.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024', 'c', 'd', 'e', 'f', 'g', 'h'] s = 
Session(inputpath('test_session.h5')) assert s.names == ['e', 'f', 'g'] @@ -63,24 +66,31 @@ def test_init_session(meta): # assertEqual(s.names, ['e', 'f', 'g']) # metadata - s = Session(b, b12, a, a01, c=c, d=d, e=e, f=f, g=g, meta=meta) + s = Session(b, b024, a, a01, a2=a2, anonymous=anonymous, ano01=ano01, c=c, d=d, e=e, f=f, g=g, h=h, meta=meta) assert s.meta == meta def test_getitem(session): assert session['a'] is a + assert session['a2'] is a2 + assert session['anonymous'] is anonymous assert session['b'] is b assert session['a01'] is a01 - assert session['b12'] is b12 + assert session['ano01'] is ano01 + assert session['b024'] is b024 assert session['c'] == 'c' assert session['d'] == {} + assert equal(session['e'], e) + assert equal(session['h'], h) def test_getitem_list(session): assert list(session[[]]) == [] assert list(session[['b', 'a']]) == [b, a] assert list(session[['a', 'b']]) == [a, b] - assert list(session[['b12', 'a']]) == [b12, a] + assert list(session[['a', 'a2']]) == [a, a2] + assert list(session[['anonymous', 'ano01']]) == [anonymous, ano01] + assert list(session[['b024', 'a']]) == [b024, a] assert list(session[['e', 'a01']]) == [e, a01] assert list(session[['a', 'e', 'g']]) == [a, e, g] assert list(session[['g', 'a', 'e']]) == [g, a, e] @@ -92,7 +102,7 @@ def test_getitem_larray(session): res_eq = s1[s1.element_equals(s2)] res_neq = s1[~(s1.element_equals(s2))] assert list(res_eq) == [f] - assert list(res_neq) == [e, g] + assert list(res_neq) == [e, g, h] def test_setitem(session): @@ -103,173 +113,138 @@ def test_setitem(session): def test_getattr(session): assert session.a is a + assert session.a2 is a2 + assert session.anonymous is anonymous assert session.b is b assert session.a01 is a01 - assert session.b12 is b12 + assert session.ano01 is ano01 + assert session.b024 is b024 assert session.c == 'c' assert session.d == {} def test_setattr(session): s = session.copy() - s.h = 'h' - assert s.h == 'h' + s.i = 'i' + assert s.i == 'i' def test_add(session): - h = Axis('h=h0..h2') - h01 = h['h0,h1'] >> 'h01' - session.add(h, h01, i='i') - assert h.equals(session.h) - assert h01 == session.h01 - assert session.i == 'i' + i = Axis('i=i0..i2') + i01 = i['i0,i1'] >> 'i01' + session.add(i, i01, j='j') + assert i.equals(session.i) + assert i01 == session.i01 + assert session.j == 'j' def test_iter(session): - expected = [b, b12, a, a01, c, d, e, g, f] + expected = [b, b024, a, a2, anonymous, a01, ano01, c, d, e, g, f, h] assertObjListEqual(session, expected) def test_filter(session): session.ax = 'ax' - assertObjListEqual(session.filter(), [b, b12, a, a01, 'c', {}, e, g, f, 'ax']) - assertObjListEqual(session.filter('a*'), [a, a01, 'ax']) + assertObjListEqual(session.filter(), [b, b024, a, a2, anonymous, a01, ano01, 'c', {}, e, g, f, h, 'ax']) + assertObjListEqual(session.filter('a*'), [a, a2, anonymous, a01, ano01, 'ax']) assert list(session.filter('a*', dict)) == [] assert list(session.filter('a*', str)) == ['ax'] - assert list(session.filter('a*', Axis)) == [a] - assert list(session.filter(kind=Axis)) == [b, a] + assert list(session.filter('a*', Axis)) == [a, a2, anonymous] + assert list(session.filter(kind=Axis)) == [b, a, a2, anonymous] assert list(session.filter('a01', Group)) == [a01] - assert list(session.filter(kind=Group)) == [b12, a01] - assertObjListEqual(session.filter(kind=LArray), [e, g, f]) + assert list(session.filter(kind=Group)) == [b024, a01, ano01] + assertObjListEqual(session.filter(kind=LArray), [e, g, f, h]) assert list(session.filter(kind=dict)) == 
[{}]
-    assert list(session.filter(kind=(Axis, Group))) == [b, b12, a, a01]
+    assert list(session.filter(kind=(Axis, Group))) == [b, b024, a, a2, anonymous, a01, ano01]
 
 
 def test_names(session):
-    assert session.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g']
+    assert session.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024',
+                             'c', 'd', 'e', 'f', 'g', 'h']
     # add them in the "wrong" order
     session.add(i='i')
-    session.add(h='h')
-    assert session.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
+    session.add(j='j')
+    assert session.names == ['a', 'a01', 'a2', 'ano01', 'anonymous', 'b', 'b024',
+                             'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
 
 
-def test_h5_io(tmpdir, session, meta):
-    fpath = tmp_path(tmpdir, 'test_session.h5')
+def _test_io(fpath, session, meta, engine='auto'):
     session.meta = meta
-    session.save(fpath)
 
+    names = session.filter(kind=(Axis, Group, LArray)).names
+
+    # save and load
+    session.save(fpath, engine=engine)
     s = Session()
-    s.load(fpath)
-    # HDF does *not* keep ordering (ie, keys are always sorted +
-    # read Axis objects, then Groups objects and finally LArray objects)
-    assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
-    assert s.meta == meta
+    s.load(fpath, engine=engine)
+    # use Session.names instead of Session.keys because CSV, Excel and HDF do *not* keep ordering
+    assert s.names == names
+    assert s.equals(session)
+    for key in s.filter(kind=(Axis, LArray)).keys():
+        assert s[key].dtype.kind == session[key].dtype.kind
+    if engine != 'pandas_excel':
+        assert s.meta == meta
 
     # update a Group + an Axis + an array (overwrite=False)
-    a2 = Axis('a=0..2')
-    a2_01 = a2['0,1'] >> 'a01'
-    e2 = ndtest((a2, 'b=b0..b2'))
-    Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False)
+    a3 = Axis('a=0..3')
+    a3_01 = a3['0,1'] >> 'a01'
+    e2 = ndtest((a3, 'b=b0..b2'))
+    Session(a=a3, a01=a3_01, e=e2).save(fpath, overwrite=False, engine=engine)
     s = Session()
-    s.load(fpath)
-    assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g']
-    assert s['a'].equals(a2)
-    assert all(s['a01'] == a2_01)
+    s.load(fpath, engine=engine)
+    if 'csv' in fpath:
+        # Session.to_csv() always overwrites the __axes__.csv and __groups__.csv files
+        new_names = ['a', 'a01', 'e', 'f', 'g', 'h']
+    elif engine == 'pandas_excel':
+        # Session.save() via engine='pandas_excel' always overwrites the output Excel files
+        new_names = ['a', 'a01', 'e']
+    else:
+        new_names = names
+    assert s.names == new_names
+    assert s['a'].equals(a3)
+    if 'pkl' in fpath:
+        assert s['a01'].eval() == a3_01.eval()
+    else:
+        assert all(s['a01'] == a3_01)
     assert_array_nan_equal(s['e'], e2)
-    assert s.meta == meta
+    if engine != 'pandas_excel':
+        assert s.meta == meta
 
     # load only some objects
+    session.save(fpath, engine=engine)
     s = Session()
-    s.load(fpath, names=['a', 'a01', 'e', 'f'])
-    assert list(s.keys()) == ['a', 'a01', 'e', 'f']
-    assert s.meta == meta
+    s.load(fpath, names=['a', 'a2', 'anonymous', 'a01', 'e', 'f'], engine=engine)
+    assert s.names == ['a', 'a01', 'a2', 'anonymous', 'e', 'f']
+    if engine != 'pandas_excel':
+        assert s.meta == meta
 
 
-def test_xlsx_pandas_io(tmpdir, session, meta):
-    fpath = tmp_path(tmpdir, 'test_session.xlsx')
-    session.meta = meta
-    session.save(fpath, engine='pandas_excel')
-
-    s = Session()
-    s.load(fpath, engine='pandas_excel')
-    assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f']
-    assert s.meta == meta
+def test_h5_io(tmpdir, session, meta):
+    fpath = tmp_path(tmpdir, 'test_session.h5')
+    _test_io(fpath, session, meta)
 
-    # update a 
Group + an Axis + an array - # XXX: overwrite is not taken into account by the pandas_excel engine - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2, meta=meta).save(fpath, engine='pandas_excel') - s = Session() - s.load(fpath, engine='pandas_excel') - assert list(s.keys()) == ['a', 'a01', 'e'] - assert s['a'].equals(a2) - assert all(s['a01'] == a2_01) - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - # load only some objects - session.save(fpath, engine='pandas_excel') - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='pandas_excel') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta +def test_xlsx_pandas_io(tmpdir, session, meta): + fpath = tmp_path(tmpdir, 'test_session.xlsx') + _test_io(fpath, session, meta, engine='pandas_excel') @needs_xlwings def test_xlsx_xlwings_io(tmpdir, session, meta): - fpath = tmp_path(tmpdir, 'test_session_xw.xlsx') - session.meta = meta - # test save when Excel file does not exist - session.save(fpath, engine='xlwings_excel') - - s = Session() - s.load(fpath, engine='xlwings_excel') - # ordering is only kept if the file did not exist previously (otherwise the ordering is left intact) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f'] - assert s.meta == meta - - # update a Group + an Axis + an array (overwrite=False) - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2).save(fpath, engine='xlwings_excel', overwrite=False) - s = Session() - s.load(fpath, engine='xlwings_excel') - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'g', 'f'] - assert s['a'].equals(a2) - assert all(s['a01'] == a2_01) - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='xlwings_excel') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta + fpath = tmp_path(tmpdir, 'test_session.xlsx') + _test_io(fpath, session, meta, engine='xlwings_excel') def test_csv_io(tmpdir, session, meta): + fpath = tmp_path(tmpdir, 'test_session_csv') try: - fpath = tmp_path(tmpdir, 'test_session_csv') - session.meta = meta - session.to_csv(fpath) + _test_io(fpath, session, meta) - # test loading a directory - s = Session() - s.load(fpath, engine='pandas_csv') - # CSV cannot keep ordering (so we always sort keys) - # Also, Axis objects are read first, then Groups objects and finally LArray objects - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] - assert s.meta == meta + names = session.filter(kind=(Axis, Group, LArray)).names # test loading with a pattern pattern = os.path.join(fpath, '*.csv') s = Session(pattern) - # s = Session() - # s.load(pattern) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] + assert s.names == names assert s.meta == meta # create an invalid .csv file @@ -284,13 +259,7 @@ def test_csv_io(tmpdir, session, meta): # test loading a pattern, ignoring invalid/unsupported files s = Session() s.load(pattern, ignore_exceptions=True) - assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f']) - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] + assert s.names == names assert s.meta == meta finally: shutil.rmtree(fpath) @@ -298,34 +267,7 @@ def test_csv_io(tmpdir, session, meta): def test_pickle_io(tmpdir, 
session, meta): fpath = tmp_path(tmpdir, 'test_session.pkl') - session.meta = meta - session.save(fpath) - - s = Session() - s.load(fpath, engine='pickle') - assert list(s.keys()) == ['b', 'a', 'b12', 'a01', 'e', 'g', 'f'] - assert s.meta == meta - - # update a Group + an Axis + an array (overwrite=False) - a2 = Axis('a=0..2') - a2_01 = a2['0,1'] >> 'a01' - e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False) - s = Session() - s.load(fpath, engine='pickle') - assert list(s.keys()) == ['b', 'a', 'b12', 'a01', 'e', 'g', 'f'] - assert s['a'].equals(a2) - assert isinstance(a2_01, Group) - assert isinstance(s['a01'], Group) - assert s['a01'].eval() == a2_01.eval() - assert_array_nan_equal(s['e'], e2) - assert s.meta == meta - - # load only some objects - s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='pickle') - assert list(s.keys()) == ['a', 'a01', 'e', 'f'] - assert s.meta == meta + _test_io(fpath, session, meta) def test_to_globals(session): @@ -362,66 +304,76 @@ def test_to_globals(session): def test_element_equals(session): sess = session.filter(kind=(Axis, Group, LArray)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('g', g), ('f', f), ('h', h)]) assert all(sess.element_equals(expected)) - other = Session({'a': a, 'a01': a01, 'e': e, 'f': f}) + other = Session([('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess.element_equals(other) assert res.ndim == 1 assert res.axes.names == ['name'] - assert np.array_equal(res.axes.labels[0], ['b', 'b12', 'a', 'a01', 'e', 'g', 'f']) - assert list(res) == [False, False, True, True, True, False, True] + assert np.array_equal(res.axes.labels[0], ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h']) + assert list(res) == [False, False, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session({'a': a, 'a01': a01, 'e': e2, 'f': f}) + other = Session([('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess.element_equals(other) assert res.axes.names == ['name'] - assert np.array_equal(res.axes.labels[0], ['b', 'b12', 'a', 'a01', 'e', 'g', 'f']) - assert list(res) == [False, False, True, True, False, False, True] + assert np.array_equal(res.axes.labels[0], ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h']) + assert list(res) == [False, False, True, True, True, True, True, False, False, True, True] def test_eq(session): sess = session.filter(kind=(Axis, Group, LArray)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('g', g), ('f', f), ('h', h)]) assert all([item.all() if isinstance(item, LArray) else item for item in (sess == expected).values()]) - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess == other - assert list(res.keys()) == ['b', 'b12', 'a', 'a01', 'e', 'g', 'f'] + assert 
list(res.keys()) == ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h'] assert [item.all() if isinstance(item, LArray) else item - for item in res.values()] == [True, True, True, True, True, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e2), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess == other assert [item.all() if isinstance(item, LArray) else item - for item in res.values()] == [True, True, True, True, False, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, False, False, True, True] def test_ne(session): sess = session.filter(kind=(Axis, Group, LArray)) - expected = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), - ('e', e), ('g', g), ('f', f)]) + expected = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('g', g), ('f', f), ('h', h)]) assert ([(~item).all() if isinstance(item, LArray) else not item for item in (sess != expected).values()]) - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e), ('f', f), ('h', h)]) res = sess != other - assert list(res.keys()) == ['b', 'b12', 'a', 'a01', 'e', 'g', 'f'] + assert list(res.keys()) == ['b', 'b024', 'a', 'a2', 'anonymous', 'a01', 'ano01', + 'e', 'g', 'f', 'h'] assert [(~item).all() if isinstance(item, LArray) else not item - for item in res.values()] == [True, True, True, True, True, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, True, False, True, True] e2 = e.copy() e2.i[1, 1] = 42 - other = Session([('b', b), ('b12', b12), ('a', a), ('a01', a01), ('e', e2), ('f', f)]) + other = Session([('b', b), ('b024', b024), ('a', a), ('a2', a2), ('anonymous', anonymous), + ('a01', a01), ('ano01', ano01), ('e', e2), ('f', f), ('h', h)]) res = sess != other assert [(~item).all() if isinstance(item, LArray) else not item - for item in res.values()] == [True, True, True, True, False, False, True] + for item in res.values()] == [True, True, True, True, True, True, True, False, False, True, True] def test_sub(session): @@ -548,27 +500,27 @@ def test_local_arrays(): def test_global_arrays(): # exclude private global arrays s = global_arrays() - s_expected = Session([('e', e), ('e2', e2), ('f', f), ('g', g)]) + s_expected = Session([('e', e), ('f', f), ('g', g), ('h', h)]) assert s.equals(s_expected) # all global arrays s = global_arrays(include_private=True) - s_expected = Session([('e', e), ('_e', _e), ('e2', e2), ('f', f), ('g', g)]) + s_expected = Session([('e', e), ('_e', _e), ('f', f), ('g', g), ('h', h)]) assert s.equals(s_expected) def test_arrays(): - h = ndtest(2) - _h = ndtest(3) + i = ndtest(2) + _i = ndtest(3) # exclude private arrays s = arrays() - s_expected = Session([('e', e), ('e2', e2), ('f', f), ('g', g), ('h', h)]) + s_expected = Session([('e', e), ('f', f), ('g', g), ('h', h), ('i', i)]) assert s.equals(s_expected) # all arrays s = arrays(include_private=True) - s_expected = Session([('_e', _e), ('_h', _h), ('e', e), ('e2', e2), ('f', f), ('g', g), ('h', h)]) + s_expected = 
Session([('_e', _e), ('_i', _i), ('e', e), ('f', f), ('g', g), ('h', h), ('i', i)]) assert s.equals(s_expected) From 0844710998576fd81526a3d20f3a1166ae396177 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Mon, 2 Sep 2019 16:11:28 +0200 Subject: [PATCH 8/8] skip testing dtype in _test_io() if Python 2.7 --- larray/tests/test_session.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index a8c34ea65..bdf1e437e 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -10,7 +10,7 @@ from larray.tests.common import assert_array_nan_equal, inputpath, tmp_path, meta, needs_xlwings from larray import (Session, Axis, LArray, Group, isnan, zeros_like, ndtest, ones_like, local_arrays, global_arrays, arrays) -from larray.util.misc import pickle +from larray.util.misc import pickle, PY2 def equal(o1, o2): @@ -179,8 +179,9 @@ def _test_io(fpath, session, meta, engine='auto'): # use Session.names instead of Session.keys because CSV, Excel and HDF do *not* keep ordering assert s.names == names assert s.equals(session) - for key in s.filter(kind=(Axis, LArray)).keys(): - assert s[key].dtype.kind == session[key].dtype.kind + if not PY2: + for key in s.filter(kind=(Axis, LArray)).keys(): + assert s[key].dtype.kind == session[key].dtype.kind if engine != 'pandas_excel': assert s.meta == meta
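Taken together, patches 3 to 5 pin down a header convention that can be checked end to end: an anonymous axis is displayed as '{i}', a wildcard axis gets a trailing '*', and from_frame() parses both back. A quick sketch of that round trip, assuming this patch series is applied (it merely re-exercises the behavior asserted in test_to_frame/test_from_frame above):

    from larray import Axis, from_frame, ndtest

    # an array with two anonymous wildcard axes
    arr = ndtest((Axis(2), Axis(3)))
    df = arr.to_frame()
    assert df.index.name == '{0}*' and df.columns.name == '{1}*'

    # the '{i}*' headers are parsed back into anonymous wildcard axes
    arr2 = from_frame(df)
    assert arr2.shape == (2, 3)
    for axis in arr2.axes:
        assert axis.name is None and axis.iswildcard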