larray-project · alixdamman · Oct 24, 2019 · Oct 15, 2019 · Oct 16, 2019 · Oct 18, 2019
diff --git a/doc/source/changes/version_0_32.rst.inc b/doc/source/changes/version_0_32.rst.inc
@@ -10,7 +10,12 @@ Syntax changes
 Backward incompatible changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-* other backward incompatible changes
+* Because it was broken, the ability to dump and load Axis and Group objects
+  contained in a session has been removed for the CSV and Excel formats.
+  Fixing it would have taken too much time considering how rarely it is used
+  (no one complained that it was broken), so the decision was taken to remove it.
+  However, this is still possible using the HDF format.
+  Closes :issue:`815`.
 
 
 New features
@@ -61,3 +66,6 @@ Fixes
 
 * fixed :py:obj:`zip_array_values()` and :py:obj:`zip_array_items()` functions not available
   when importing the entire larray library as ``from larray import *`` (closes :issue:`816`).
+
+* fixed wrong axis and group names when loading a session from an HDF file
+  (closes :issue:`803`).
diff --git a/doc/source/tutorial/tutorial_IO.ipyml b/doc/source/tutorial/tutorial_IO.ipyml
@@ -660,13 +660,7 @@ cells:
 
 - markdown: |
     <div class="alert alert-info">
-      Note: Concerning the CSV and Excel formats:  
-
-      - all Axis objects are saved together in the same Excel sheet (CSV file) named `__axes__(.csv)`  
-      - all Group objects are saved together in the same Excel sheet (CSV file) named `__groups__(.csv)`  
-      - metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`  
-
-      These sheet (CSV file) names cannot be changed. 
+      Note: Concerning the CSV and Excel formats, the metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`. This sheet (CSV file) name cannot be changed. 
     </div>
 
 

diff --git a/doc/source/tutorial/tutorial_IO.ipynb b/doc/source/tutorial/tutorial_IO.ipynb
@@ -961,13 +961,7 @@
    "metadata": {},
    "source": [
     "<div class=\"alert alert-info\">\n",
-    "  Note: Concerning the CSV and Excel formats:  \n",
-    "  \n",
-    "  - all Axis objects are saved together in the same Excel sheet (CSV file) named `__axes__(.csv)`  \n",
-    "  - all Group objects are saved together in the same Excel sheet (CSV file) named `__groups__(.csv)`  \n",
-    "  - metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`  \n",
-    "  \n",
-    "  These sheet (CSV file) names cannot be changed. \n",
+    "  Note: Concerning the CSV and Excel formats, the metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`. This sheet (CSV file) name cannot be changed. \n",
     "</div>"
    ]
   },

diff --git a/larray/core/array.py b/larray/core/array.py
@@ -2415,7 +2415,8 @@ def __str__(self):
         elif not len(self):
             return 'Array([])'
         else:
-            table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS])
+            table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS],
+                              _axes_display_names=True)
             return table2str(table, 'nan', maxwidth=_OPTIONS[DISPLAY_WIDTH], keepcols=self.ndim - 1,
                              precision=_OPTIONS[DISPLAY_PRECISION])
     __repr__ = __str__
@@ -2436,12 +2437,15 @@ def as_table(self, maxlines=-1, edgeitems=5, light=False, wide=True, value_name=
         """
         warnings.warn("Array.as_table() is deprecated. Please use Array.dump() instead.", FutureWarning,
                       stacklevel=2)
-        return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name)
+        return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name,
+                         _axes_display_names=True)
 
     # XXX: dump as a 2D Array with row & col dims?
     def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
-             maxlines=-1, edgeitems=5):
-        r"""
+             maxlines=-1, edgeitems=5, _axes_display_names=False):
+        r"""dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
+             maxlines=-1, edgeitems=5)
+
         Dump array as a 2D nested list. This is especially useful when writing to an Excel sheet via open_excel().
 
         Parameters
@@ -2462,7 +2466,7 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
             Assuming header is True, whether or not to include axes names. If axes_names is 'except_last',
             all axes names will be included except the last. Defaults to True.
         na_repr : any scalar, optional
-            Replace missing values (NaN floats) by this value. Default to 'as_is' (do not do any replacement).
+            Replace missing values (NaN floats) by this value. Defaults to 'as_is' (do not do any replacement).
         maxlines : int, optional
             Maximum number of lines to show. Defaults to -1 (all lines are shown).
         edgeitems : int, optional
@@ -2516,7 +2520,11 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
          ['...',  '...', '...', '...'],
          ['a1',    'b1',     6,     7]]
         """
-        display_axes_names = axes_names
+        # _axes_display_names : bool, optional
+        #    Whether or not to get axes names using AxisCollection.display_names instead of
+        #    AxisCollection.names. Defaults to False.
+
+        dump_axes_names = axes_names
 
         if not header:
             # ensure_no_numpy_type is there mostly to avoid problems with xlwings, but I am unsure where that problem
@@ -2540,14 +2548,18 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
             data = self.data.reshape(height, width)
 
             # get list of names of axes
-            axes_names = self.axes.display_names[:]
+            if _axes_display_names:
+                axes_names = self.axes.display_names[:]
+            else:
+                axes_names = [axis_name if axis_name is not None else '' for axis_name in self.axes.names]
 
             # transforms ['a', 'b', 'c', 'd'] into ['a', 'b', 'c\\d']
             if wide and len(axes_names) > 1:
-                if display_axes_names is True:
-                    axes_names[-2] = '\\'.join(axes_names[-2:])
+                if dump_axes_names is True:
+                    separator = '\\' if axes_names[-1] else ''
+                    axes_names[-2] = separator.join(axes_names[-2:])
                     axes_names.pop()
-                elif display_axes_names == 'except_last':
+                elif dump_axes_names == 'except_last':
                     axes_names = axes_names[:-1]
                 else:
                     axes_names = [''] * (len(axes_names) - 1)

diff --git a/larray/core/session.py b/larray/core/session.py
@@ -344,7 +344,8 @@ def __setstate__(self, d):
 
     def load(self, fname, names=None, engine='auto', display=False, **kwargs):
         r"""
-        Load Array, Axis and Group objects from a file, or several .csv files.
+        Load Array objects from a file, or several .csv files (all formats).
+        Also load Axis and Group objects from a file (HDF and pickle formats).
 
         WARNING: never load a file using the pickle engine (.pkl or .pickle) from an untrusted source, as it can lead
         to arbitrary code execution.
@@ -431,7 +432,8 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
 
     def save(self, fname, names=None, engine='auto', overwrite=True, display=False, **kwargs):
         r"""
-        Dumps Array, Axis and Group objects from the current session to a file.
+        Dumps Array objects from the current session to a file (all formats).
+        Also dumps Axis and Group objects from the current session to a file (HDF and pickle formats).
 
         Parameters
         ----------
@@ -450,10 +452,6 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
         display : bool, optional
             Whether or not to display which file is being worked on. Defaults to False.
 
-        Notes
-        -----
-        See Notes section from :py:meth:`~Session.to_csv` and :py:meth:`~Session.to_excel`.
-
         Examples
         --------
         >>> # axes
@@ -652,15 +650,15 @@ def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs):
 
     def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs):
         r"""
-        Dumps Array, Axis and Group objects from the current session to an Excel file.
+        Dumps Array objects from the current session to an Excel file.
 
         Parameters
         ----------
         fname : str
             Path of the file for the dump.
         names : list of str or None, optional
-            Names of Array/Axis/Group objects to dump.
-            Defaults to all objects present in the Session.
+            Names of Array objects to dump.
+            Defaults to all Array objects present in the Session.
         overwrite: bool, optional
             Whether or not to overwrite an existing file, if any. If False, file is updated. Defaults to True.
         display : bool, optional
@@ -669,8 +667,6 @@ def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs):
         Notes
         -----
         - each array is saved in a separate sheet
-        - all Axis objects are saved together in the same sheet named __axes__
-        - all Group objects are saved together in the same sheet named __groups__
         - all session metadata is saved in the same sheet named __metadata__
 
         Examples
@@ -700,23 +696,21 @@ def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs):
 
     def to_csv(self, fname, names=None, display=False, **kwargs):
         r"""
-        Dumps Array, Axis and Group objects from the current session to CSV files.
+        Dumps Array objects from the current session to CSV files.
 
         Parameters
         ----------
         fname : str
             Path for the directory that will contain CSV files.
         names : list of str or None, optional
-            Names of Array/Axis/Group objects to dump.
-            Defaults to all objects present in the Session.
+            Names of Array objects to dump.
+            Defaults to all Array objects present in the Session.
         display : bool, optional
             Whether or not to display which file is being worked on. Defaults to False.
 
         Notes
         -----
         - each array is saved in a separate file
-        - all Axis objects are saved together in the same CSV file named __axes__.csv
-        - all Group objects are saved together in the same CSV file named __groups__.csv
         - all session metadata is saved in the same CSV file named __metadata__.csv
 
         Examples

diff --git a/larray/inout/csv.py b/larray/inout/csv.py
@@ -17,7 +17,7 @@
 from larray.util.misc import skip_comment_cells, strip_rows, csv_open, deprecate_kwarg
 from larray.inout.session import register_file_handler
 from larray.inout.common import _get_index_col, FileHandler
-from larray.inout.pandas import df_asarray, _axes_to_df, _df_to_axes, _groups_to_df, _df_to_groups
+from larray.inout.pandas import df_asarray
 from larray.example import get_example_filepath
 
 
@@ -284,26 +284,9 @@ def _to_filepath(self, key):
         else:
             return key
 
-    def _load_axes_and_groups(self):
-        # load all axes
-        filepath_axes = self._to_filepath('__axes__')
-        if os.path.isfile(filepath_axes):
-            df = pd.read_csv(filepath_axes, sep=self.sep)
-            self.axes = _df_to_axes(df)
-        else:
-            self.axes = OrderedDict()
-        # load all groups
-        filepath_groups = self._to_filepath('__groups__')
-        if os.path.isfile(filepath_groups):
-            df = pd.read_csv(filepath_groups, sep=self.sep)
-            self.groups = _df_to_groups(df, self.axes)
-        else:
-            self.groups = OrderedDict()
-
     def _open_for_read(self):
         if self.directory and not os.path.isdir(self.directory):
             raise ValueError("Directory '{}' does not exist".format(self.directory))
-        self._load_axes_and_groups()
 
     def _open_for_write(self):
         if self.directory is not None:
@@ -312,8 +295,6 @@ def _open_for_write(self):
             except OSError:
                 if not os.path.isdir(self.directory):
                     raise ValueError("Path {} must represent a directory".format(self.directory))
-        self.axes = OrderedDict()
-        self.groups = OrderedDict()
 
     def list_items(self):
         fnames = glob(self.pattern) if self.pattern is not None else []
@@ -327,36 +308,18 @@ def list_items(self):
             fnames.remove('__metadata__')
         except:
             pass
-        try:
-            fnames.remove('__axes__')
-            items = [(name, 'Axis') for name in sorted(self.axes.keys())]
-        except:
-            pass
-        try:
-            fnames.remove('__groups__')
-            items += [(name, 'Group') for name in sorted(self.groups.keys())]
-        except:
-            pass
         items += [(name, 'Array') for name in fnames]
         return items
 
     def _read_item(self, key, type, *args, **kwargs):
         if type == 'Array':
             return read_csv(self._to_filepath(key), *args, **kwargs)
-        elif type == 'Axis':
-            return self.axes[key]
-        elif type == 'Group':
-            return self.groups[key]
         else:
             raise TypeError()
 
     def _dump_item(self, key, value, *args, **kwargs):
         if isinstance(value, Array):
             value.to_csv(self._to_filepath(key), *args, **kwargs)
-        elif isinstance(value, Axis):
-            self.axes[key] = value
-        elif isinstance(value, Group):
-            self.groups[key] = value
         else:
             raise TypeError()
 
@@ -374,12 +337,7 @@ def _dump_metadata(self, metadata):
             meta.to_csv(self._to_filepath('__metadata__'), sep=self.sep, wide=False, value_name='')
 
     def save(self):
-        if len(self.axes) > 0:
-            df = _axes_to_df(self.axes.values())
-            df.to_csv(self._to_filepath('__axes__'), sep=self.sep, index=False)
-        if len(self.groups) > 0:
-            df = _groups_to_df(self.groups.values())
-            df.to_csv(self._to_filepath('__groups__'), sep=self.sep, index=False)
+        pass
 
     def close(self):
         pass