fix #820: added argument _axes_display_names to the Array.dump() function to specify whether exported axes names are taken from the AxisCollection.names or the AxisCollection.display_names property

alixdamman · alixdamman · commit 0c11567f4583 · 2019-10-24T16:25:34.000+02:00
diff --git a/larray/core/array.py b/larray/core/array.py
@@ -2415,7 +2415,8 @@ def __str__(self):
         elif not len(self):
             return 'Array([])'
         else:
-            table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS])
+            table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS],
+                              _axes_display_names=True)
             return table2str(table, 'nan', maxwidth=_OPTIONS[DISPLAY_WIDTH], keepcols=self.ndim - 1,
                              precision=_OPTIONS[DISPLAY_PRECISION])
     __repr__ = __str__
@@ -2436,12 +2437,15 @@ def as_table(self, maxlines=-1, edgeitems=5, light=False, wide=True, value_name=
         """
         warnings.warn("Array.as_table() is deprecated. Please use Array.dump() instead.", FutureWarning,
                       stacklevel=2)
-        return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name)
+        return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name,
+                         _axes_display_names=True)
 
     # XXX: dump as a 2D Array with row & col dims?
     def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
-             maxlines=-1, edgeitems=5):
-        r"""
+             maxlines=-1, edgeitems=5, _axes_display_names=False):
+        r"""dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
+             maxlines=-1, edgeitems=5)
+
         Dump array as a 2D nested list. This is especially useful when writing to an Excel sheet via open_excel().
 
         Parameters
@@ -2462,7 +2466,7 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
             Assuming header is True, whether or not to include axes names. If axes_names is 'except_last',
             all axes names will be included except the last. Defaults to True.
         na_repr : any scalar, optional
-            Replace missing values (NaN floats) by this value. Default to 'as_is' (do not do any replacement).
+            Replace missing values (NaN floats) by this value. Defaults to 'as_is' (do not do any replacement).
         maxlines : int, optional
             Maximum number of lines to show. Defaults to -1 (all lines are shown).
         edgeitems : int, optional
@@ -2516,7 +2520,11 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
          ['...',  '...', '...', '...'],
          ['a1',    'b1',     6,     7]]
         """
-        display_axes_names = axes_names
+        # _axes_display_names : bool, optional
+        #    Whether or not to get axes names using AxisCollection.display_names instead of
+        #    AxisCollection.names. Defaults to False.
+
+        dump_axes_names = axes_names
 
         if not header:
             # ensure_no_numpy_type is there mostly to avoid problems with xlwings, but I am unsure where that problem
@@ -2540,14 +2548,18 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
             data = self.data.reshape(height, width)
 
             # get list of names of axes
-            axes_names = self.axes.display_names[:]
+            if _axes_display_names:
+                axes_names = self.axes.display_names[:]
+            else:
+                axes_names = [axis_name if axis_name is not None else '' for axis_name in self.axes.names]
 
             # transforms ['a', 'b', 'c', 'd'] into ['a', 'b', 'c\\d']
             if wide and len(axes_names) > 1:
-                if display_axes_names is True:
-                    axes_names[-2] = '\\'.join(axes_names[-2:])
+                if dump_axes_names is True:
+                    separator = '\\' if axes_names[-1] else ''
+                    axes_names[-2] = separator.join(axes_names[-2:])
                     axes_names.pop()
-                elif display_axes_names == 'except_last':
+                elif dump_axes_names == 'except_last':
                     axes_names = axes_names[:-1]
                 else:
                     axes_names = [''] * (len(axes_names) - 1)
diff --git a/larray/inout/pandas.py b/larray/inout/pandas.py
@@ -226,6 +226,7 @@ def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfo
     if unfold_last_axis_name:
         if isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]:
             last_axes = [name.strip() for name in axes_names[-1].split('\\')]
+            last_axes = [name if name else None for name in last_axes]
             axes_names = axes_names[:-1] + last_axes
         else:
             axes_names += [None]
@@ -327,8 +328,13 @@ def df_asarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header=
             raise ValueError('sort_rows=True is not valid for 1D arrays. Please use sort_columns instead.')
         res = from_series(series, sort_rows=sort_columns)
     else:
-        axes_names = [decode(name, 'utf8') if isinstance(name, basestring) else name
-                      for name in df.index.names]
+        def parse_axis_name(name):
+            if isinstance(name, basestring):
+                name = decode(name, 'utf8')
+            if not name:
+                name = None
+            return name
+        axes_names = [parse_axis_name(name) for name in df.index.names]
         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py
@@ -4128,11 +4128,22 @@ def test_to_excel_xlwings(tmpdir):
 
 
 def test_dump():
+    # narrow format
     res = list(ndtest(3).dump(wide=False, value_name='data'))
     assert res == [['a', 'data'],
                    ['a0', 0],
                    ['a1', 1],
                    ['a2', 2]]
+    # array with an anonymous axis and a wildcard axis
+    arr = ndtest((Axis('a0,a1'), Axis(2, 'b')))
+    res = arr.dump()
+    assert res == [['\\b', 0, 1],
+                   ['a0', 0, 1],
+                   ['a1', 2, 3]]
+    res = arr.dump(_axes_display_names=True)
+    assert res == [['{0}\\b*', 0, 1],
+                   ['a0', 0, 1],
+                   ['a1', 2, 3]]
 
 
 @needs_xlwings
@@ -4293,7 +4304,7 @@ def test_open_excel(tmpdir):
         assert_array_equal(res, a3.data.reshape((6, 4)))
 
     # 4) Blank cells
-    # ========================
+    # ==============
     # Excel sheet with blank cells on right/bottom border of the array to read
     fpath = inputpath('test_blank_cells.xlsx')
     with open_excel(fpath) as wb:
@@ -4309,7 +4320,16 @@ def test_open_excel(tmpdir):
     assert_array_equal(bad3, good2)
     assert_array_equal(bad4, good2)
 
-    # 5) crash test
+    # 5) anonymous and wildcard axes
+    # ==============================
+    arr = ndtest((Axis('a0,a1'), Axis(2, 'b')))
+    fpath = tmp_path(tmpdir, 'anonymous_and_wildcard_axes.xlsx')
+    with open_excel(fpath, overwrite_file=True) as wb:
+        wb[0] = arr.dump()
+        res = wb[0].load()
+        assert arr.equals(res)
+
+    # 6) crash test
     # =============
     arr = ndtest((2, 2))
     fpath = tmp_path(tmpdir, 'temporary_test_file.xlsx')