Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion doc/source/changes/version_0_32.rst.inc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ Syntax changes
Backward incompatible changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

* other backward incompatible changes
* Because it was broken, the ability to dump and load Axis and Group objects
contained in a session has been removed for the CSV and Excel formats.
Fixing it would have taken too much time considering how rarely it is used
(no one complained that it was broken), so the decision was taken to remove it.
However, this is still possible using the HDF and pickle formats.
Closes :issue:`815`.


New features
Expand Down Expand Up @@ -61,3 +66,6 @@ Fixes

* fixed :py:obj:`zip_array_values()` and :py:obj:`zip_array_items()` functions not available
when importing the entire larray library as ``from larray import *`` (closes :issue:`816`).

* fixed wrong axis and group names when loading a session from an HDF file
(closes :issue:`803`).
8 changes: 1 addition & 7 deletions doc/source/tutorial/tutorial_IO.ipyml
Original file line number Diff line number Diff line change
Expand Up @@ -660,13 +660,7 @@ cells:

- markdown: |
<div class="alert alert-info">
Note: Concerning the CSV and Excel formats:

- all Axis objects are saved together in the same Excel sheet (CSV file) named `__axes__(.csv)`
- all Group objects are saved together in the same Excel sheet (CSV file) named `__groups__(.csv)`
- metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`

These sheet (CSV file) names cannot be changed.
Note: Concerning the CSV and Excel formats, the metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`. This sheet (CSV file) name cannot be changed.
</div>


Expand Down
8 changes: 1 addition & 7 deletions doc/source/tutorial/tutorial_IO.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -961,13 +961,7 @@
"metadata": {},
"source": [
"<div class=\"alert alert-info\">\n",
" Note: Concerning the CSV and Excel formats: \n",
" \n",
" - all Axis objects are saved together in the same Excel sheet (CSV file) named `__axes__(.csv)` \n",
" - all Group objects are saved together in the same Excel sheet (CSV file) named `__groups__(.csv)` \n",
" - metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)` \n",
" \n",
" These sheet (CSV file) names cannot be changed. \n",
" Note: Concerning the CSV and Excel formats, the metadata is saved in one Excel sheet (CSV file) named `__metadata__(.csv)`. This sheet (CSV file) name cannot be changed. \n",
"</div>"
]
},
Expand Down
32 changes: 22 additions & 10 deletions larray/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2415,7 +2415,8 @@ def __str__(self):
elif not len(self):
return 'Array([])'
else:
table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS])
table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS],
_axes_display_names=True)
return table2str(table, 'nan', maxwidth=_OPTIONS[DISPLAY_WIDTH], keepcols=self.ndim - 1,
precision=_OPTIONS[DISPLAY_PRECISION])
__repr__ = __str__
Expand All @@ -2436,12 +2437,15 @@ def as_table(self, maxlines=-1, edgeitems=5, light=False, wide=True, value_name=
"""
warnings.warn("Array.as_table() is deprecated. Please use Array.dump() instead.", FutureWarning,
stacklevel=2)
return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name)
return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name,
_axes_display_names=True)

# XXX: dump as a 2D Array with row & col dims?
def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
maxlines=-1, edgeitems=5):
r"""
maxlines=-1, edgeitems=5, _axes_display_names=False):
r"""dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
maxlines=-1, edgeitems=5)

Dump array as a 2D nested list. This is especially useful when writing to an Excel sheet via open_excel().

Parameters
Expand All @@ -2462,7 +2466,7 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
Assuming header is True, whether or not to include axes names. If axes_names is 'except_last',
all axes names will be included except the last. Defaults to True.
na_repr : any scalar, optional
Replace missing values (NaN floats) by this value. Default to 'as_is' (do not do any replacement).
Replace missing values (NaN floats) by this value. Defaults to 'as_is' (do not do any replacement).
maxlines : int, optional
Maximum number of lines to show. Defaults to -1 (all lines are shown).
edgeitems : int, optional
Expand Down Expand Up @@ -2516,7 +2520,11 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
['...', '...', '...', '...'],
['a1', 'b1', 6, 7]]
"""
display_axes_names = axes_names
# _axes_display_names : bool, optional
# Whether or not to get axes names using AxisCollection.display_names instead of
# AxisCollection.names. Defaults to False.

dump_axes_names = axes_names

if not header:
# ensure_no_numpy_type is there mostly to avoid problems with xlwings, but I am unsure where that problem
Expand All @@ -2540,14 +2548,18 @@ def dump(self, header=True, wide=True, value_name='value', light=False, axes_nam
data = self.data.reshape(height, width)

# get list of names of axes
axes_names = self.axes.display_names[:]
if _axes_display_names:
axes_names = self.axes.display_names[:]
else:
axes_names = [axis_name if axis_name is not None else '' for axis_name in self.axes.names]

# transforms ['a', 'b', 'c', 'd'] into ['a', 'b', 'c\\d']
if wide and len(axes_names) > 1:
if display_axes_names is True:
axes_names[-2] = '\\'.join(axes_names[-2:])
if dump_axes_names is True:
separator = '\\' if axes_names[-1] else ''
axes_names[-2] = separator.join(axes_names[-2:])
axes_names.pop()
elif display_axes_names == 'except_last':
elif dump_axes_names == 'except_last':
axes_names = axes_names[:-1]
else:
axes_names = [''] * (len(axes_names) - 1)
Expand Down
26 changes: 10 additions & 16 deletions larray/core/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ def __setstate__(self, d):

def load(self, fname, names=None, engine='auto', display=False, **kwargs):
r"""
Load Array, Axis and Group objects from a file, or several .csv files.
Load Array objects from a file, or several .csv files (all formats).
Also load Axis and Group objects from a file (HDF and pickle formats).

WARNING: never load a file using the pickle engine (.pkl or .pickle) from an untrusted source, as it can lead
to arbitrary code execution.
Expand Down Expand Up @@ -431,7 +432,8 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):

def save(self, fname, names=None, engine='auto', overwrite=True, display=False, **kwargs):
r"""
Dumps Array, Axis and Group objects from the current session to a file.
Dumps Array objects from the current session to a file (all formats).
Also dumps Axis and Group objects from the current session to a file (HDF and pickle formats).

Parameters
----------
Expand All @@ -450,10 +452,6 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
display : bool, optional
Whether or not to display which file is being worked on. Defaults to False.

Notes
-----
See Notes section from :py:meth:`~Session.to_csv` and :py:meth:`~Session.to_excel`.

Examples
--------
>>> # axes
Expand Down Expand Up @@ -652,15 +650,15 @@ def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs):

def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs):
r"""
Dumps Array, Axis and Group objects from the current session to an Excel file.
Dumps Array objects from the current session to an Excel file.

Parameters
----------
fname : str
Path of the file for the dump.
names : list of str or None, optional
Names of Array/Axis/Group objects to dump.
Defaults to all objects present in the Session.
Names of Array objects to dump.
Defaults to all Array objects present in the Session.
overwrite : bool, optional
Whether or not to overwrite an existing file, if any. If False, file is updated. Defaults to True.
display : bool, optional
Expand All @@ -669,8 +667,6 @@ def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs):
Notes
-----
- each array is saved in a separate sheet
- all Axis objects are saved together in the same sheet named __axes__
- all Group objects are saved together in the same sheet named __groups__
- all session metadata is saved in the same sheet named __metadata__

Examples
Expand Down Expand Up @@ -700,23 +696,21 @@ def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs):

def to_csv(self, fname, names=None, display=False, **kwargs):
r"""
Dumps Array, Axis and Group objects from the current session to CSV files.
Dumps Array objects from the current session to CSV files.

Parameters
----------
fname : str
Path for the directory that will contain CSV files.
names : list of str or None, optional
Names of Array/Axis/Group objects to dump.
Defaults to all objects present in the Session.
Names of Array objects to dump.
Defaults to all Array objects present in the Session.
display : bool, optional
Whether or not to display which file is being worked on. Defaults to False.

Notes
-----
- each array is saved in a separate file
- all Axis objects are saved together in the same CSV file named __axes__.csv
- all Group objects are saved together in the same CSV file named __groups__.csv
- all session metadata is saved in the same CSV file named __metadata__.csv

Examples
Expand Down
46 changes: 2 additions & 44 deletions larray/inout/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from larray.util.misc import skip_comment_cells, strip_rows, csv_open, deprecate_kwarg
from larray.inout.session import register_file_handler
from larray.inout.common import _get_index_col, FileHandler
from larray.inout.pandas import df_asarray, _axes_to_df, _df_to_axes, _groups_to_df, _df_to_groups
from larray.inout.pandas import df_asarray
from larray.example import get_example_filepath


Expand Down Expand Up @@ -284,26 +284,9 @@ def _to_filepath(self, key):
else:
return key

def _load_axes_and_groups(self):
# load all axes
filepath_axes = self._to_filepath('__axes__')
if os.path.isfile(filepath_axes):
df = pd.read_csv(filepath_axes, sep=self.sep)
self.axes = _df_to_axes(df)
else:
self.axes = OrderedDict()
# load all groups
filepath_groups = self._to_filepath('__groups__')
if os.path.isfile(filepath_groups):
df = pd.read_csv(filepath_groups, sep=self.sep)
self.groups = _df_to_groups(df, self.axes)
else:
self.groups = OrderedDict()

def _open_for_read(self):
if self.directory and not os.path.isdir(self.directory):
raise ValueError("Directory '{}' does not exist".format(self.directory))
self._load_axes_and_groups()

def _open_for_write(self):
if self.directory is not None:
Expand All @@ -312,8 +295,6 @@ def _open_for_write(self):
except OSError:
if not os.path.isdir(self.directory):
raise ValueError("Path {} must represent a directory".format(self.directory))
self.axes = OrderedDict()
self.groups = OrderedDict()

def list_items(self):
fnames = glob(self.pattern) if self.pattern is not None else []
Expand All @@ -327,36 +308,18 @@ def list_items(self):
fnames.remove('__metadata__')
except:
pass
try:
fnames.remove('__axes__')
items = [(name, 'Axis') for name in sorted(self.axes.keys())]
except:
pass
try:
fnames.remove('__groups__')
items += [(name, 'Group') for name in sorted(self.groups.keys())]
except:
pass
items += [(name, 'Array') for name in fnames]
return items

def _read_item(self, key, type, *args, **kwargs):
if type == 'Array':
return read_csv(self._to_filepath(key), *args, **kwargs)
elif type == 'Axis':
return self.axes[key]
elif type == 'Group':
return self.groups[key]
else:
raise TypeError()

def _dump_item(self, key, value, *args, **kwargs):
if isinstance(value, Array):
value.to_csv(self._to_filepath(key), *args, **kwargs)
elif isinstance(value, Axis):
self.axes[key] = value
elif isinstance(value, Group):
self.groups[key] = value
else:
raise TypeError()

Expand All @@ -374,12 +337,7 @@ def _dump_metadata(self, metadata):
meta.to_csv(self._to_filepath('__metadata__'), sep=self.sep, wide=False, value_name='')

def save(self):
if len(self.axes) > 0:
df = _axes_to_df(self.axes.values())
df.to_csv(self._to_filepath('__axes__'), sep=self.sep, index=False)
if len(self.groups) > 0:
df = _groups_to_df(self.groups.values())
df.to_csv(self._to_filepath('__groups__'), sep=self.sep, index=False)
pass

def close(self):
pass
Loading