Skip to content

Commit efe7809

Browse files
committed
implemented light, axes_names and na_repr arguments for LArray.dump, deprecated LArray.as_table() (merged in LArray.dump()) and fixed 65535 bug
1 parent 2f2449d commit efe7809

File tree

6 files changed

+206
-124
lines changed

6 files changed

+206
-124
lines changed

doc/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,7 @@ Write
637637
LArray.to_excel
638638
LArray.to_hdf
639639
LArray.to_stata
640+
LArray.dump
640641

641642
Excel
642643
=====

doc/source/changes/version_0_30.rst.inc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
Syntax changes
55
^^^^^^^^^^^^^^
66

7-
* new syntax
7+
* :py:obj:`LArray.as_table()` is deprecated. Please use :py:obj:`LArray.dump()` instead.
88

99

1010
Backward incompatible changes
@@ -302,6 +302,12 @@ Miscellaneous improvements
302302
* allowed to pass a single axis or group as ``axes_to_reindex`` argument
303303
of the :py:obj:`LArray.reindex()` method (closes :issue:`712`).
304304

305+
* :py:obj:`LArray.dump()` gained a few extra arguments to further customize output:
306+
- axes_names : to specify whether or not the output should contain the axes names (and which)
307+
- maxlines and edgeitems : to dump only the start and end of large arrays
308+
- light : to output axes labels only when they change instead of repeating them on each line
309+
- na_repr : to specify how to represent N/A (NaN) values
310+
305311
* substantially improved performance of creating, iterating, and doing a few other operations over larray objects.
306312
This solves a few pathological cases of slow operations, especially those involving many small-ish arrays but sadly
307313
the overall performance improvement is negligible over most of the real-world models using larray that we tested these
@@ -311,6 +317,9 @@ Miscellaneous improvements
311317
Fixes
312318
^^^^^
313319

320+
* fixed dumping to Excel arrays of "object" dtype containing NaN values using numpy float types (fixes the
321+
infamous 65535 bug).
322+
314323
* fixed :py:obj:`LArray.divnot0()` being slow when the divisor has many axes and many zeros (closes :issue:`705`).
315324

316325
* fixed maximum length of sheet names (31 characters instead of 30 characters) when adding a new sheet

larray/core/array.py

Lines changed: 140 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
# include utils only in larray project and make larray a dependency of liam2
3131
# (and potentially rename it to reflect the broader scope)
3232

33-
from collections import Iterable, Sequence, OrderedDict
33+
from collections import Iterable, Sequence, OrderedDict, abc
3434
from itertools import product, chain, groupby, islice
3535
import os
3636
import sys
@@ -64,7 +64,8 @@
6464
from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis
6565
from larray.util.misc import (table2str, size2str, basestring, izip, rproduct, ReprString, duplicates,
6666
float_error_handler_factory, _isnoneslice, light_product, unique_list, common_type,
67-
renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, PY2)
67+
renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip,
68+
Repeater, Product, ensure_no_numpy_type, PY2)
6869
from larray.util.options import _OPTIONS, DISPLAY_MAXLINES, DISPLAY_EDGEITEMS, DISPLAY_WIDTH, DISPLAY_PRECISION
6970

7071

@@ -2325,7 +2326,7 @@ def __str__(self):
23252326
elif not len(self):
23262327
return 'LArray([])'
23272328
else:
2328-
table = list(self.as_table(_OPTIONS[DISPLAY_MAXLINES], _OPTIONS[DISPLAY_EDGEITEMS]))
2329+
table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS])
23292330
return table2str(table, 'nan', maxwidth=_OPTIONS[DISPLAY_WIDTH], keepcols=self.ndim - 1,
23302331
precision=_OPTIONS[DISPLAY_PRECISION])
23312332
__repr__ = __str__
@@ -2342,120 +2343,17 @@ def __contains__(self, key):
23422343

23432344
def as_table(self, maxlines=-1, edgeitems=5, light=False, wide=True, value_name='value'):
23442345
r"""
2345-
Generator. Returns next line of the table representing an array.
2346-
2347-
Parameters
2348-
----------
2349-
maxlines : int, optional
2350-
Maximum number of lines to show. Defaults to -1 (all lines are shown).
2351-
edgeitems : int, optional
2352-
If number of lines to display is greater than `maxlines`,
2353-
only the first and last `edgeitems` lines are displayed.
2354-
Only active if `maxlines` is not -1.
2355-
Defaults to 5.
2356-
light : bool, optional
2357-
Whether or not to hide repeated labels. In other words, only show a label if it is different from the
2358-
previous one. Defaults to False.
2359-
wide : boolean, optional
2360-
Whether or not to write arrays in "wide" format. If True, arrays are exported with the last axis
2361-
represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one
2362-
value column. Defaults to True.
2363-
value_name : str, optional
2364-
Name of the column containing the values (last column) when `wide=False` (see above).
2365-
Defaults to 'value'.
2366-
2367-
Returns
2368-
-------
2369-
list
2370-
Next line of the table as a list.
2371-
2372-
Examples
2373-
--------
2374-
>>> arr = ndtest((2, 2, 3))
2375-
>>> list(arr.as_table()) # doctest: +NORMALIZE_WHITESPACE
2376-
[['a', 'b\\c', 'c0', 'c1', 'c2'],
2377-
['a0', 'b0', 0, 1, 2],
2378-
['a0', 'b1', 3, 4, 5],
2379-
['a1', 'b0', 6, 7, 8],
2380-
['a1', 'b1', 9, 10, 11]]
2381-
>>> list(arr.as_table(light=True)) # doctest: +NORMALIZE_WHITESPACE
2382-
[['a', 'b\\c', 'c0', 'c1', 'c2'],
2383-
['a0', 'b0', 0, 1, 2],
2384-
['', 'b1', 3, 4, 5],
2385-
['a1', 'b0', 6, 7, 8],
2386-
['', 'b1', 9, 10, 11]]
2387-
>>> list(arr.as_table(wide=False, value_name='data')) # doctest: +NORMALIZE_WHITESPACE
2388-
[['a', 'b', 'c', 'data'],
2389-
['a0', 'b0', 'c0', 0],
2390-
['a0', 'b0', 'c1', 1],
2391-
['a0', 'b0', 'c2', 2],
2392-
['a0', 'b1', 'c0', 3],
2393-
['a0', 'b1', 'c1', 4],
2394-
['a0', 'b1', 'c2', 5],
2395-
['a1', 'b0', 'c0', 6],
2396-
['a1', 'b0', 'c1', 7],
2397-
['a1', 'b0', 'c2', 8],
2398-
['a1', 'b1', 'c0', 9],
2399-
['a1', 'b1', 'c1', 10],
2400-
['a1', 'b1', 'c2', 11]]
2346+
Deprecated. Please use LArray.dump() instead.
24012347
"""
2402-
if not self.ndim:
2403-
return
2404-
2405-
# ert unit geo\time 2012 2011 2010
2406-
# NEER27 I05 AT 101.41 101.63 101.63
2407-
# NEER27 I05 AU 134.86 125.29 117.08
2408-
if wide:
2409-
width = self.shape[-1]
2410-
height = int(np.prod(self.shape[:-1]))
2411-
else:
2412-
width = 1
2413-
height = int(np.prod(self.shape))
2414-
data = np.asarray(self).reshape(height, width)
2415-
2416-
# get list of names of axes
2417-
axes_names = self.axes.display_names[:]
2418-
# transforms ['a', 'b', 'c', 'd'] into ['a', 'b', 'c\\d']
2419-
if wide and len(axes_names) > 1:
2420-
axes_names[-2] = '\\'.join(axes_names[-2:])
2421-
axes_names.pop()
2422-
axes = self.axes[:-1] if wide else self.axes
2423-
# get list of labels for each axis (except the last one if wide=True)
2424-
labels = [axis.labels.tolist() for axis in axes]
2425-
# creates vertical lines (ticks is a list of list)
2426-
if self.ndim == 1 and wide:
2427-
# There is no vertical axis, so the axis name should not have
2428-
# any "tick" below it and we add an empty "tick".
2429-
ticks = [['']]
2430-
elif light:
2431-
ticks = light_product(*labels)
2432-
else:
2433-
ticks = product(*labels)
2434-
# returns the first line
2435-
other_colnames = self.axes[-1].labels.tolist() if wide else [value_name]
2436-
yield axes_names + other_colnames
2437-
# summary if needed
2438-
if maxlines >= 0 and height > maxlines:
2439-
# replace middle lines of the table by '...'.
2440-
# We show only the first and last edgeitems lines.
2441-
startticks = islice(ticks, edgeitems)
2442-
midticks = [["..."] * (self.ndim - 1)]
2443-
endticks = list(islice(rproduct(*labels), edgeitems))[::-1]
2444-
ticks = chain(startticks, midticks, endticks)
2445-
data = chain(data[:edgeitems].tolist(),
2446-
[["..."] * width],
2447-
data[-edgeitems:].tolist())
2448-
for tick, dataline in izip(ticks, data):
2449-
# returns next line (labels of N-1 first axes + data)
2450-
yield list(tick) + dataline
2451-
else:
2452-
for tick, dataline in izip(ticks, data):
2453-
# returns next line (labels of N-1 first axes + data)
2454-
yield list(tick) + dataline.tolist()
2348+
warnings.warn("LArray.as_table() is deprecated. Please use LArray.dump() instead.", FutureWarning,
2349+
stacklevel=2)
2350+
return self.dump(maxlines=maxlines, edgeitems=edgeitems, light=light, wide=wide, value_name=value_name)
24552351

2456-
def dump(self, header=True, wide=True, value_name='value'):
2457-
"""
2458-
Dump array as a 2D nested list
2352+
# XXX: dump as a 2D LArray with row & col dims?
2353+
def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
2354+
maxlines=-1, edgeitems=5):
2355+
r"""
2356+
Dump array as a 2D nested list. This is especially useful when writing to an Excel sheet via open_excel().
24592357
24602358
Parameters
24612359
----------
@@ -2468,16 +2366,140 @@ def dump(self, header=True, wide=True, value_name='value'):
24682366
value_name : str, optional
24692367
Name of the column containing the values (last column) when `wide=False` (see above).
24702368
Not used if header=False. Defaults to 'value'.
2369+
light : bool, optional
2370+
Whether or not to hide repeated labels. In other words, only show a label if it is different from the
2371+
previous one. Defaults to False.
2372+
axes_names : bool or 'except_last', optional
2373+
Assuming header is True, whether or not to include axes names. If axes_names is 'except_last',
2374+
all axes names will be included except the last. Defaults to True.
2375+
na_repr : any scalar, optional
2376+
Replace missing values (NaN floats) by this value. Defaults to 'as_is' (do not do any replacement).
2377+
maxlines : int, optional
2378+
Maximum number of lines to show. Defaults to -1 (all lines are shown).
2379+
edgeitems : int, optional
2380+
If number of lines to display is greater than `maxlines`, only the first and last `edgeitems` lines are
2381+
displayed. Only active if `maxlines` is not -1. Defaults to 5.
24712382
24722383
Returns
24732384
-------
2474-
2D nested list
2385+
2D nested list or None for 0d arrays
2386+
2387+
Examples
2388+
--------
2389+
>>> arr = ndtest((2, 2, 2))
2390+
>>> arr.dump() # doctest: +NORMALIZE_WHITESPACE
2391+
[['a', 'b\\c', 'c0', 'c1'],
2392+
['a0', 'b0', 0, 1],
2393+
['a0', 'b1', 2, 3],
2394+
['a1', 'b0', 4, 5],
2395+
['a1', 'b1', 6, 7]]
2396+
>>> arr.dump(axes_names=False) # doctest: +NORMALIZE_WHITESPACE
2397+
[['', '', 'c0', 'c1'],
2398+
['a0', 'b0', 0, 1],
2399+
['a0', 'b1', 2, 3],
2400+
['a1', 'b0', 4, 5],
2401+
['a1', 'b1', 6, 7]]
2402+
>>> arr.dump(axes_names='except_last') # doctest: +NORMALIZE_WHITESPACE
2403+
[['a', 'b', 'c0', 'c1'],
2404+
['a0', 'b0', 0, 1],
2405+
['a0', 'b1', 2, 3],
2406+
['a1', 'b0', 4, 5],
2407+
['a1', 'b1', 6, 7]]
2408+
>>> arr.dump(light=True) # doctest: +NORMALIZE_WHITESPACE
2409+
[['a', 'b\\c', 'c0', 'c1'],
2410+
['a0', 'b0', 0, 1],
2411+
['', 'b1', 2, 3],
2412+
['a1', 'b0', 4, 5],
2413+
['', 'b1', 6, 7]]
2414+
>>> arr.dump(wide=False, value_name='data') # doctest: +NORMALIZE_WHITESPACE
2415+
[['a', 'b', 'c', 'data'],
2416+
['a0', 'b0', 'c0', 0],
2417+
['a0', 'b0', 'c1', 1],
2418+
['a0', 'b1', 'c0', 2],
2419+
['a0', 'b1', 'c1', 3],
2420+
['a1', 'b0', 'c0', 4],
2421+
['a1', 'b0', 'c1', 5],
2422+
['a1', 'b1', 'c0', 6],
2423+
['a1', 'b1', 'c1', 7]]
2424+
>>> arr.dump(maxlines=3, edgeitems=1) # doctest: +NORMALIZE_WHITESPACE
2425+
[['a', 'b\\c', 'c0', 'c1'],
2426+
['a0', 'b0', 0, 1],
2427+
['...', '...', '...', '...'],
2428+
['a1', 'b1', 6, 7]]
24752429
"""
2430+
display_axes_names = axes_names
2431+
24762432
if not header:
2433+
# ensure_no_numpy_type is there mostly to avoid problems with xlwings, but I am unsure where that problem
2434+
# should be fixed: in np.array.tolist, in xlwings, here or in xw_excel.Sheet.__setitem__. Doing it here
2435+
# is uglier than in xw_excel but is faster because nothing (extra) needs to be done when the
2436+
# array is not of object dtype (the usual case).
2437+
24772438
# flatten all dimensions except the last one
2478-
return self.data.reshape(-1, self.shape[-1]).tolist()
2439+
# the same fix should be applied in as_table above (it uses tolist too)
2440+
res2d = ensure_no_numpy_type(self.data.reshape(-1, self.shape[-1]))
24792441
else:
2480-
return list(self.as_table(maxlines=-1, wide=wide, value_name=value_name))
2442+
if not self.ndim:
2443+
return None
2444+
2445+
if wide:
2446+
width = self.shape[-1]
2447+
height = int(np.prod(self.shape[:-1]))
2448+
else:
2449+
width = 1
2450+
height = int(np.prod(self.shape))
2451+
data = self.data.reshape(height, width)
2452+
2453+
# get list of names of axes
2454+
axes_names = self.axes.display_names[:]
2455+
2456+
# transforms ['a', 'b', 'c', 'd'] into ['a', 'b', 'c\\d']
2457+
if wide and len(axes_names) > 1:
2458+
if display_axes_names is True:
2459+
axes_names[-2] = '\\'.join(axes_names[-2:])
2460+
axes_names.pop()
2461+
elif display_axes_names == 'except_last':
2462+
axes_names = axes_names[:-1]
2463+
else:
2464+
axes_names = [''] * (len(axes_names) - 1)
2465+
2466+
axes = self.axes[:-1] if wide else self.axes
2467+
2468+
# get list of labels for each axis (except the last one if wide=True)
2469+
labels = [ensure_no_numpy_type(axis.labels) for axis in axes]
2470+
2471+
# creates vertical lines (ticks is a list of list)
2472+
if self.ndim == 1 and wide:
2473+
# There is no vertical axis, so the axis name should not have
2474+
# any "tick" below it and we add an empty "tick".
2475+
ticks = [['']]
2476+
elif light:
2477+
ticks = light_product(*labels)
2478+
else:
2479+
ticks = Product(labels)
2480+
2481+
# computes the first line
2482+
other_colnames = ensure_no_numpy_type(self.axes[-1].labels) if wide else [value_name]
2483+
res2d = [axes_names + other_colnames]
2484+
2485+
# summary if needed
2486+
if maxlines != -1 and height > maxlines:
2487+
# replace middle lines of the table by '...'.
2488+
# We show only the first and last edgeitems lines.
2489+
res2d.extend([list(tick) + dataline
2490+
for tick, dataline in zip(ticks[:edgeitems], ensure_no_numpy_type(data[:edgeitems]))])
2491+
res2d.append(["..."] * (self.ndim - 1 + width))
2492+
res2d.extend([list(tick) + dataline
2493+
for tick, dataline in zip(ticks[-edgeitems:], ensure_no_numpy_type(data[-edgeitems:]))])
2494+
else:
2495+
# all other lines (labels of N-1 first axes + data)
2496+
res2d.extend([list(tick) + ensure_no_numpy_type(dataline) for tick, dataline in zip(ticks, data)])
2497+
2498+
if na_repr != 'as_is':
2499+
res2d = [[na_repr if value != value else value
2500+
for value in line]
2501+
for line in res2d]
2502+
return res2d
24812503

24822504
# XXX: should filter(geo=['W']) return a view by default? (collapse=True)
24832505
# I think it would be dangerous to make it the default

larray/tests/test_array.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,11 +288,13 @@ def test_str(small_array, array):
288288
assert str(small_array[lipro3, sex['M']]) == """\
289289
lipro P01 P02 P03
290290
0 1 2"""
291+
291292
# two dimensions
292293
assert str(small_array.filter(lipro=lipro3)) == """\
293294
sex\\lipro P01 P02 P03
294295
M 0 1 2
295296
F 15 16 17"""
297+
296298
# four dimensions (too many rows)
297299
assert str(array.filter(lipro=lipro3)) == """\
298300
age geo sex\\lipro P01 P02 P03
@@ -307,6 +309,7 @@ def test_str(small_array, array):
307309
115 A93 F 153075.0 153076.0 153077.0
308310
115 A21 M 153090.0 153091.0 153092.0
309311
115 A21 F 153105.0 153106.0 153107.0"""
312+
310313
# too many columns
311314
assert str(array['P01', 'A11', 'M']) == """\
312315
age 0 1 2 ... 112 113 114 115

0 commit comments

Comments
 (0)