Skip to content

Commit 0fe2463

Browse files
committed
implemented LArray.unique
1 parent 3577310 commit 0fe2463

File tree

4 files changed

+114
-0
lines changed

4 files changed

+114
-0
lines changed

doc/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ Miscellaneous
461461
LArray.shift
462462
LArray.roll
463463
LArray.diff
464+
LArray.unique
464465
LArray.to_clipboard
465466

466467
.. _la_to_pandas:

doc/source/changes/version_0_30.rst.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ New features
123123
* implemented :py:obj:`LArray.keys()`, :py:obj:`LArray.values()` and :py:obj:`LArray.items()`
124124
methods to respectively loop on an array's labels, values or (key, value) pairs.
125125

126+
* implemented :py:obj:`LArray.unique()` method to compute unique values (or sub-arrays) for an array,
127+
optionally along axes.
128+
126129
* implemented :py:obj:`Axis.apply()` method to transform the labels of an axis using a function and return a new Axis.
127130

128131
>>> sex = Axis('sex=MALE,FEMALE')

larray/core/array.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3511,6 +3511,100 @@ def copy(self):
35113511
"""
35123512
return LArray(self.data.copy(), axes=self.axes[:], meta=self.meta)
35133513

3514+
# XXX: we might want to implement this using .groupby().first()
3515+
def unique(self, axes=None, sort=False, sep='_'):
    r"""Returns the distinct values of the array, or its distinct sub-arrays along given axes.

    Parameters
    ----------
    axes : axis reference (int, str, Axis) or sequence of them, optional
        Axis or axes along which to look for unique sub-arrays. Defaults to None, in which case unique
        values are computed over all axes.
    sort : bool, optional
        Whether or not to sort the unique values. Defaults to False (keep the order of first occurrence).
        Sorting is not implemented yet when several axes are given.
    sep : str, optional
        Separator used when several labels (and axis names) need to be combined. Defaults to '_'.

    Returns
    -------
    LArray
        array with unique values

    Raises
    ------
    NotImplementedError
        If sort is True and several axes are given.

    Examples
    --------
    >>> arr = LArray([[0, 2, 0, 0],
    ...               [1, 1, 1, 0]], 'a=a0,a1;b=b0..b3')
    >>> arr
    a\b  b0  b1  b2  b3
     a0   0   2   0   0
     a1   1   1   1   0

    Without any argument, the first occurrence of each distinct value is returned, in order of appearance:

    >>> arr.unique()
    a_b  a0_b0  a0_b1  a1_b0
             0      2      1

    Use the sort argument to get the unique values sorted instead:

    >>> arr.unique(sort=True)
    a_b  a0_b0  a1_b0  a0_b1
             0      1      2

    Unique sub-arrays (i.e. combinations of values) can also be computed along axes. Here the
    a0=0, a1=1 combination occurs twice along the 'b' axis, so 'b2' is dropped:

    >>> arr.unique('b')
    a\b  b0  b1  b3
     a0   0   2   0
     a1   1   1   0
    >>> arr.unique('b', sort=True)
    a\b  b3  b0  b1
     a0   0   0   2
     a1   0   1   1
    """
    if axes is not None:
        # resolve the axis reference(s) against the axes of this array
        axes = self.axes[axes]

    assert axes is None or isinstance(axes, (Axis, AxisCollection))

    if isinstance(axes, AxisCollection):
        # several axes: walk over the label combinations and keep the first occurrence of each value
        if sort:
            raise NotImplementedError('sort=True is not implemented for unique along multiple axes')

        combined_name = sep.join(a.name for a in axes)
        insert_pos = self.axes.index(axes[0])

        already_seen = set()
        kept = []
        # XXX: use combine_axes(axes).items() instead?
        for labels, value in self.items(axes):
            # sub-arrays are not hashable, so their raw bytes are used as the deduplication key
            # NOTE(review): for object dtype data, tobytes() presumably captures object pointers rather than
            #               values -- confirm this is acceptable
            if isinstance(value, LArray):
                key = value.data.tobytes()
            else:
                key = value
            if key in already_seen:
                continue
            kept.append((sep.join(str(label) for label in labels), value))
            already_seen.add(key)

        stacked = stack(kept, combined_name)
        # move the combined axis to the position where the first of the combined axes was
        # TODO: use res_arr.transpose(res_arr.axes.move_axis(-1, first_axis_idx)) once #564 is implemented:
        #       https://github.com/larray-project/larray/issues/564
        # stack adds the stacked axes at the end
        new_axis = stacked.axes[-1]
        assert new_axis.name == combined_name
        reordered = stacked.axes - new_axis
        reordered.insert(insert_pos, new_axis)
        return stacked.transpose(reordered)

    # no axis or a single axis: delegate to numpy
    if axes is None:
        target_axis = None
    else:
        target_axis = self.axes.index(axes)
    # axis needs np >= 1.13
    first_positions = np.unique(self, axis=target_axis, return_index=True)[1]
    if not sort:
        # np.unique orders indices by value; sorting the indices restores the order of first appearance
        first_positions = np.sort(first_positions)
    if axes is None:
        return self.iflat.__getitem__(first_positions, sep=sep)
    return self[axes.i[first_positions]]
3607+
35143608
@property
35153609
def info(self):
35163610
"""Describes a LArray (metadata + shape and labels for each axis).

larray/tests/test_array.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3001,6 +3001,22 @@ def test_shift_axis(small_array):
30013001
l2.axes.lipro.labels = lipro.labels[1:]
30023002

30033003

3004+
def test_unique():
    """Check LArray.unique along a single axis and along a combination of axes."""
    data = LArray([[[0, 2, 0, 0],
                    [1, 1, 1, 0]],
                   [[0, 2, 0, 0],
                    [2, 1, 2, 0]]], 'a=a0,a1;b=b0,b1;c=c0..c3')

    # no sub-array is duplicated along 'a' nor along 'b': the array is returned unchanged
    assert_array_equal(data.unique('a'), data)
    assert_array_equal(data.unique('b'), data)

    # along 'c', the 'c2' sub-array duplicates 'c0', so only its first occurrence is kept
    assert_array_equal(data.unique('c'), data['c0,c1,c3'])

    # along ('a', 'b'): the a1_b0 row equals the a0_b0 row and is thus dropped
    combined = from_string("""\
a_b\\c c0 c1 c2 c3
a0_b0 0 2 0 0
a0_b1 1 1 1 0
a1_b1 2 1 2 0""")
    assert_array_equal(data.unique(('a', 'b')), combined)
3018+
3019+
30043020
def test_extend(small_array):
30053021
sex, lipro = small_array.axes
30063022

0 commit comments

Comments
 (0)