@@ -3511,6 +3511,100 @@ def copy(self):
3511
3511
"""
3512
3512
return LArray (self .data .copy (), axes = self .axes [:], meta = self .meta )
3513
3513
3514
+ # XXX: we might want to implement this using .groupby().first()
3515
def unique(self, axes=None, sort=False, sep='_'):
    r"""Return the unique values of the array, optionally along one or several axes.

    When `axes` is None, individual cells are compared. When `axes` is given, the sub-arrays found at each
    position (or each combination of positions) along those axes are compared instead. In all cases only
    the first occurrence of each distinct value or sub-array is kept.

    Parameters
    ----------
    axes : axis reference (int, str, Axis) or sequence of them, optional
        Axis or axes along which to compute unique values. Defaults to None (all axes).
    sort : bool, optional
        Whether or not to sort unique values. Defaults to False. Sorting is not implemented yet for unique() along
        multiple axes.
    sep : str, optional
        Separator when several labels need to be combined. Defaults to '_'.

    Returns
    -------
    LArray
        array with unique values

    Raises
    ------
    NotImplementedError
        If `sort` is True and `axes` refers to several axes.

    Examples
    --------
    >>> arr = LArray([[0, 2, 0, 0],
    ...               [1, 1, 1, 0]], 'a=a0,a1;b=b0..b3')
    >>> arr
    a\b  b0  b1  b2  b3
     a0   0   2   0   0
     a1   1   1   1   0

    By default unique() returns the first occurrence of each unique value in the order it appears:

    >>> arr.unique()
    a_b  a0_b0  a0_b1  a1_b0
             0      2      1

    To sort the unique values, use the sort argument:

    >>> arr.unique(sort=True)
    a_b  a0_b0  a1_b0  a0_b1
             0      1      2

    One can also compute unique sub-arrays (i.e. combination of values) along axes. In our example the a0=0, a1=1
    combination appears twice along the 'b' axis, so 'b2' is not returned:

    >>> arr.unique('b')
    a\b  b0  b1  b3
     a0   0   2   0
     a1   1   1   0
    >>> arr.unique('b', sort=True)
    a\b  b3  b0  b1
     a0   0   0   2
     a1   0   1   1
    """
    if axes is not None:
        # resolve the user-provided reference(s) against the axes of this array
        axes = self.axes[axes]

    assert axes is None or isinstance(axes, (Axis, AxisCollection))

    if isinstance(axes, AxisCollection):
        # several axes: walk through all label combinations and keep the first occurrence of each value
        if sort:
            raise NotImplementedError('sort=True is not implemented for unique along multiple axes')

        combined_name = sep.join(a.name for a in axes)
        target_position = self.axes.index(axes[0])
        already_seen = set()
        first_occurrences = []
        # XXX: use combine_axes(axes).items() instead?
        for labels, value in self.items(axes):
            # sub-arrays are not hashable, so their raw bytes are used as the membership key
            if isinstance(value, LArray):
                key = value.data.tobytes()
            else:
                key = value
            if key in already_seen:
                continue
            first_occurrences.append((sep.join(str(label) for label in labels), value))
            already_seen.add(key)

        stacked = stack(first_occurrences, combined_name)
        # transpose the combined axis at the position where the first of the combined axes was
        # TODO: use stacked.transpose(stacked.axes.move_axis(-1, target_position)) once #564 is implemented:
        #       https://github.com/larray-project/larray/issues/564
        # stack adds the stacked axes at the end
        combined_axis = stacked.axes[-1]
        assert combined_axis.name == combined_name
        final_order = stacked.axes - combined_axis
        final_order.insert(target_position, combined_axis)
        return stacked.transpose(final_order)

    # no axis (flattened array) or a single axis: delegate to numpy
    axis_idx = None if axes is None else self.axes.index(axes)
    # axis needs np >= 1.13
    _, first_indices = np.unique(self, axis=axis_idx, return_index=True)
    if not sort:
        # np.unique orders by value; sorting the indices restores the order of first appearance
        first_indices = np.sort(first_indices)
    if axes is None:
        return self.iflat.__getitem__(first_indices, sep=sep)
    return self[axes.i[first_indices]]
3607
+
3514
3608
@property
3515
3609
def info (self ):
3516
3610
"""Describes a LArray (metadata + shape and labels for each axis).
0 commit comments