@@ -452,28 +452,20 @@ def index_type(self):
452
452
453
453
def unique(self):
    # type: () -> ExtensionArray
    """Return the distinct values of this array in order of first appearance.

    ``np.unique`` returns its result sorted, so the first-occurrence
    positions it reports are sorted and used to pull the values back out of
    ``self.data`` in their original order.

    Returns
    -------
    ExtensionArray
        The result of ``self._from_ndarray`` applied to the de-duplicated
        values.
    """
    # https://github.com/pandas-dev/pandas/pull/19869
    _, indices = np.unique(self.data, return_index=True)
    # Ascending first-occurrence positions == order of first appearance.
    data = self.data.take(np.sort(indices))
    return self._from_ndarray(data)
456
459
457
def factorize(self, sort=False):
    """Encode the array as integer labels plus its distinct values.

    Parameters
    ----------
    sort : bool, default False
        If True, ``uniques`` is in the sorted order produced by
        ``np.unique``. If False, ``uniques`` is in order of first
        appearance in ``self.data``.

    Returns
    -------
    labels : ndarray of int
        For each element of ``self.data``, the position of its value in
        ``uniques``.
    uniques : ExtensionArray
        The distinct values, wrapped with ``self._from_ndarray`` for both
        values of ``sort``.
    """
    # XXX: Verify this, check for better algo
    sorted_values, first_seen, labels = np.unique(self.data,
                                                  return_index=True,
                                                  return_inverse=True)
    if sort:
        # np.unique already sorts; just wrap so the return type matches
        # the unsorted path instead of leaking a raw ndarray.
        return labels, self._from_ndarray(sorted_values)

    # Unsort, since np.unique sorts: ascending first-occurrence positions
    # give the distinct values in order of first appearance.
    appearance_values = self.data.take(np.sort(first_seen))
    # argsort maps "rank in sorted order" -> "position in appearance order",
    # which is exactly the relabelling needed for the inverse indices.
    labels = np.argsort(appearance_values).take(labels)
    return labels, self._from_ndarray(appearance_values)
478
470
479
471
0 commit comments