@@ -3323,6 +3323,65 @@ def datastep(self, code, casout=None, *args, **kwargs):
3323
3323
3324
3324
raise SWATError (out .status )
3325
3325
3326
def nunique(self, dropna=True, casout=None):
    '''
    Count the distinct values found in each column of the CASTable

    Parameters
    ----------
    dropna : bool, optional
        Whether missing values should be left out of the count.
        The value is passed along unchanged as the ``skipna`` argument
        of whichever helper ends up doing the work.
    casout : optional
        Output table specification.  It is handed to
        ``_use_casout_for_stat``; when that check passes, the request
        is delegated to ``_get_casout_stat`` with ``stat='nunique'``.

    See Also
    --------
    :meth:`CASColumn.nunique`
    :meth:`pandas.DataFrame.nunique`

    Returns
    -------
    :class:`pandas.Series`
        If no By groups are specified (and ``casout`` is not used).
    :class:`pandas.DataFrame`
        If By groups are specified (and ``casout`` is not used).
    object
        Whatever ``_get_casout_stat`` returns, if ``casout`` is used.

    '''
    # Without a usable output table, go straight to the direct computation.
    if not self._use_casout_for_stat(casout):
        return self._nunique(skipna=dropna)

    return self._get_casout_stat('nunique', skipna=dropna, casout=casout)
3347
+
3348
def _nunique(self, skipna=True):
    '''
    Count the distinct values found in each column of the CASTable

    The counts come from the ``NDistinct`` column of the tables
    produced by the ``simple.distinct`` action.

    Parameters
    ----------
    skipna : bool, optional
        The action is run with ``includeMissing=not skipna``, so a
        true value asks the action not to include missing values.

    Returns
    -------
    :class:`pandas.Series`
        If By groups are not specified.  Indexed by the values of the
        ``Column`` column, with both the Series name and the index
        name cleared to match :meth:`pandas.DataFrame.nunique`.
    :class:`pandas.DataFrame`
        If By groups are specified.  One column per value of the
        ``Column`` column, minus the By variables themselves.

    '''
    if not self.get_groupby_vars():
        # No By groups: the action yields a single 'Distinct' table
        table = self._retrieve('simple.distinct',
                               includeMissing=not skipna)['Distinct']
        table = table.set_index('Column')
        counts = table['NDistinct'].astype('int64')
        # pandas' nunique returns an anonymous Series; do the same here
        counts.index.name = None
        counts.name = None
        return counts

    # By groups: one table per group, which get merged into one DataFrame
    by_tables = self._retrieve('simple.distinct', includeMissing=not skipna)
    by_tables.pop('ByGroupInfo', None)

    # Same bygroups flattening as CASTable.nmiss: stack the per-group
    # tables, then pivot the 'Column' index level out into columns.
    stacked = pd.concat(list(by_tables.values()))
    wide = (
        stacked.set_index('Column', append=True)['NDistinct']
        .unstack(level=-1)
        .astype('int64')
        # Counts for the By variables themselves carry no information
        .drop(labels=self.get_groupby_vars(), axis=1)
    )
    if isinstance(wide, pd.DataFrame):
        wide.columns.name = None
    return wide
3384
+
3326
3385
# def isin(self, values, casout=None):
3327
3386
# raise NotImplementedError
3328
3387
@@ -4442,7 +4501,7 @@ def _get_casout_stat(self, stat, axis=None, skipna=True, level=None,
4442
4501
# NOTE: Only works with a single column
4443
4502
elif stat == 'nunique' :
4444
4503
out = self ._retrieve ('simple.distinct' , includemissing = not skipna ,
4445
- inputs = [ inputs [ 0 ]] , casout = casout , ** kwargs )
4504
+ inputs = inputs , casout = casout , ** kwargs )
4446
4505
return self ._normalize_distinct_casout (out ['OutputCasTables' ]['casTable' ][0 ],
4447
4506
skipna = skipna )
4448
4507
0 commit comments