Skip to content

Commit 9556805

Browse files
author
Michael Erickson
committed
Added nunique method to CASTable
1 parent eec0dcb commit 9556805

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

swat/cas/table.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3323,6 +3323,46 @@ def datastep(self, code, casout=None, *args, **kwargs):
33233323

33243324
raise SWATError(out.status)
33253325

3326+
def nunique(self, dropna=True, casout=None):
    '''
    Return the number of unique elements per column in the CASTable

    Parameters
    ----------
    dropna : bool, optional
        Forwarded to the underlying computation as ``skipna``.
        When True (the default), missing values are not counted
        as a distinct value.
    casout : optional
        Output table specification.  When ``_use_casout_for_stat``
        accepts it, the statistic is produced by ``_get_casout_stat``
        rather than being computed from a result pulled to the client.

    See Also
    --------
    :meth:`CASColumn.nunique`
    :meth:`pandas.DataFrame.nunique`

    Returns
    -------
    :class:`pandas.Series`
        Distinct-value counts indexed by column name, when the
        result is computed on the client (no usable ``casout``).
    object
        Whatever ``_get_casout_stat`` returns when ``casout`` is used.
        NOTE(review): presumably a CASTable; confirm against
        ``_get_casout_stat``.

    '''
    # Server-side path: leave the result in an output table.
    if self._use_casout_for_stat(casout):
        return self._get_casout_stat('nunique', skipna=dropna, casout=casout)

    # Client-side path: pandas calls the flag ``dropna``, the internal
    # helper calls it ``skipna``; they mean the same thing here.
    return self._nunique(skipna=dropna)
3347+
3348+
def _nunique(self, skipna=True):
    '''
    Return the number of unique elements per column in the CASTable

    Runs the ``simple.distinct`` action and reshapes its ``Distinct``
    result table so that it looks like the output of
    :meth:`pandas.DataFrame.nunique`.

    Parameters
    ----------
    skipna : bool, optional
        When True (the default), the action is called with
        ``includeMissing=False``; when False, with
        ``includeMissing=True``.

    Returns
    -------
    :class:`pandas.Series`
        The ``NDistinct`` values cast to ``int64``, indexed by the
        values of the ``Column`` column.  Neither the Series nor its
        index carries a name.

    '''
    # NOTE(review): only the plain 'Distinct' result key is read, so
    # By-group results do not appear to be handled here -- confirm.
    distinct_table = self._retrieve('simple.distinct',
                                    includeMissing=not skipna)['Distinct']

    # Reduce table to a Series based off the NDistinct column
    counts = distinct_table.set_index('Column').loc[:, 'NDistinct']
    counts = counts.astype('int64')

    # Strip names from Series to match pandas nunique
    counts.index.name = None
    counts.name = None

    return counts
3365+
33263366
# def isin(self, values, casout=None):
33273367
# raise NotImplementedError
33283368

0 commit comments

Comments
 (0)