From 40505bbd0a44a1717cf9c7f949a8ac4ff73a9122 Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Wed, 16 Nov 2016 22:44:11 -0800
Subject: [PATCH] BUG: Error upon Series.Groupby.nunique with empty Series
 (#12553)

Modified tests

simplify tests

Add whatsnew

Moved len check
---
 doc/source/whatsnew/v0.19.2.txt | 2 +-
 pandas/core/groupby.py          | 6 +++++-
 pandas/tests/test_groupby.py    | 7 +++++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt
index cafbdb731f494..f6e75a148447e 100644
--- a/doc/source/whatsnew/v0.19.2.txt
+++ b/doc/source/whatsnew/v0.19.2.txt
@@ -57,7 +57,7 @@ Bug Fixes


 - Bug in resampling a ``DatetimeIndex`` in local TZ, covering a DST change, which would raise ``AmbiguousTimeError`` (:issue:`14682`)
-
+- Bug in ``Series.groupby.nunique()`` raising an ``IndexError`` for an empty ``Series`` (:issue:`12553`)



diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index ea26f5c0d29b8..108fc339b03c8 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2898,6 +2898,7 @@ def true_and_notnull(x, *args, **kwargs):
     def nunique(self, dropna=True):
         """ Returns number of unique elements in the group """
         ids, _, _ = self.grouper.group_info
+
         val = self.obj.get_values()

         try:
@@ -2928,7 +2929,10 @@ def nunique(self, dropna=True):
             inc[idx] = 1

         out = np.add.reduceat(inc, idx).astype('int64', copy=False)
-        res = out if ids[0] != -1 else out[1:]
+        if len(ids):
+            res = out if ids[0] != -1 else out[1:]
+        else:
+            res = out[1:]
         ri = self.grouper.result_index

         # we might have duplications among the bins
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 52d1c5c3681e0..cce06a51a18b5 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -6754,6 +6754,13 @@ def test_nunique_with_object(self):
         expected = pd.Series([1] * 5, name='name', index=index)
         tm.assert_series_equal(result, expected)

+    def test_nunique_with_empty_series(self):
+        # GH 12553
+        data = pd.Series(name='name')
+        result = data.groupby(level=0).nunique()
+        expected = pd.Series(name='name', dtype='int64')
+        tm.assert_series_equal(result, expected)
+
     def test_transform_with_non_scalar_group(self):
         # GH 10165
         cols = pd.MultiIndex.from_tuples([
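
A minimal reproduction of the behavior this patch covers, mirroring the new test case; this is a sketch assuming a pandas 0.19.x-era environment, where pandas.util.testing is the test helper module and pd.Series(name='name') constructs an empty, dtype-less Series:

import pandas as pd
import pandas.util.testing as tm

# Empty named Series, grouped on its (empty) index.
data = pd.Series(name='name')

# Without the len(ids) guard, nunique() reaches `ids[0]` on an empty
# array and raises IndexError; with the fix it returns an empty
# int64 Series instead.
result = data.groupby(level=0).nunique()

expected = pd.Series(name='name', dtype='int64')
tm.assert_series_equal(result, expected)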