Skip to content
364 changes: 174 additions & 190 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,176 +225,6 @@ def test_median(self):
int_ts = Series(np.ones(10, dtype=int), index=lrange(10))
tm.assert_almost_equal(np.median(int_ts), int_ts.median())

@pytest.mark.parametrize('dropna, expected', [
    (True, Series([], dtype=np.float64)),
    (False, Series([], dtype=np.float64))
])
def test_mode_empty(self, dropna, expected):
    """Assert the mode of an empty float64 Series equals ``expected``.

    Both parametrized cases expect an empty float64 Series.
    """
    empty = Series([], dtype=np.float64)
    tm.assert_series_equal(empty.mode(dropna), expected)

@pytest.mark.parametrize('dropna, data, expected', [
    (True, [1, 1, 1, 2], [1]),
    (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
    (False, [1, 1, 1, 2], [1]),
    (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
])
@pytest.mark.parametrize(
    'dt',
    list(np.typecodes['AllInteger'] + np.typecodes['Float'])
)
def test_mode_numerical(self, dropna, data, expected, dt):
    """Check ``Series.mode`` on NaN-free data for each numeric typecode.

    ``dt`` runs over numpy's 'AllInteger' and 'Float' typecodes; the
    expected values are cast to the same dtype as the input.
    """
    modes = Series(data, dtype=dt).mode(dropna)
    tm.assert_series_equal(modes, Series(expected, dtype=dt))

@pytest.mark.parametrize('dropna, expected', [
    (True, [1.0]),
    (False, [1, np.nan]),
])
def test_mode_numerical_nan(self, dropna, expected):
    """Check ``Series.mode`` when 1 and NaN each occur twice.

    With ``dropna=True`` only 1.0 is expected; with ``dropna=False``
    NaN is expected alongside it.
    """
    values = Series([1, 1, 2, np.nan, np.nan])
    modes = values.mode(dropna)
    tm.assert_series_equal(modes, Series(expected))

@pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
    (True, ['b'], ['bar'], ['nan']),
    (False, ['b'], [np.nan], ['nan'])
])
def test_mode_str_obj(self, dropna, expected1, expected2, expected3):
    """Check ``Series.mode`` for char ('c'), object and str dtypes."""
    # Char dtype: 'b' occurs three times, 'a' twice.
    chars = Series(['a'] * 2 + ['b'] * 3, dtype='c')
    tm.assert_series_equal(chars.mode(dropna), Series(expected1, dtype='c'))

    # The object and str cases share the same raw values, in which NaN
    # is the most frequent entry (three occurrences).
    with_nans = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan]

    # Sort and re-index the object result before comparing.
    as_object = Series(with_nans, dtype=object)
    modes = as_object.mode(dropna).sort_values().reset_index(drop=True)
    tm.assert_series_equal(modes, Series(expected2, dtype=object))

    # With dtype=str the expected mode is the string 'nan' in both cases.
    as_str = Series(with_nans, dtype=str)
    tm.assert_series_equal(as_str.mode(dropna), Series(expected3, dtype=str))

@pytest.mark.parametrize('dropna, expected1, expected2', [
    (True, ['foo'], ['foo']),
    (False, ['foo'], [np.nan])
])
def test_mode_mixeddtype(self, dropna, expected1, expected2):
    """Check ``Series.mode`` on a Series mixing an int with strings."""
    # No missing values: 'foo' is expected for either dropna setting.
    no_nans = Series([1, 'foo', 'foo'])
    tm.assert_series_equal(no_nans.mode(dropna), Series(expected1))

    # Three NaNs outnumber the two 'foo' entries; the result is sorted
    # and re-indexed before comparing.
    with_nans = Series([1, 'foo', 'foo', np.nan, np.nan, np.nan])
    modes = with_nans.mode(dropna).sort_values().reset_index(drop=True)
    tm.assert_series_equal(modes, Series(expected2, dtype=object))

@pytest.mark.parametrize('dropna, expected1, expected2', [
    (True, ['1900-05-03', '2011-01-03', '2013-01-02'],
     ['2011-01-03', '2013-01-02']),
    (False, [np.nan], [np.nan, '2011-01-03', '2013-01-02']),
])
def test_mode_datetime(self, dropna, expected1, expected2):
    """Check ``Series.mode`` for 'M8[ns]' data with 'nan' entries."""
    dtype = 'M8[ns]'

    # Every date occurs once; the 'nan' entry occurs twice.
    unique_dates = Series(['2011-01-03', '2013-01-02',
                           '1900-05-03', 'nan', 'nan'], dtype=dtype)
    tm.assert_series_equal(unique_dates.mode(dropna),
                           Series(expected1, dtype=dtype))

    # Two dates and the 'nan' entry each occur twice.
    tied_dates = Series(['2011-01-03', '2013-01-02', '1900-05-03',
                         '2011-01-03', '2013-01-02', 'nan', 'nan'],
                        dtype=dtype)
    tm.assert_series_equal(tied_dates.mode(dropna),
                           Series(expected2, dtype=dtype))

@pytest.mark.parametrize('dropna, expected1, expected2', [
    (True, ['-1 days', '0 days', '1 days'], ['2 min', '1 day']),
    (False, [np.nan], [np.nan, '2 min', '1 day']),
])
def test_mode_timedelta(self, dropna, expected1, expected2):
    """Check ``Series.mode`` for 'timedelta64[ns]' data (gh-5986)."""
    dtype = 'timedelta64[ns]'

    # Each duration occurs once; the 'nan' entry occurs twice.
    distinct = Series(['1 days', '-1 days', '0 days', 'nan', 'nan'],
                      dtype=dtype)
    tm.assert_series_equal(distinct.mode(dropna),
                           Series(expected1, dtype=dtype))

    # '1 day', '2 min' and the 'nan' entry each occur twice.
    tied = Series(['1 day', '1 day', '-1 day', '-1 day 2 min',
                   '2 min', '2 min', 'nan', 'nan'],
                  dtype=dtype)
    tm.assert_series_equal(tied.mode(dropna),
                           Series(expected2, dtype=dtype))

@pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
    (True, Categorical([1, 2], categories=[1, 2]),
     Categorical(['a'], categories=[1, 'a']),
     Categorical([1, 3], categories=[1, 2, 3])),
    (False, Categorical([np.nan], categories=[1, 2]),
     Categorical([np.nan, 'a'], categories=[1, 'a']),
     Categorical([np.nan, 1, 3], categories=[1, 2, 3])),
])
def test_mode_category(self, dropna, expected1, expected2, expected3):
    """Check ``Series.mode`` for categorical data with missing values."""
    # Each input is paired with the expected modes for this dropna value.
    cases = [
        (Categorical([1, 2, np.nan, np.nan]), expected1),
        (Categorical([1, 'a', 'a', np.nan, np.nan]), expected2),
        (Categorical([1, 1, 2, 3, 3, np.nan, np.nan]), expected3),
    ]
    for values, expected in cases:
        modes = Series(values).mode(dropna)
        tm.assert_series_equal(modes, Series(expected, dtype='category'))

@pytest.mark.parametrize('dropna, expected1, expected2', [
    (True, [2**63], [1, 2**63]),
    (False, [2**63], [1, 2**63])
])
def test_mode_intoverflow(self, dropna, expected1, expected2):
    """Check ``Series.mode`` on uint64 data holding 2**63 (overflow test)."""
    big = 2**63

    # 2**63 occurs twice, 1 once.
    repeated = Series([1, big, big], dtype=np.uint64)
    tm.assert_series_equal(repeated.mode(dropna),
                           Series(expected1, dtype=np.uint64))

    # Both values occur once, so both are expected back.
    distinct = Series([1, big], dtype=np.uint64)
    tm.assert_series_equal(distinct.mode(dropna),
                           Series(expected2, dtype=np.uint64))

@pytest.mark.skipif(not compat.PY3, reason="only PY3")
def test_mode_sortwarning(self):
    """Check the warning raised when the mode results cannot be sorted.

    Skipped unless ``compat.PY3`` is true.
    """
    mixed = Series([1, 'foo', 'foo', np.nan, np.nan])
    expected = Series(['foo', np.nan])

    with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
        modes = mixed.mode(dropna=False)
        # Sort and re-index the result before comparing.
        modes = modes.sort_values().reset_index(drop=True)

    tm.assert_series_equal(modes, expected)

def test_prod(self):
    """Run the shared ``_check_stat_op`` helper for 'prod' with ``np.prod``."""
    op_name = 'prod'
    self._check_stat_op(op_name, np.prod)

Expand Down Expand Up @@ -1940,6 +1770,180 @@ def s_main_dtypes():
return df


class TestMode(object):
    """Tests for ``Series.mode`` across dtypes and ``dropna`` settings."""

    @pytest.mark.parametrize('dropna, expected', [
        (True, Series([], dtype=np.float64)),
        (False, Series([], dtype=np.float64))
    ])
    def test_mode_empty(self, dropna, expected):
        """Assert the mode of an empty float64 Series equals ``expected``.

        Both parametrized cases expect an empty float64 Series.
        """
        empty = Series([], dtype=np.float64)
        tm.assert_series_equal(empty.mode(dropna), expected)

    @pytest.mark.parametrize('dropna, data, expected', [
        (True, [1, 1, 1, 2], [1]),
        (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
        (False, [1, 1, 1, 2], [1]),
        (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
    ])
    @pytest.mark.parametrize(
        'dt',
        list(np.typecodes['AllInteger'] + np.typecodes['Float'])
    )
    def test_mode_numerical(self, dropna, data, expected, dt):
        """Check ``Series.mode`` on NaN-free data for each numeric typecode.

        ``dt`` runs over numpy's 'AllInteger' and 'Float' typecodes; the
        expected values are cast to the same dtype as the input.
        """
        modes = Series(data, dtype=dt).mode(dropna)
        tm.assert_series_equal(modes, Series(expected, dtype=dt))

    @pytest.mark.parametrize('dropna, expected', [
        (True, [1.0]),
        (False, [1, np.nan]),
    ])
    def test_mode_numerical_nan(self, dropna, expected):
        """Check ``Series.mode`` when 1 and NaN each occur twice.

        With ``dropna=True`` only 1.0 is expected; with ``dropna=False``
        NaN is expected alongside it.
        """
        values = Series([1, 1, 2, np.nan, np.nan])
        modes = values.mode(dropna)
        tm.assert_series_equal(modes, Series(expected))

    @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
        (True, ['b'], ['bar'], ['nan']),
        (False, ['b'], [np.nan], ['nan'])
    ])
    def test_mode_str_obj(self, dropna, expected1, expected2, expected3):
        """Check ``Series.mode`` for char ('c'), object and str dtypes."""
        # Char dtype: 'b' occurs three times, 'a' twice.
        chars = Series(['a'] * 2 + ['b'] * 3, dtype='c')
        tm.assert_series_equal(chars.mode(dropna),
                               Series(expected1, dtype='c'))

        # The object and str cases share the same raw values, in which
        # NaN is the most frequent entry (three occurrences).
        with_nans = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan]

        as_object = Series(with_nans, dtype=object)
        tm.assert_series_equal(as_object.mode(dropna),
                               Series(expected2, dtype=object))

        # With dtype=str the expected mode is the string 'nan' in both
        # cases.
        as_str = Series(with_nans, dtype=str)
        tm.assert_series_equal(as_str.mode(dropna),
                               Series(expected3, dtype=str))

    @pytest.mark.parametrize('dropna, expected1, expected2', [
        (True, ['foo'], ['foo']),
        (False, ['foo'], [np.nan])
    ])
    def test_mode_mixeddtype(self, dropna, expected1, expected2):
        """Check ``Series.mode`` on a Series mixing an int with strings."""
        # No missing values: 'foo' is expected for either dropna setting.
        no_nans = Series([1, 'foo', 'foo'])
        tm.assert_series_equal(no_nans.mode(dropna), Series(expected1))

        # Three NaNs outnumber the two 'foo' entries.
        with_nans = Series([1, 'foo', 'foo', np.nan, np.nan, np.nan])
        tm.assert_series_equal(with_nans.mode(dropna),
                               Series(expected2, dtype=object))

    @pytest.mark.parametrize('dropna, expected1, expected2', [
        (True, ['1900-05-03', '2011-01-03', '2013-01-02'],
         ['2011-01-03', '2013-01-02']),
        (False, [np.nan], [np.nan, '2011-01-03', '2013-01-02']),
    ])
    def test_mode_datetime(self, dropna, expected1, expected2):
        """Check ``Series.mode`` for 'M8[ns]' data with 'nan' entries."""
        dtype = 'M8[ns]'

        # Every date occurs once; the 'nan' entry occurs twice.
        unique_dates = Series(['2011-01-03', '2013-01-02',
                               '1900-05-03', 'nan', 'nan'], dtype=dtype)
        tm.assert_series_equal(unique_dates.mode(dropna),
                               Series(expected1, dtype=dtype))

        # Two dates and the 'nan' entry each occur twice.
        tied_dates = Series(['2011-01-03', '2013-01-02', '1900-05-03',
                             '2011-01-03', '2013-01-02', 'nan', 'nan'],
                            dtype=dtype)
        tm.assert_series_equal(tied_dates.mode(dropna),
                               Series(expected2, dtype=dtype))

    @pytest.mark.parametrize('dropna, expected1, expected2', [
        (True, ['-1 days', '0 days', '1 days'], ['2 min', '1 day']),
        (False, [np.nan], [np.nan, '2 min', '1 day']),
    ])
    def test_mode_timedelta(self, dropna, expected1, expected2):
        """Check ``Series.mode`` for 'timedelta64[ns]' data (gh-5986)."""
        dtype = 'timedelta64[ns]'

        # Each duration occurs once; the 'nan' entry occurs twice.
        distinct = Series(['1 days', '-1 days', '0 days', 'nan', 'nan'],
                          dtype=dtype)
        tm.assert_series_equal(distinct.mode(dropna),
                               Series(expected1, dtype=dtype))

        # '1 day', '2 min' and the 'nan' entry each occur twice.
        tied = Series(['1 day', '1 day', '-1 day', '-1 day 2 min',
                       '2 min', '2 min', 'nan', 'nan'],
                      dtype=dtype)
        tm.assert_series_equal(tied.mode(dropna),
                               Series(expected2, dtype=dtype))

    @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
        (True, Categorical([1, 2], categories=[1, 2]),
         Categorical(['a'], categories=[1, 'a']),
         Categorical([3, 1], categories=[3, 2, 1], ordered=True)),
        (False, Categorical([np.nan], categories=[1, 2]),
         Categorical([np.nan, 'a'], categories=[1, 'a']),
         Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True)),
    ])
    def test_mode_category(self, dropna, expected1, expected2, expected3):
        """Check ``Series.mode`` for categorical data with missing values.

        The third case uses an ordered categorical whose categories are
        listed as [3, 2, 1].
        """
        # Each input is paired with the expected modes for this dropna
        # value.
        cases = [
            (Categorical([1, 2, np.nan, np.nan]), expected1),
            (Categorical([1, 'a', 'a', np.nan, np.nan]), expected2),
            (Categorical([1, 1, 2, 3, 3, np.nan, np.nan],
                         categories=[3, 2, 1], ordered=True), expected3),
        ]
        for values, expected in cases:
            modes = Series(values).mode(dropna)
            tm.assert_series_equal(modes,
                                   Series(expected, dtype='category'))

    @pytest.mark.parametrize('dropna, expected1, expected2', [
        (True, [2**63], [1, 2**63]),
        (False, [2**63], [1, 2**63])
    ])
    def test_mode_intoverflow(self, dropna, expected1, expected2):
        """Check ``Series.mode`` on uint64 data holding 2**63 (overflow)."""
        big = 2**63

        # 2**63 occurs twice, 1 once.
        repeated = Series([1, big, big], dtype=np.uint64)
        tm.assert_series_equal(repeated.mode(dropna),
                               Series(expected1, dtype=np.uint64))

        # Both values occur once, so both are expected back.
        distinct = Series([1, big], dtype=np.uint64)
        tm.assert_series_equal(distinct.mode(dropna),
                               Series(expected2, dtype=np.uint64))

    @pytest.mark.skipif(not compat.PY3, reason="only PY3")
    def test_mode_sortwarning(self):
        """Check the warning raised when the mode results cannot be sorted.

        Skipped unless ``compat.PY3`` is true.
        """
        mixed = Series([1, 'foo', 'foo', np.nan, np.nan])
        expected = Series(['foo', np.nan])

        with tm.assert_produces_warning(UserWarning,
                                        check_stacklevel=False):
            modes = mixed.mode(dropna=False)
            # Sort and re-index the result before comparing.
            modes = modes.sort_values().reset_index(drop=True)

        tm.assert_series_equal(modes, expected)


class TestNLargestNSmallest(object):

@pytest.mark.parametrize(
Expand Down Expand Up @@ -2068,26 +2072,6 @@ def test_min_max(self):
assert np.isnan(_min)
assert _max == 1

def test_mode(self):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I deleted this test because I think it's all covered in test_mode_category

# Case 1: ordered categorical with categories listed as [5, 4, 3, 2, 1];
# 5 occurs three times and is the only expected mode.
s = Series(Categorical([1, 1, 2, 4, 5, 5, 5],
categories=[5, 4, 3, 2, 1], ordered=True))
res = s.mode()
exp = Series(Categorical([5], categories=[
5, 4, 3, 2, 1], ordered=True))
tm.assert_series_equal(res, exp)
# Case 2: 1 and 5 each occur three times; the expected modes are listed
# as 5 then 1.
s = Series(Categorical([1, 1, 1, 4, 5, 5, 5],
categories=[5, 4, 3, 2, 1], ordered=True))
res = s.mode()
exp = Series(Categorical([5, 1], categories=[
5, 4, 3, 2, 1], ordered=True))
tm.assert_series_equal(res, exp)
# Case 3: every value occurs once; all five values are expected back,
# listed as [5, 4, 3, 2, 1].
s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1],
ordered=True))
res = s.mode()
exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1],
ordered=True))
tm.assert_series_equal(res, exp)

def test_value_counts(self):
# GH 12835
cats = Categorical(list('abcccb'), categories=list('cabd'))
Expand Down