Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ Bug Fixes




- Bug in ``value_counts`` where ``NaT`` did not qualify as missing (``NaN``) (:issue:`7423`)



Expand Down
10 changes: 6 additions & 4 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,25 +202,27 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
raise TypeError("bins argument only works with numeric data.")
values = cat.labels

if com.is_integer_dtype(values.dtype):
dtype = values.dtype
if com.is_integer_dtype(dtype):
values = com._ensure_int64(values)
keys, counts = htable.value_count_int64(values)

elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
dtype = values.dtype
values = values.view(np.int64)
keys, counts = htable.value_count_int64(values)

from pandas.lib import NaT
msk = keys != NaT.value
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should be comparing against iNaT, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's iNaT? (NaT.value == -9223372036854775808).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the same value, but the convention is to use pandas.tslib.iNaT.

keys, counts = keys[msk], counts[msk]
# convert the keys back to the dtype we came in
keys = Series(keys, dtype=dtype)
keys = keys.astype(dtype)

else:
mask = com.isnull(values)
values = com._ensure_object(values)
keys, counts = htable.value_count_object(values, mask)

result = Series(counts, index=com._values_from_object(keys))

if bins is not None:
# TODO: This next line should be more efficient
result = result.reindex(np.arange(len(cat.levels)), fill_value=0)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,20 @@ def test_value_counts_dtypes(self):

self.assertRaises(TypeError, lambda s: algos.value_counts(s, bins=1), ['1', 1])

def test_value_counts_nat(self):
td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]')
dt = pd.to_datetime(['NaT', '2014-01-01'])

res_td = algos.value_counts(td)
res_dt = algos.value_counts(dt)

self.assertEqual(len(res_td), 1)
self.assertEqual(len(res_dt), 1)

exp_dt = pd.Series({pd.Timestamp('2014-01-01 00:00:00'): 1})
tm.assert_series_equal(res_dt, exp_dt)

# TODO same for res_td (timedelta)

def test_quantile():
s = Series(np.random.randn(100))
Expand Down