Skip to content

Commit 05f3491

Browse files
committed
API/BUG: freq retention in value_counts
1 parent 5cc3240 commit 05f3491

File tree

2 files changed

+180
-0
lines changed

2 files changed

+180
-0
lines changed

pandas/core/algorithms.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -937,6 +937,36 @@ def value_counts_internal(
937937
if normalize:
938938
result = result / counts.sum()
939939

940+
# freq patching for DatetimeIndex, TimedeltaIndex
941+
try:
942+
from pandas import (
943+
DatetimeIndex,
944+
TimedeltaIndex,
945+
)
946+
947+
if (
948+
bins is None
949+
and not sort
950+
and isinstance(values, (DatetimeIndex, TimedeltaIndex))
951+
and values.freq is not None
952+
and isinstance(result.index, (DatetimeIndex, TimedeltaIndex))
953+
and len(result.index) == len(values)
954+
and result.index.equals(values)
955+
):
956+
base_freq = values.freq
957+
# Rebuild the index with the original freq; name preserved.
958+
if isinstance(result.index, DatetimeIndex):
959+
result.index = DatetimeIndex(
960+
result.index._data, freq=base_freq, name=result.index.name
961+
)
962+
else: # TimedeltaIndex
963+
result.index = TimedeltaIndex(
964+
result.index._data, freq=base_freq, name=result.index.name
965+
)
966+
except Exception:
967+
# Freq patching is best-effort: if it fails, value_counts is unaffected.
968+
pass
969+
940970
return result
941971

942972

pandas/tests/base/test_value_counts.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,153 @@ def test_value_counts_object_inference_deprecated():
339339
exp = dti.value_counts()
340340
exp.index = exp.index.astype(object)
341341
tm.assert_series_equal(res, exp)
342+
343+
344+
def _vc_make_index(kind: str, periods=5, freq="D"):
    # Build a regularly spaced index for the value_counts freq tests:
    #   kind == "dt" -> pd.date_range starting at 2016-01-01
    #   kind == "td" -> pd.timedelta_range starting at Timedelta(0)
    # Any other kind raises ValueError.
    if kind == "td":
        return pd.timedelta_range(Timedelta(0), periods=periods, freq=freq)
    if kind != "dt":
        raise ValueError("kind must be 'dt' or 'td'")
    return pd.date_range("2016-01-01", periods=periods, freq=freq)
350+
351+
352+
@pytest.mark.parametrize(
    ("kind", "freq", "normalize"),
    [
        ("dt", "D", False),
        ("dt", "D", True),
        ("td", "D", False),
        ("td", "D", True),
        ("td", Timedelta(hours=1), False),
        ("td", Timedelta(hours=1), True),
    ],
)
def test_value_counts_freq_preserved_datetimelike_no_sort(kind, freq, normalize):
    # An unsorted value_counts over a regular datetime-like index is expected
    # to report the same freq as the input index.
    source = _vc_make_index(kind, periods=5, freq=freq)
    counts = source.value_counts(sort=False, normalize=normalize)

    assert counts.index.freq == source.freq
    if not normalize:
        return
    # When normalized, every entry is checked against 1 / len(source).
    expected_share = 1 / len(source)
    assert np.isclose(counts.values, expected_share).all()
369+
370+
371+
@pytest.mark.parametrize(
    ("kind", "freq"),
    [
        ("dt", "D"),
        ("td", "D"),
        ("td", Timedelta(hours=1)),
    ],
)
def test_value_counts_freq_drops_datetimelike_when_sorted(kind, freq):
    # value_counts is called with its default sort=True (which reorders), so
    # the resulting index is expected to carry no freq.
    source = _vc_make_index(kind, periods=5, freq=freq)
    sorted_counts = source.value_counts()
    assert sorted_counts.index.freq is None
383+
384+
385+
@pytest.mark.parametrize(
    ("kind", "freq"),
    [
        ("dt", "D"),
        ("td", "D"),
        ("td", Timedelta(hours=1)),
    ],
)
def test_value_counts_freq_drops_datetimelike_with_duplicates(kind, freq):
    # Re-inserting an existing label at position 1 creates a duplicate; the
    # unsorted value_counts index is then expected to have no freq.
    regular = _vc_make_index(kind, periods=5, freq=freq)
    repeated_label = regular[1]
    with_duplicate = regular.insert(1, repeated_label)

    counts = with_duplicate.value_counts(sort=False)
    assert counts.index.freq is None
398+
399+
400+
@pytest.mark.parametrize(
    ("kind", "freq"),
    [
        ("dt", "D"),
        ("td", "D"),
        ("td", Timedelta(hours=1)),
    ],
)
def test_value_counts_freq_drops_datetimelike_with_gap(kind, freq):
    # Deleting the element at position 2 breaks the regular spacing; the
    # unsorted value_counts index is then expected to have no freq.
    regular = _vc_make_index(kind, periods=5, freq=freq)
    with_gap = regular.delete(2)

    counts = with_gap.value_counts(sort=False)
    assert counts.index.freq is None
413+
414+
415+
@pytest.mark.parametrize(
    ("kind", "freq", "dropna", "expect_hasnans"),
    [
        ("dt", "D", False, True),  # keep NaT
        ("dt", "D", True, False),  # drop NaT
        ("td", "D", False, True),
        ("td", "D", True, False),
        ("td", Timedelta(hours=1), False, True),
        ("td", Timedelta(hours=1), True, False),
    ],
)
def test_value_counts_freq_drops_datetimelike_with_nat(
    kind, freq, dropna, expect_hasnans
):
    # A NaT is inserted at position 1. Whether or not it is dropped, the
    # result index is expected to have no freq; hasnans must match dropna.
    regular = _vc_make_index(kind, periods=3, freq=freq)
    with_nat = regular.insert(1, pd.NaT)

    counts = with_nat.value_counts(dropna=dropna, sort=False)
    result_index = counts.index
    assert result_index.freq is None
    assert result_index.hasnans is expect_hasnans
434+
435+
436+
@pytest.mark.parametrize(
    ("freq", "start", "periods", "sort"),
    [
        ("D", "2016-01-01", 5, False),
        ("D", "2016-01-01", 5, True),
        ("M", "2016-01", 6, False),  # MonthEnd
        ("M", "2016-01", 6, True),
        ("Q-DEC", "2016Q1", 4, False),  # QuarterEnd (Dec anchored)
        ("Q-DEC", "2016Q1", 4, True),
        ("Y-DEC", "2014", 3, False),  # YearEnd (Dec anchored)
        ("Y-DEC", "2014", 3, True),
    ],
)
def test_value_counts_period_freq_preserved_sort_and_nosort(freq, start, periods, sort):
    # For a PeriodIndex the result index is expected to stay a PeriodIndex
    # with the same dtype and freq, for both sort=True and sort=False.
    source = pd.period_range(start=start, periods=periods, freq=freq)
    result_index = source.value_counts(sort=sort).index

    assert isinstance(result_index, pd.PeriodIndex)
    assert result_index.dtype == source.dtype
    assert result_index.freq == source.freq
455+
456+
457+
def test_value_counts_period_freq_preserved_with_duplicates():
    # Duplicating one label of a monthly PeriodIndex must not change the
    # type, dtype or freq of the value_counts result index.
    source = pd.period_range("2016-01", periods=5, freq="M")
    repeated_label = source[1]
    with_duplicate = source.insert(1, repeated_label)

    result_index = with_duplicate.value_counts(sort=False).index
    assert isinstance(result_index, pd.PeriodIndex)
    assert result_index.dtype == source.dtype
    assert result_index.freq == source.freq
464+
465+
466+
def test_value_counts_period_freq_preserved_with_gap():
    # Removing one element from a monthly PeriodIndex must not change the
    # type, dtype or freq of the value_counts result index.
    source = pd.period_range("2016-01", periods=5, freq="M")
    with_gap = source.delete(2)

    result_index = with_gap.value_counts(sort=False).index
    assert isinstance(result_index, pd.PeriodIndex)
    assert result_index.dtype == source.dtype
    assert result_index.freq == source.freq
473+
474+
475+
def test_value_counts_period_freq_preserved_with_normalize():
    # normalize=True must leave the PeriodIndex type, dtype and freq intact,
    # and every entry is checked against 1 / len(source).
    source = pd.period_range("2016-01", periods=4, freq="M")
    counts = source.value_counts(normalize=True, sort=False)

    result_index = counts.index
    assert isinstance(result_index, pd.PeriodIndex)
    assert result_index.dtype == source.dtype
    assert result_index.freq == source.freq

    expected_share = 1 / len(source)
    assert np.isclose(counts.values, expected_share).all()
482+
483+
484+
def test_value_counts_period_freq_preserved_with_nat_dropna_true():
    # A NaT is inserted at position 1 and then dropped by dropna=True; the
    # result must have no NaNs and keep the PeriodIndex type, dtype and freq.
    source = pd.period_range("2016-01", periods=5, freq="M")
    with_nat = source.insert(1, pd.NaT)

    result_index = with_nat.value_counts(dropna=True, sort=False).index
    assert not result_index.hasnans
    assert isinstance(result_index, pd.PeriodIndex)
    assert result_index.dtype == source.dtype
    assert result_index.freq == source.freq

0 commit comments

Comments
 (0)