Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ Groupby/resample/rolling
- Bug in :meth:`df.groupby(..).quantile() <pandas.core.groupby.DataFrameGroupBy.quantile>` and :meth:`df.resample(..).quantile() <pandas.core.resample.Resampler.quantile>` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`)
- Bug in :meth:`Rolling.median` and :meth:`Rolling.quantile` returned wrong values for :class:`BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`)
- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)
- Bug in :class:`RollingGroupby` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is ok to backport to 1.1.5 if possible

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this fixes a regression from 1.0.5 -> 1.1.0, so backport is preferable.


Reshaping
^^^^^^^^^
Expand Down
34 changes: 27 additions & 7 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,19 +762,39 @@ def _apply(
use_numba_cache,
**kwargs,
)
# Compose MultiIndex result from grouping levels then rolling level
# Aggregate the MultiIndex data as tuples then the level names
grouped_object_index = self.obj.index
grouped_index_name = [*grouped_object_index.names]
groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
result_index_names = groupby_keys + grouped_index_name
# Reconstruct the resulting MultiIndex from tuples
# 1st set of levels = group by labels
# 2nd set of levels = original index
# Ignore 2nd set of levels if a group by label includes an index level
result_index_names = [
grouping.name for grouping in self._groupby.grouper._groupings
]
grouped_object_index = None

column_keys = [
key
for key in result_index_names
if key not in self.obj.index.names or key is None
]

if len(column_keys) == len(result_index_names):
grouped_object_index = self.obj.index
grouped_index_name = [*grouped_object_index.names]
result_index_names += grouped_index_name
else:
# The result still contains the grouping column(s), so drop them below
result = result.drop(columns=column_keys, errors="ignore")

result_index_data = []
for key, values in self._groupby.grouper.indices.items():
for value in values:
data = [
*com.maybe_make_list(key),
*com.maybe_make_list(grouped_object_index[value]),
*com.maybe_make_list(
grouped_object_index[value]
if grouped_object_index is not None
else []
),
]
result_index_data.append(tuple(data))

Expand Down
32 changes: 31 additions & 1 deletion pandas/tests/window/test_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

import pandas as pd
from pandas import DataFrame, Series
from pandas import DataFrame, MultiIndex, Series
import pandas._testing as tm
from pandas.core.groupby.groupby import get_groupby

Expand Down Expand Up @@ -601,3 +601,33 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
df = df.set_index("a")
with pytest.raises(ValueError, match=f"{key} must be monotonic"):
df.groupby("c").rolling("60min", **rollings)

def test_groupby_rolling_group_keys(self):
# GH 37641
# Group a Series by both levels of its own MultiIndex (with group_keys=False)
# and take a rolling mean. The expected index holds idx1/idx2 exactly once
# (two-element tuples): the grouping levels are not repeated next to the
# original index levels.
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))

s = Series([1, 2, 3], index=index)
# Window of 1, so every rolling mean is the row's own value as a float.
result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean()
expected = Series(
[1.0, 2.0, 3.0],
index=MultiIndex.from_tuples(
[("val1", "val1"), ("val1", "val1"), ("val2", "val2")],
names=["idx1", "idx2"],
),
)
tm.assert_series_equal(result, expected)

def test_groupby_rolling_index_level_and_column_label(self):
# Group by a mix of an index level ("idx1") and a column ("A"), then take a
# rolling mean. Expected: the result index is named ["idx1", "A"] and only
# column "B" remains, i.e. the grouping column "A" is not kept as a column.
# NOTE(review): presumably covers the same issue as GH 37641 - confirm.
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))

df = DataFrame({"A": [1, 1, 2], "B": range(3)}, index=index)
# Window of 1, so every rolling mean of "B" is the row's own value as a float.
result = df.groupby(["idx1", "A"]).rolling(1).mean()
expected = DataFrame(
{"B": [0.0, 1.0, 2.0]},
index=MultiIndex.from_tuples(
[("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"]
),
)
tm.assert_frame_equal(result, expected)