Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ Groupby/resample/rolling
- Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would not retain ``com``, ``span``, ``alpha`` or ``halflife`` attributes (:issue:`40164`)
- :class:`core.window.ewm.ExponentialMovingWindow` now raises a ``NotImplementedError`` when specifying ``times`` with ``adjust=False`` due to an incorrect calculation (:issue:`40098`)
- Bug in :meth:`Series.asfreq` and :meth:`DataFrame.asfreq` dropping rows when the index is not sorted (:issue:`39805`)
- Bug in :class:`core.window.rolling.RollingGroupby` where a level of a :class:`MultiIndex` would be dropped if it overlapped with a groupby label (:issue:`38787`, :issue:`38523`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's make this a full sub-section and mention that this goes back to the 1.1.x behavior.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


Reshaping
^^^^^^^^^
Expand Down
23 changes: 10 additions & 13 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,26 +577,23 @@ def _apply(
numba_cache_key,
**kwargs,
)
# Reconstruct the resulting MultiIndex from tuples
# Reconstruct the resulting MultiIndex
# 1st set of levels = group by labels
# 2nd set of levels = original index
# Ignore 2nd set of levels if a group by label include an index level
result_index_names = copy.copy(self._grouper.names)
grouped_object_index = None
# 2nd set of levels = original DataFrame/Series index
grouped_object_index = self.obj.index
grouped_index_name = [*grouped_object_index.names]
groupby_keys = copy.copy(self._grouper.names)
result_index_names = groupby_keys + grouped_index_name

column_keys = [
drop_columns = [
key
for key in result_index_names
for key in self._grouper.names
if key not in self.obj.index.names or key is None
]

if len(column_keys) == len(result_index_names):
grouped_object_index = self.obj.index
grouped_index_name = [*grouped_object_index.names]
result_index_names += grouped_index_name
else:
if len(drop_columns) != len(groupby_keys):
# The result still contains the groupby column(s), so drop them explicitly
result = result.drop(columns=column_keys, errors="ignore")
result = result.drop(columns=drop_columns, errors="ignore")

codes = self._grouper.codes
levels = copy.copy(self._grouper.levels)
Expand Down
47 changes: 42 additions & 5 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,23 +588,31 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
with pytest.raises(ValueError, match=f"{key} must be monotonic"):
df.groupby("c").rolling("60min", **rollings)

def test_groupby_rolling_group_keys(self):
@pytest.mark.parametrize("group_keys", [True, False])
def test_groupby_rolling_group_keys(self, group_keys):
# GH 37641
# GH 38523: GH 37641 actually was not a bug.
# group_keys only applies to groupby.apply directly
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))

s = Series([1, 2, 3], index=index)
result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean()
result = s.groupby(["idx1", "idx2"], group_keys=group_keys).rolling(1).mean()
expected = Series(
[1.0, 2.0, 3.0],
index=MultiIndex.from_tuples(
[("val1", "val1"), ("val1", "val1"), ("val2", "val2")],
names=["idx1", "idx2"],
[
("val1", "val1", "val1", "val1"),
("val1", "val1", "val1", "val1"),
("val2", "val2", "val2", "val2"),
],
names=["idx1", "idx2", "idx1", "idx2"],
),
)
tm.assert_series_equal(result, expected)

def test_groupby_rolling_index_level_and_column_label(self):
# The groupby keys should not appear as a resulting column
arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))

Expand All @@ -613,7 +621,12 @@ def test_groupby_rolling_index_level_and_column_label(self):
expected = DataFrame(
{"B": [0.0, 1.0, 2.0]},
index=MultiIndex.from_tuples(
[("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"]
[
("val1", 1, "val1", "val1"),
("val1", 1, "val1", "val1"),
("val2", 2, "val2", "val2"),
],
names=["idx1", "A", "idx1", "idx2"],
),
)
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -695,6 +708,30 @@ def test_by_column_not_in_values(self, columns):
assert "A" not in result.columns
tm.assert_frame_equal(g.obj, original_obj)

def test_groupby_level(self):
# GH 38523, 38787
# Rolling over a groupby on an index level must keep every level of the
# original MultiIndex in the result, including the level that was grouped on.
arrays = [
["Falcon", "Falcon", "Parrot", "Parrot"],
["Captive", "Wild", "Captive", "Wild"],
]
index = MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
df = DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
# Group on the first index level ("Animal") and sum over a 2-row window.
result = df.groupby(level=0)["Max Speed"].rolling(2).sum()
# Expected index: the groupby key first, followed by both original index
# levels, so "Animal" appears twice in the names. The first row of each
# group is NaN in the expected values.
expected = Series(
[np.nan, 740.0, np.nan, 50.0],
index=MultiIndex.from_tuples(
[
("Falcon", "Falcon", "Captive"),
("Falcon", "Falcon", "Wild"),
("Parrot", "Parrot", "Captive"),
("Parrot", "Parrot", "Wild"),
],
names=["Animal", "Animal", "Type"],
),
name="Max Speed",
)
tm.assert_series_equal(result, expected)


class TestExpanding:
def setup_method(self):
Expand Down