diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 92cf2bed9ca47..e74cc013ba201 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -244,7 +244,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`) -- +- Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 3d3a19a1c7a40..a4b83695f57b6 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -310,7 +310,10 @@ cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, t = y - mean_x[0] compensation[0] = t + mean_x[0] - y delta = t - mean_x[0] = mean_x[0] + delta / nobs[0] + if nobs[0]: + mean_x[0] = mean_x[0] + delta / nobs[0] + else: + mean_x[0] = 0 ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0]) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 5d7fc50620ef8..03b43026c9a6c 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -695,6 +695,31 @@ def test_groupby_rolling_object_doesnt_affect_groupby_apply(self): assert not g.mutated assert not g.grouper.mutated + @pytest.mark.parametrize( + ("window", "min_periods", "closed", "expected"), + [ + (2, 0, "left", [None, 0.0, 1.0, 1.0, None, 0.0, 1.0, 1.0]), + (2, 2, "left", [None, None, 1.0, 1.0, None, None, 1.0, 1.0]), + (4, 4, "left", [None, None, None, None, None, None, None, None]), + (4, 4, "right", [None, None, None, 5.0, None, None, None, 5.0]), + ], + ) + def test_groupby_rolling_var(self, window, min_periods, closed, expected): + df = DataFrame([1, 2, 3, 4, 5, 6, 7, 8]) + result = ( + df.groupby([1, 2, 1, 2, 1, 2, 1, 2]) + .rolling(window=window, min_periods=min_periods, closed=closed) + .var(0) + ) + expected_result = DataFrame( + np.array(expected, dtype="float64"), + index=MultiIndex( + levels=[[1, 2], [0, 1, 2, 3, 4, 5, 6, 7]], + codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 2, 4, 6, 1, 3, 5, 7]], + ), + ) + tm.assert_frame_equal(result, expected_result) + @pytest.mark.parametrize( "columns", [MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]] )