diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d721213dc38e7..39d7ff9a4d44f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1081,6 +1081,7 @@ Groupby/resample/rolling - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`) +- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` where the end of the window was not indexed correctly. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 0c8ea28b60ce8..979660929f2bb 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -442,7 +442,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, # Over the first window, observations can only be added # never removed - if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + if i == 0 or not is_monotonic_increasing_bounds or s < end[i]: prev_value = values[s] num_consecutive_same_value = 0 diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 18aafa0d7b71e..ee7086c444ee5 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -8,9 +8,6 @@ from pandas.compat import ( IS64, - is_platform_arm, - is_platform_power, - is_platform_riscv64, ) from pandas.errors import Pandas4Warning @@ -1082,27 +1079,91 @@ def test_rolling_sem(frame_or_series): tm.assert_series_equal(result, expected) -@pytest.mark.xfail( - is_platform_arm() or is_platform_power() or 
is_platform_riscv64(), - reason="GH 38921", -) @pytest.mark.parametrize( - ("func", "third_value", "values"), + ("func", "values", "window", "ddof", "expected_values"), [ - ("var", 1, [5e33, 0, 0.5, 0.5, 2, 0]), - ("std", 1, [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0]), - ("var", 2, [5e33, 0.5, 0, 0.5, 2, 0]), - ("std", 2, [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0]), + ("var", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, [5e33, 0, 0.5, 0.5, 2, 0]), + ( + "std", + [99999999999999999, 1, 1, 2, 3, 1, 1], + 2, + 1, + [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0], + ), + ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, [5e33, 0.5, 0, 0.5, 2, 0]), + ( + "std", + [99999999999999999, 1, 2, 2, 3, 1, 1], + 2, + 1, + [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0], + ), + ( + "std", + [1.2e03, 1.3e17, 1.5e17, 1.995e03, 1.990e03], + 2, + 1, + [9.192388e16, 1.414214e16, 1.060660e17, 3.535534e00], + ), + ( + "var", + [ + 0.00000000e00, + 0.00000000e00, + 3.16188252e-18, + 2.95781651e-16, + 2.23153542e-51, + 0.00000000e00, + 0.00000000e00, + 5.39943432e-48, + 1.38206260e-73, + 0.00000000e00, + ], + 3, + 1, + [ + 3.33250036e-036, + 2.88538519e-032, + 2.88538519e-032, + 2.91622617e-032, + 1.65991678e-102, + 9.71796366e-096, + 9.71796366e-096, + 9.71796366e-096, + ], + ), + ( + "std", + [1, -1, 0, 1, 3, 2, -2, 10000000000, 1, 2, 0, -2, 1, 3, 0, 1], + 6, + 1, + [ + 1.41421356e00, + 1.87082869e00, + 4.08248290e09, + 4.08248290e09, + 4.08248290e09, + 4.08248290e09, + 4.08248290e09, + 4.08248290e09, + 1.72240142e00, + 1.75119007e00, + 1.64316767e00, + ], + ), ], ) -def test_rolling_var_numerical_issues(func, third_value, values): - # GH: 37051 - ds = Series([99999999999999999, 1, third_value, 2, 3, 1, 1]) - result = getattr(ds.rolling(2), func)() - expected = Series([np.nan] + values) - tm.assert_series_equal(result, expected) +def test_rolling_var_correctness(func, values, window, ddof, expected_values): + # GH: 37051, 42064, 54518, 52407, 47721 + ts = 
Series(values) + result = getattr(ts.rolling(window=window), func)(ddof=ddof) + if result.last_valid_index(): + result = result[ + result.first_valid_index() : result.last_valid_index() + 1 + ].reset_index(drop=True) + expected = Series(expected_values) + tm.assert_series_equal(result, expected, atol=1e-55) # GH 42064 - # new `roll_var` will output 0.0 correctly tm.assert_series_equal(result == 0, expected == 0)