From cf9f8faba9df4ca3a30e865a2ab120efb818e604 Mon Sep 17 00:00:00 2001 From: suzyahyah Date: Mon, 29 Sep 2025 23:39:50 -0400 Subject: [PATCH 1/6] fix: bug in sliding window --- pandas/_libs/window/aggregations.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 0c8ea28b60ce8..c8154ada07acc 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -442,7 +442,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, # Over the first window, observations can only be added # never removed - if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: + if i == 0 or not is_monotonic_increasing_bounds or s <= end[i]: prev_value = values[s] num_consecutive_same_value = 0 From ac2dd40ef83dfea58369e5eade4ae3447d0817d8 Mon Sep 17 00:00:00 2001 From: suzyahyah Date: Mon, 29 Sep 2025 23:40:38 -0400 Subject: [PATCH 2/6] feat: consolidate tests for rolling variance correctness against numpy --- pandas/tests/window/test_rolling.py | 62 +++++++++++++++++++---------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 18aafa0d7b71e..339a13e102497 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -4,6 +4,9 @@ ) import numpy as np +from numpy.lib.stride_tricks import sliding_window_view + + import pytest from pandas.compat import ( @@ -1081,30 +1084,47 @@ def test_rolling_sem(frame_or_series): expected = Series([np.nan] + [0.7071067811865476] * 2) tm.assert_series_equal(result, expected) - -@pytest.mark.xfail( - is_platform_arm() or is_platform_power() or is_platform_riscv64(), - reason="GH 38921", -) @pytest.mark.parametrize( - ("func", "third_value", "values"), - [ - ("var", 1, [5e33, 0, 0.5, 0.5, 2, 0]), - ("std", 1, [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0]), - ("var", 2, [5e33, 0.5, 
0, 0.5, 2, 0]), - ("std", 2, [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0]), - ], + ("func", "values", "window", "ddof", "exp_value"), + [ + ("var", [2.72993945, 1.58444294, 4.14371708, 4.92961687, 2.7138744 ,3.48168586, 0.69505519, 1.87511994, 4.20167276, 0.04797675], 3, 1, "numpy_compute"), + ("std", [2.72993945, 1.58444294, 4.14371708, 4.92961687, 2.7138744 ,3.48168586, 0.69505519, 1.87511994, 4.20167276, 0.04797675], 3, 1, "numpy_compute"), + ("var", [2.72993945, 1.58444294, 4.14371708, 4.92961687, 2.7138744 ,3.48168586, 0.69505519, 1.87511994, 4.20167276, 0.04797675], 2, 1, "numpy_compute"), + ("std", [2.72993945, 1.58444294, 4.14371708, 4.92961687, 2.7138744 ,3.48168586, 0.69505519, 1.87511994, 4.20167276, 0.04797675], 2, 1, "numpy_compute"), + ("var", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, 0), + ("std", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, 0), + ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, 0), + ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, 0), + ("var", [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], 5, 0, "numpy_compute"), + ], ) -def test_rolling_var_numerical_issues(func, third_value, values): - # GH: 37051 - ds = Series([99999999999999999, 1, third_value, 2, 3, 1, 1]) - result = getattr(ds.rolling(2), func)() - expected = Series([np.nan] + values) - tm.assert_series_equal(result, expected) +def test_rolling_var_correctness(func, values, window, ddof, exp_value): + # This tests subsume the previous tests under test_rolling_var_numerical_issues + # GH: 37051, 42064, 54333 + ts = Series(values) + result = getattr(ts.rolling(window=window, center=True), func)(ddof=ddof) + if result.last_valid_index(): + result = result[result.first_valid_index() : result.last_valid_index()+1].reset_index(drop=True) + expected = Series(getattr(sliding_window_view(values, window_shape=window), func)(axis=-1, ddof=ddof)) #.var(axis=-1, ddof=ddof)) + tm.assert_series_equal(result, expected, atol=1e-55) # GH 42064 - # new `roll_var` will output 0.0 
correctly - tm.assert_series_equal(result == 0, expected == 0) - + if exp_value == 0: + # new `roll_var` will output 0.0 correctly + tm.assert_series_equal(result==0, expected==0) + +def test_rolling_var_numerical_stability(): + # GH 52407 + A = [0.00000000e+00, 0.00000000e+00, 3.16188252e-18, 2.95781651e-16, + 2.23153542e-51, 0.00000000e+00, 0.00000000e+00, 5.39943432e-48, + 1.38206260e-73, 0.00000000e+00] + ts = Series(A) + + result = ts.rolling(window=3, center=True).var(ddof=1) + result = result[result.first_valid_index() : result.last_valid_index()+1].reset_index(drop=True) + + # numpy implementation + expected = Series(sliding_window_view(A, window_shape=3).var(axis=-1, ddof=1)) + tm.assert_series_equal(result, expected, atol=1e-55) def test_timeoffset_as_window_parameter_for_corr(unit): # GH: 28266 From fea0d10d84d1bd67308b0811870aba30c181a65d Mon Sep 17 00:00:00 2001 From: suzyahyah Date: Tue, 30 Sep 2025 00:43:08 -0400 Subject: [PATCH 3/6] lint: reformatted by pre-commit run --- pandas/tests/window/test_rolling.py | 135 ++++++++++++++++++++++------ 1 file changed, 109 insertions(+), 26 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 339a13e102497..8560ea08be66e 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -5,15 +5,10 @@ import numpy as np from numpy.lib.stride_tricks import sliding_window_view - - import pytest from pandas.compat import ( IS64, - is_platform_arm, - is_platform_power, - is_platform_riscv64, ) from pandas.errors import Pandas4Warning @@ -1084,19 +1079,88 @@ def test_rolling_sem(frame_or_series): expected = Series([np.nan] + [0.7071067811865476] * 2) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( - ("func", "values", "window", "ddof", "exp_value"), - [ - ("var", [2.72993945, 1.58444294, 4.14371708, 4.92961687, 2.7138744 ,3.48168586, 0.69505519, 1.87511994, 4.20167276, 0.04797675], 3, 1, "numpy_compute"), - ("std", 
[2.72993945, 1.58444294, 4.14371708, 4.92961687, 2.7138744 ,3.48168586, 0.69505519, 1.87511994, 4.20167276, 0.04797675], 3, 1, "numpy_compute"), - ("var", [2.72993945, 1.58444294, 4.14371708, 4.92961687, 2.7138744 ,3.48168586, 0.69505519, 1.87511994, 4.20167276, 0.04797675], 2, 1, "numpy_compute"), - ("std", [2.72993945, 1.58444294, 4.14371708, 4.92961687, 2.7138744 ,3.48168586, 0.69505519, 1.87511994, 4.20167276, 0.04797675], 2, 1, "numpy_compute"), - ("var", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, 0), - ("std", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, 0), - ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, 0), - ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, 0), - ("var", [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], 5, 0, "numpy_compute"), - ], + ("func", "values", "window", "ddof", "exp_value"), + [ + ( + "var", + [ + 2.72993945, + 1.58444294, + 4.14371708, + 4.92961687, + 2.7138744, + 3.48168586, + 0.69505519, + 1.87511994, + 4.20167276, + 0.04797675, + ], + 3, + 1, + "numpy_compute", + ), + ( + "std", + [ + 2.72993945, + 1.58444294, + 4.14371708, + 4.92961687, + 2.7138744, + 3.48168586, + 0.69505519, + 1.87511994, + 4.20167276, + 0.04797675, + ], + 3, + 1, + "numpy_compute", + ), + ( + "var", + [ + 2.72993945, + 1.58444294, + 4.14371708, + 4.92961687, + 2.7138744, + 3.48168586, + 0.69505519, + 1.87511994, + 4.20167276, + 0.04797675, + ], + 2, + 1, + "numpy_compute", + ), + ( + "std", + [ + 2.72993945, + 1.58444294, + 4.14371708, + 4.92961687, + 2.7138744, + 3.48168586, + 0.69505519, + 1.87511994, + 4.20167276, + 0.04797675, + ], + 2, + 1, + "numpy_compute", + ), + ("var", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, 0), + ("std", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, 0), + ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, 0), + ("std", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, 0), + ("var", [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], 5, 0, "numpy_compute"), + ], ) def test_rolling_var_correctness(func, values, window, ddof, 
exp_value): # This tests subsume the previous tests under test_rolling_var_numerical_issues @@ -1104,28 +1168,47 @@ def test_rolling_var_correctness(func, values, window, ddof, exp_value): ts = Series(values) result = getattr(ts.rolling(window=window, center=True), func)(ddof=ddof) if result.last_valid_index(): - result = result[result.first_valid_index() : result.last_valid_index()+1].reset_index(drop=True) - expected = Series(getattr(sliding_window_view(values, window_shape=window), func)(axis=-1, ddof=ddof)) #.var(axis=-1, ddof=ddof)) + result = result[ + result.first_valid_index() : result.last_valid_index() + 1 + ].reset_index(drop=True) + expected = Series( + getattr(sliding_window_view(values, window_shape=window), func)( + axis=-1, ddof=ddof + ) + ) tm.assert_series_equal(result, expected, atol=1e-55) # GH 42064 if exp_value == 0: - # new `roll_var` will output 0.0 correctly - tm.assert_series_equal(result==0, expected==0) + # new `roll_var` will output 0.0 correctly + tm.assert_series_equal(result == 0, expected == 0) + def test_rolling_var_numerical_stability(): # GH 52407 - A = [0.00000000e+00, 0.00000000e+00, 3.16188252e-18, 2.95781651e-16, - 2.23153542e-51, 0.00000000e+00, 0.00000000e+00, 5.39943432e-48, - 1.38206260e-73, 0.00000000e+00] + A = [ + 0.00000000e00, + 0.00000000e00, + 3.16188252e-18, + 2.95781651e-16, + 2.23153542e-51, + 0.00000000e00, + 0.00000000e00, + 5.39943432e-48, + 1.38206260e-73, + 0.00000000e00, + ] ts = Series(A) result = ts.rolling(window=3, center=True).var(ddof=1) - result = result[result.first_valid_index() : result.last_valid_index()+1].reset_index(drop=True) - + result = result[ + result.first_valid_index() : result.last_valid_index() + 1 + ].reset_index(drop=True) + # numpy implementation expected = Series(sliding_window_view(A, window_shape=3).var(axis=-1, ddof=1)) tm.assert_series_equal(result, expected, atol=1e-55) + def test_timeoffset_as_window_parameter_for_corr(unit): # GH: 28266 dti = DatetimeIndex( From 
e4a3939ef7efdea6d3eb40eaef0147cb5e400e26 Mon Sep 17 00:00:00 2001 From: suzyahyah Date: Tue, 30 Sep 2025 21:44:53 -0400 Subject: [PATCH 4/6] fix: use hardcoded values for tests --- pandas/tests/window/test_rolling.py | 154 ++++++++++------------------ 1 file changed, 56 insertions(+), 98 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 8560ea08be66e..fe91f8dab1937 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -4,7 +4,6 @@ ) import numpy as np -from numpy.lib.stride_tricks import sliding_window_view import pytest from pandas.compat import ( @@ -1081,132 +1080,91 @@ def test_rolling_sem(frame_or_series): @pytest.mark.parametrize( - ("func", "values", "window", "ddof", "exp_value"), + ("func", "values", "window", "ddof", "expected_values"), [ + ("var", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, [5e33, 0, 0.5, 0.5, 2, 0]), ( - "var", - [ - 2.72993945, - 1.58444294, - 4.14371708, - 4.92961687, - 2.7138744, - 3.48168586, - 0.69505519, - 1.87511994, - 4.20167276, - 0.04797675, - ], - 3, + "std", + [99999999999999999, 1, 1, 2, 3, 1, 1], + 2, 1, - "numpy_compute", + [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0], ), + ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, [5e33, 0.5, 0, 0.5, 2, 0]), ( "std", - [ - 2.72993945, - 1.58444294, - 4.14371708, - 4.92961687, - 2.7138744, - 3.48168586, - 0.69505519, - 1.87511994, - 4.20167276, - 0.04797675, - ], - 3, + [99999999999999999, 1, 2, 2, 3, 1, 1], + 2, + 1, + [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0], + ), + ( + "std", + [1.2e03, 1.3e17, 1.5e17, 1.995e03, 1.990e03], + 2, 1, - "numpy_compute", + [9.192388e16, 1.414214e16, 1.060660e17, 3.535534e00], ), ( "var", [ - 2.72993945, - 1.58444294, - 4.14371708, - 4.92961687, - 2.7138744, - 3.48168586, - 0.69505519, - 1.87511994, - 4.20167276, - 0.04797675, + 0.00000000e00, + 0.00000000e00, + 3.16188252e-18, + 2.95781651e-16, + 2.23153542e-51, + 0.00000000e00, + 
0.00000000e00, + 5.39943432e-48, + 1.38206260e-73, + 0.00000000e00, ], - 2, + 3, 1, - "numpy_compute", + [ + 3.33250036e-036, + 2.88538519e-032, + 2.88538519e-032, + 2.91622617e-032, + 1.65991678e-102, + 9.71796366e-096, + 9.71796366e-096, + 9.71796366e-096, + ], ), ( "std", + [1, -1, 0, 1, 3, 2, -2, 10000000000, 1, 2, 0, -2, 1, 3, 0, 1], + 6, + 1, [ - 2.72993945, - 1.58444294, - 4.14371708, - 4.92961687, - 2.7138744, - 3.48168586, - 0.69505519, - 1.87511994, - 4.20167276, - 0.04797675, + 1.41421356e00, + 1.87082869e00, + 4.08248290e09, + 4.08248290e09, + 4.08248290e09, + 4.08248290e09, + 4.08248290e09, + 4.08248290e09, + 1.72240142e00, + 1.75119007e00, + 1.64316767e00, ], - 2, - 1, - "numpy_compute", ), - ("var", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, 0), - ("std", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, 0), - ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, 0), - ("std", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, 0), - ("var", [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], 5, 0, "numpy_compute"), ], ) -def test_rolling_var_correctness(func, values, window, ddof, exp_value): - # This tests subsume the previous tests under test_rolling_var_numerical_issues - # GH: 37051, 42064, 54333 +def test_rolling_var_correctness(func, values, window, ddof, expected_values): + # GH: 37051, 42064, 54518, 52407, 47721 ts = Series(values) result = getattr(ts.rolling(window=window, center=True), func)(ddof=ddof) if result.last_valid_index(): result = result[ result.first_valid_index() : result.last_valid_index() + 1 ].reset_index(drop=True) - expected = Series( - getattr(sliding_window_view(values, window_shape=window), func)( - axis=-1, ddof=ddof - ) - ) + expected = Series(expected_values) tm.assert_series_equal(result, expected, atol=1e-55) # GH 42064 - if exp_value == 0: - # new `roll_var` will output 0.0 correctly - tm.assert_series_equal(result == 0, expected == 0) - - -def test_rolling_var_numerical_stability(): - # GH 52407 - A = [ - 0.00000000e00, - 
0.00000000e00, - 3.16188252e-18, - 2.95781651e-16, - 2.23153542e-51, - 0.00000000e00, - 0.00000000e00, - 5.39943432e-48, - 1.38206260e-73, - 0.00000000e00, - ] - ts = Series(A) - - result = ts.rolling(window=3, center=True).var(ddof=1) - result = result[ - result.first_valid_index() : result.last_valid_index() + 1 - ].reset_index(drop=True) - - # numpy implementation - expected = Series(sliding_window_view(A, window_shape=3).var(axis=-1, ddof=1)) - tm.assert_series_equal(result, expected, atol=1e-55) + tm.assert_series_equal(result == 0, expected == 0) def test_timeoffset_as_window_parameter_for_corr(unit): From 6431ae50bb546dfc9467a47abe3ea6cc221a750b Mon Sep 17 00:00:00 2001 From: suzyahyah Date: Wed, 1 Oct 2025 00:32:03 -0400 Subject: [PATCH 5/6] fix: sliding window exclusive end index --- pandas/_libs/window/aggregations.pyx | 2 +- pandas/tests/window/test_rolling.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index c8154ada07acc..979660929f2bb 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -442,7 +442,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, # Over the first window, observations can only be added # never removed - if i == 0 or not is_monotonic_increasing_bounds or s <= end[i]: + if i == 0 or not is_monotonic_increasing_bounds or s < end[i]: prev_value = values[s] num_consecutive_same_value = 0 diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index fe91f8dab1937..ee7086c444ee5 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1156,7 +1156,7 @@ def test_rolling_sem(frame_or_series): def test_rolling_var_correctness(func, values, window, ddof, expected_values): # GH: 37051, 42064, 54518, 52407, 47721 ts = Series(values) - result = getattr(ts.rolling(window=window, center=True), func)(ddof=ddof) + result = 
getattr(ts.rolling(window=window), func)(ddof=ddof) if result.last_valid_index(): result = result[ result.first_valid_index() : result.last_valid_index() + 1 ].reset_index(drop=True) From b0aa633bdb7427bf4f35f011d66f6b7677ea148d Mon Sep 17 00:00:00 2001 From: suzyahyah Date: Sat, 4 Oct 2025 00:51:06 -0400 Subject: [PATCH 6/6] docs: add changeline to v3.0.0.rst --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d721213dc38e7..39d7ff9a4d44f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1081,6 +1081,7 @@ Groupby/resample/rolling - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`) +- Bug in :meth:`Rolling.var` and :meth:`Rolling.std` where results could be incorrect because the end of the window was not indexed correctly. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`) Reshaping ^^^^^^^^^