diff --git a/README.md b/README.md
index 895cfb69e5edd..0c20b7f63493e 100644
--- a/README.md
+++ b/README.md
@@ -115,7 +115,7 @@ details, see the commit logs at https://github.com/pandas-dev/pandas.
## Dependencies
- [NumPy - Adds support for large, multi-dimensional arrays, matrices and high-level mathematical functions to operate on these arrays](https://www.numpy.org)
- [python-dateutil - Provides powerful extensions to the standard datetime module](https://dateutil.readthedocs.io/en/stable/index.html)
-- [pytz - Brings the Olson tz database into Python which allows accurate and cross platform timezone calculations](https://github.com/stub42/pytz)
+- [tzdata - Provides an IANA time zone database](https://tzdata.readthedocs.io/en/latest/)
See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies.
@@ -188,3 +188,12 @@ As contributors and maintainers to this project, you are expected to abide by pa
[Go to Top](#table-of-contents)
+
+---
+
+## Helpful Resources
+
+- 📘 [Pandas Cheat Sheet (Official)](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf)
+- 🎓 [Beginner’s Guide to Pandas](https://realpython.com/pandas-python-explore-dataset/)
+- 🛠️ [Good First Issues to Contribute](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+label%3A%22good+first+issue%22)
+- 💬 [Join the Pandas Community on Slack](https://pandas.pydata.org/docs/dev/development/community.html#community-slack)
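
The pytz → tzdata swap above is easy to sanity-check from a fresh environment; a minimal sketch (assuming a current pandas install, where IANA zone names resolve through the bundled tzdata database):

```python
import pandas as pd

# Construct a tz-aware timestamp; the IANA zone name is resolved via
# tzdata on platforms that ship no system zoneinfo database.
ts = pd.Timestamp("2024-03-09 12:00", tz="America/New_York")
print(ts)                     # 2024-03-09 12:00:00-05:00
print(ts.tz_convert("UTC"))   # 2024-03-09 17:00:00+00:00
```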
diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst
index aa7e7845bfa7a..d39ef3a45b1bd 100644
--- a/doc/source/development/index.rst
+++ b/doc/source/development/index.rst
@@ -16,6 +16,7 @@ Development
contributing_environment
contributing_documentation
contributing_codebase
+ code_guidelines
maintaining
internals
copy_on_write
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 03534bbee4c58..5368562ac97fd 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1,3156 +1,3160 @@
-"""
-Provide a generic structure to support window functions,
-similar to how we have a Groupby object.
-"""
-
-from __future__ import annotations
-
-import copy
-from datetime import timedelta
-from functools import partial
-import inspect
-from textwrap import dedent
-from typing import (
- TYPE_CHECKING,
- Any,
- Literal,
- final,
- overload,
-)
-
-import numpy as np
-
-from pandas._libs.tslibs import (
- BaseOffset,
- Timedelta,
- to_offset,
-)
-import pandas._libs.window.aggregations as window_aggregations
-from pandas.compat._optional import import_optional_dependency
-from pandas.errors import DataError
-from pandas.util._decorators import (
- Appender,
- Substitution,
- doc,
-)
-
-from pandas.core.dtypes.common import (
- ensure_float64,
- is_bool,
- is_integer,
- is_numeric_dtype,
- needs_i8_conversion,
-)
-from pandas.core.dtypes.dtypes import ArrowDtype
-from pandas.core.dtypes.generic import (
- ABCDataFrame,
- ABCSeries,
-)
-from pandas.core.dtypes.missing import notna
-
-from pandas.core._numba import executor
-from pandas.core.algorithms import factorize
-from pandas.core.apply import (
- ResamplerWindowApply,
- reconstruct_func,
-)
-from pandas.core.arrays import ExtensionArray
-from pandas.core.base import SelectionMixin
-import pandas.core.common as com
-from pandas.core.indexers.objects import (
- BaseIndexer,
- FixedWindowIndexer,
- GroupbyIndexer,
- VariableWindowIndexer,
-)
-from pandas.core.indexes.api import (
- DatetimeIndex,
- Index,
- MultiIndex,
- PeriodIndex,
- TimedeltaIndex,
-)
-from pandas.core.reshape.concat import concat
-from pandas.core.util.numba_ import (
- get_jit_arguments,
- maybe_use_numba,
- prepare_function_arguments,
-)
-from pandas.core.window.common import (
- flex_binary_moment,
- zsqrt,
-)
-from pandas.core.window.doc import (
- _shared_docs,
- create_section_header,
- kwargs_numeric_only,
- kwargs_scipy,
- numba_notes,
- template_header,
- template_pipe,
- template_returns,
- template_see_also,
- window_agg_numba_parameters,
- window_apply_parameters,
-)
-from pandas.core.window.numba_ import (
- generate_manual_numpy_nan_agg_with_axis,
- generate_numba_apply_func,
- generate_numba_table_func,
-)
-
-if TYPE_CHECKING:
- from collections.abc import Callable
- from collections.abc import (
- Hashable,
- Iterator,
- Sized,
- )
-
- from pandas._typing import (
- ArrayLike,
- Concatenate,
- NDFrameT,
- QuantileInterpolation,
- P,
- Self,
- T,
- WindowingRankType,
- npt,
- )
-
- from pandas import (
- DataFrame,
- Series,
- )
- from pandas.core.generic import NDFrame
- from pandas.core.groupby.ops import BaseGrouper
-
-from pandas.core.arrays.datetimelike import dtype_to_unit
-
-
-class BaseWindow(SelectionMixin):
- """Provides utilities for performing windowing operations."""
-
- _attributes: list[str] = []
- exclusions: frozenset[Hashable] = frozenset()
- _on: Index
-
- def __init__(
- self,
- obj: NDFrame,
- window=None,
- min_periods: int | None = None,
- center: bool | None = False,
- win_type: str | None = None,
- on: str | Index | None = None,
- closed: str | None = None,
- step: int | None = None,
- method: str = "single",
- *,
- selection=None,
- ) -> None:
- self.obj = obj
- self.on = on
- self.closed = closed
- self.step = step
- self.window = window
- self.min_periods = min_periods
- self.center = center
- self.win_type = win_type
- self.method = method
- self._win_freq_i8: int | None = None
- if self.on is None:
- self._on = self.obj.index
- elif isinstance(self.on, Index):
- self._on = self.on
- elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
- self._on = Index(self.obj[self.on])
- else:
- raise ValueError(
- f"invalid on specified as {self.on}, "
- "must be a column (of DataFrame), an Index or None"
- )
-
- self._selection = selection
- self._validate()
-
- def _validate(self) -> None:
- if self.center is not None and not is_bool(self.center):
- raise ValueError("center must be a boolean")
- if self.min_periods is not None:
- if not is_integer(self.min_periods):
- raise ValueError("min_periods must be an integer")
- if self.min_periods < 0:
- raise ValueError("min_periods must be >= 0")
- if is_integer(self.window) and self.min_periods > self.window:
- raise ValueError(
- f"min_periods {self.min_periods} must be <= window {self.window}"
- )
- if self.closed is not None and self.closed not in [
- "right",
- "both",
- "left",
- "neither",
- ]:
- raise ValueError("closed must be 'right', 'left', 'both' or 'neither'")
- if not isinstance(self.obj, (ABCSeries, ABCDataFrame)):
- raise TypeError(f"invalid type: {type(self)}")
- if isinstance(self.window, BaseIndexer):
- # Validate that the passed BaseIndexer subclass has
- # a get_window_bounds with the correct signature.
- get_window_bounds_signature = inspect.signature(
- self.window.get_window_bounds
- ).parameters.keys()
- expected_signature = inspect.signature(
- BaseIndexer().get_window_bounds
- ).parameters.keys()
- if get_window_bounds_signature != expected_signature:
- raise ValueError(
- f"{type(self.window).__name__} does not implement "
- f"the correct signature for get_window_bounds"
- )
- if self.method not in ["table", "single"]:
- raise ValueError("method must be 'table' or 'single")
- if self.step is not None:
- if not is_integer(self.step):
- raise ValueError("step must be an integer")
- if self.step < 0:
- raise ValueError("step must be >= 0")
-
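
The signature check above compares parameter names, so a custom indexer must spell out the full ``get_window_bounds`` signature even for parameters it ignores. A minimal conforming sketch (the class name is hypothetical; ``window_size`` is stored by the ``BaseIndexer`` constructor):

```python
from __future__ import annotations

import numpy as np
from pandas.api.indexers import BaseIndexer


class TrailingIndexer(BaseIndexer):
    """Plain trailing window covering [i - window_size + 1, i]."""

    def get_window_bounds(
        self,
        num_values: int = 0,
        min_periods: int | None = None,
        center: bool | None = None,
        closed: str | None = None,
        step: int | None = None,
    ) -> tuple[np.ndarray, np.ndarray]:
        # end is exclusive; clip start at 0 so the first windows shrink.
        end = np.arange(1, num_values + 1, dtype=np.int64)
        start = np.clip(end - self.window_size, 0, None)
        return start, end
```

Omitting any of these parameters (``step``, say) fails the comparison and raises the ``ValueError`` above.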
- def _check_window_bounds(
- self, start: np.ndarray, end: np.ndarray, num_vals: int
- ) -> None:
- if len(start) != len(end):
- raise ValueError(
- f"start ({len(start)}) and end ({len(end)}) bounds must be the "
- f"same length"
- )
- if len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1):
- raise ValueError(
- f"start and end bounds ({len(start)}) must be the same length "
- f"as the object ({num_vals}) divided by the step ({self.step}) "
- f"if given and rounded up"
- )
-
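
The second check above is plain ceiling division: with a ``step``, the indexer emits one start/end pair per evaluated window, i.e. ``ceil(num_vals / step)`` pairs. A quick illustration with hypothetical numbers:

```python
num_vals, step = 10, 3
# ceil(10 / 3) == 4 windows, evaluated at positions 0, 3, 6, 9
assert (num_vals + step - 1) // step == len(range(0, num_vals, step)) == 4
```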
- def _slice_axis_for_step(self, index: Index, result: Sized | None = None) -> Index:
- """
- Slices the index for a given result and the preset step.
- """
- return (
- index
- if result is None or len(result) == len(index)
- else index[:: self.step]
- )
-
- def _validate_numeric_only(self, name: str, numeric_only: bool) -> None:
- """
- Validate numeric_only argument, raising if invalid for the input.
-
- Parameters
- ----------
- name : str
- Name of the operator (kernel).
- numeric_only : bool
- Value passed by user.
- """
- if (
- self._selected_obj.ndim == 1
- and numeric_only
- and not is_numeric_dtype(self._selected_obj.dtype)
- ):
- raise NotImplementedError(
- f"{type(self).__name__}.{name} does not implement numeric_only"
- )
-
- def _make_numeric_only(self, obj: NDFrameT) -> NDFrameT:
- """Subset DataFrame to numeric columns.
-
- Parameters
- ----------
- obj : DataFrame
-
- Returns
- -------
- obj subset to numeric-only columns.
- """
- result = obj.select_dtypes(include=["number"], exclude=["timedelta"])
- return result
-
- def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
- """
- Split data into blocks & return conformed data.
- """
- # filter out the on from the object
- if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2:
- obj = obj.reindex(columns=obj.columns.difference([self.on], sort=False))
- if obj.ndim > 1 and numeric_only:
- obj = self._make_numeric_only(obj)
- return obj
-
- def _gotitem(self, key, ndim, subset=None):
- """
- Sub-classes to define. Return a sliced object.
-
- Parameters
- ----------
- key : str / list of selections
- ndim : {1, 2}
- requested ndim of result
- subset : object, default None
- subset to act on
- """
- # create a new object to prevent aliasing
- if subset is None:
- subset = self.obj
-
- # we need to make a shallow copy of ourselves
- # with the same groupby
- kwargs = {attr: getattr(self, attr) for attr in self._attributes}
-
- selection = self._infer_selection(key, subset)
- new_win = type(self)(subset, selection=selection, **kwargs)
- return new_win
-
- def __getattr__(self, attr: str):
- if attr in self._internal_names_set:
- return object.__getattribute__(self, attr)
- if attr in self.obj:
- return self[attr]
-
- raise AttributeError(
- f"'{type(self).__name__}' object has no attribute '{attr}'"
- )
-
- def _dir_additions(self):
- return self.obj._dir_additions()
-
- def __repr__(self) -> str:
- """
- Provide a nice str repr of our rolling object.
- """
- attrs_list = (
- f"{attr_name}={getattr(self, attr_name)}"
- for attr_name in self._attributes
- if getattr(self, attr_name, None) is not None and attr_name[0] != "_"
- )
- attrs = ",".join(attrs_list)
- return f"{type(self).__name__} [{attrs}]"
-
- def __iter__(self) -> Iterator:
- obj = self._selected_obj.set_axis(self._on)
- obj = self._create_data(obj)
- indexer = self._get_window_indexer()
-
- start, end = indexer.get_window_bounds(
- num_values=len(obj),
- min_periods=self.min_periods,
- center=self.center,
- closed=self.closed,
- step=self.step,
- )
- self._check_window_bounds(start, end, len(obj))
-
- for s, e in zip(start, end):
- result = obj.iloc[slice(s, e)]
- yield result
-
- def _prep_values(self, values: ArrayLike) -> np.ndarray:
- """Convert input to numpy arrays for Cython routines"""
- if needs_i8_conversion(values.dtype):
- raise NotImplementedError(
- f"ops for {type(self).__name__} for this "
- f"dtype {values.dtype} are not implemented"
- )
- # GH #12373 : rolling functions error on float32 data
- # make sure the data is coerced to float64
- try:
- if isinstance(values, ExtensionArray):
- values = values.to_numpy(np.float64, na_value=np.nan)
- else:
- values = ensure_float64(values)
- except (ValueError, TypeError) as err:
- raise TypeError(f"cannot handle this type -> {values.dtype}") from err
-
- # Convert inf to nan for C funcs
- inf = np.isinf(values)
- if inf.any():
- values = np.where(inf, np.nan, values)
-
- return values
-
- def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None:
- # if we have an 'on' column we want to put it back into
- # the results in the same location
- from pandas import Series
-
- if self.on is not None and not self._on.equals(obj.index):
- name = self._on.name
- extra_col = Series(self._on, index=self.obj.index, name=name, copy=False)
- if name in result.columns:
- # TODO: sure we want to overwrite results?
- result[name] = extra_col
- elif name in result.index.names:
- pass
- elif name in self._selected_obj.columns:
- # insert in the same location as we had in _selected_obj
- old_cols = self._selected_obj.columns
- new_cols = result.columns
- old_loc = old_cols.get_loc(name)
- overlap = new_cols.intersection(old_cols[:old_loc])
- new_loc = len(overlap)
- result.insert(new_loc, name, extra_col)
- else:
- # insert at the end
- result[name] = extra_col
-
- @property
- def _index_array(self) -> npt.NDArray[np.int64] | None:
- # TODO: why do we get here with e.g. MultiIndex?
- if isinstance(self._on, (PeriodIndex, DatetimeIndex, TimedeltaIndex)):
- return self._on.asi8
- elif isinstance(self._on.dtype, ArrowDtype) and self._on.dtype.kind in "mM":
- return self._on.to_numpy(dtype=np.int64)
- return None
-
- def _resolve_output(self, out: DataFrame, obj: DataFrame) -> DataFrame:
- """Validate and finalize result."""
- if out.shape[1] == 0 and obj.shape[1] > 0:
- raise DataError("No numeric types to aggregate")
- if out.shape[1] == 0:
- return obj.astype("float64")
-
- self._insert_on_column(out, obj)
- return out
-
- def _get_window_indexer(self) -> BaseIndexer:
- """
- Return an indexer class that will compute the window start and end bounds
- """
- if isinstance(self.window, BaseIndexer):
- return self.window
- if self._win_freq_i8 is not None:
- return VariableWindowIndexer(
- index_array=self._index_array,
- window_size=self._win_freq_i8,
- center=self.center,
- )
- return FixedWindowIndexer(window_size=self.window)
-
- def _apply_series(
- self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None
- ) -> Series:
- """
- Series version of _apply_columnwise
- """
- obj = self._create_data(self._selected_obj)
-
- if name == "count":
- # GH 12541: Special case for count where we support date-like types
- obj = notna(obj).astype(int)
- try:
- values = self._prep_values(obj._values)
- except (TypeError, NotImplementedError) as err:
- raise DataError("No numeric types to aggregate") from err
-
- result = homogeneous_func(values)
- index = self._slice_axis_for_step(obj.index, result)
- return obj._constructor(result, index=index, name=obj.name)
-
- def _apply_columnwise(
- self,
- homogeneous_func: Callable[..., ArrayLike],
- name: str,
- numeric_only: bool = False,
- ) -> DataFrame | Series:
- """
- Apply the given function to the DataFrame broken down into homogeneous
- sub-frames.
- """
- self._validate_numeric_only(name, numeric_only)
- if self._selected_obj.ndim == 1:
- return self._apply_series(homogeneous_func, name)
-
- obj = self._create_data(self._selected_obj, numeric_only)
- if name == "count":
- # GH 12541: Special case for count where we support date-like types
- obj = notna(obj).astype(int)
- obj._mgr = obj._mgr.consolidate()
-
- taker = []
- res_values = []
- for i, arr in enumerate(obj._iter_column_arrays()):
- # GH#42736 operate column-wise instead of block-wise
- # As of 2.0, hfunc will raise for nuisance columns
- try:
- arr = self._prep_values(arr)
- except (TypeError, NotImplementedError) as err:
- raise DataError(
- f"Cannot aggregate non-numeric type: {arr.dtype}"
- ) from err
- res = homogeneous_func(arr)
- res_values.append(res)
- taker.append(i)
-
- index = self._slice_axis_for_step(
- obj.index, res_values[0] if len(res_values) > 0 else None
- )
- df = type(obj)._from_arrays(
- res_values,
- index=index,
- columns=obj.columns.take(taker),
- verify_integrity=False,
- )
-
- return self._resolve_output(df, obj)
-
- def _apply_tablewise(
- self,
- homogeneous_func: Callable[..., ArrayLike],
- name: str | None = None,
- numeric_only: bool = False,
- ) -> DataFrame | Series:
- """
- Apply the given function to the DataFrame across the entire object
- """
- if self._selected_obj.ndim == 1:
- raise ValueError("method='table' not applicable for Series objects.")
- obj = self._create_data(self._selected_obj, numeric_only)
- values = self._prep_values(obj.to_numpy())
- result = homogeneous_func(values)
- index = self._slice_axis_for_step(obj.index, result)
- columns = (
- obj.columns
- if result.shape[1] == len(obj.columns)
- else obj.columns[:: self.step]
- )
- out = obj._constructor(result, index=index, columns=columns)
-
- return self._resolve_output(out, obj)
-
- def _apply_pairwise(
- self,
- target: DataFrame | Series,
- other: DataFrame | Series | None,
- pairwise: bool | None,
- func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
- numeric_only: bool,
- ) -> DataFrame | Series:
- """
- Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
- """
- target = self._create_data(target, numeric_only)
- if other is None:
- other = target
- # only default unset
- pairwise = True if pairwise is None else pairwise
- elif not isinstance(other, (ABCDataFrame, ABCSeries)):
- raise ValueError("other must be a DataFrame or Series")
- elif other.ndim == 2 and numeric_only:
- other = self._make_numeric_only(other)
-
- return flex_binary_moment(target, other, func, pairwise=bool(pairwise))
-
- def _apply(
- self,
- func: Callable[..., Any],
- name: str,
- numeric_only: bool = False,
- numba_args: tuple[Any, ...] = (),
- **kwargs,
- ):
- """
- Rolling statistical measure using supplied function.
-
- Designed to be used with passed-in Cython array-based functions.
-
- Parameters
- ----------
- func : callable function to apply
- name : str,
- numba_args : tuple
- args to be passed when func is a numba func
- **kwargs
- additional arguments for rolling function and window function
-
- Returns
- -------
- y : type of input
- """
- window_indexer = self._get_window_indexer()
- min_periods = (
- self.min_periods
- if self.min_periods is not None
- else window_indexer.window_size
- )
-
- def homogeneous_func(values: np.ndarray):
- # calculation function
-
- if values.size == 0:
- return values.copy()
-
- def calc(x):
- start, end = window_indexer.get_window_bounds(
- num_values=len(x),
- min_periods=min_periods,
- center=self.center,
- closed=self.closed,
- step=self.step,
- )
- self._check_window_bounds(start, end, len(x))
-
- return func(x, start, end, min_periods, *numba_args)
-
- with np.errstate(all="ignore"):
- result = calc(values)
-
- return result
-
- if self.method == "single":
- return self._apply_columnwise(homogeneous_func, name, numeric_only)
- else:
- return self._apply_tablewise(homogeneous_func, name, numeric_only)
-
- def _numba_apply(
- self,
- func: Callable[..., Any],
- engine_kwargs: dict[str, bool] | None = None,
- **func_kwargs,
- ):
- window_indexer = self._get_window_indexer()
- min_periods = (
- self.min_periods
- if self.min_periods is not None
- else window_indexer.window_size
- )
- obj = self._create_data(self._selected_obj)
- values = self._prep_values(obj.to_numpy())
- if values.ndim == 1:
- values = values.reshape(-1, 1)
- start, end = window_indexer.get_window_bounds(
- num_values=len(values),
- min_periods=min_periods,
- center=self.center,
- closed=self.closed,
- step=self.step,
- )
- self._check_window_bounds(start, end, len(values))
- # For now, map everything to float to match the Cython impl
- # even though it is wrong
- # TODO: Could preserve correct dtypes in future
- # xref #53214
- dtype_mapping = executor.float_dtype_mapping
- aggregator = executor.generate_shared_aggregator(
- func,
- dtype_mapping,
- is_grouped_kernel=False,
- **get_jit_arguments(engine_kwargs),
- )
- result = aggregator(
- values.T, start=start, end=end, min_periods=min_periods, **func_kwargs
- ).T
- index = self._slice_axis_for_step(obj.index, result)
- if obj.ndim == 1:
- result = result.squeeze()
- out = obj._constructor(result, index=index, name=obj.name)
- return out
- else:
- columns = self._slice_axis_for_step(obj.columns, result.T)
- out = obj._constructor(result, index=index, columns=columns)
- return self._resolve_output(out, obj)
-
- def aggregate(self, func=None, *args, **kwargs):
- relabeling, func, columns, order = reconstruct_func(func, **kwargs)
- result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
- if isinstance(result, ABCDataFrame) and relabeling:
- result = result.iloc[:, order]
- result.columns = columns # type: ignore[union-attr]
- if result is None:
- return self.apply(func, raw=False, args=args, kwargs=kwargs)
- return result
-
- agg = aggregate
-
-
-class BaseWindowGroupby(BaseWindow):
- """
- Provide the groupby windowing facilities.
- """
-
- _grouper: BaseGrouper
- _as_index: bool
- _attributes: list[str] = ["_grouper"]
-
- def __init__(
- self,
- obj: DataFrame | Series,
- *args,
- _grouper: BaseGrouper,
- _as_index: bool = True,
- **kwargs,
- ) -> None:
- from pandas.core.groupby.ops import BaseGrouper
-
- if not isinstance(_grouper, BaseGrouper):
- raise ValueError("Must pass a BaseGrouper object.")
- self._grouper = _grouper
- self._as_index = _as_index
- # GH 32262: It's convention to keep the grouping column in
- # groupby.<agg>, but unexpected to users in
- # groupby.rolling.<agg>
- obj = obj.drop(columns=self._grouper.names, errors="ignore")
- # GH 15354
- if kwargs.get("step") is not None:
- raise NotImplementedError("step not implemented for groupby")
- super().__init__(obj, *args, **kwargs)
-
- def _apply(
- self,
- func: Callable[..., Any],
- name: str,
- numeric_only: bool = False,
- numba_args: tuple[Any, ...] = (),
- **kwargs,
- ) -> DataFrame | Series:
- result = super()._apply(
- func,
- name,
- numeric_only,
- numba_args,
- **kwargs,
- )
- # Reconstruct the resulting MultiIndex
- # 1st set of levels = group by labels
- # 2nd set of levels = original DataFrame/Series index
- grouped_object_index = self.obj.index
- grouped_index_name = [*grouped_object_index.names]
- groupby_keys = copy.copy(self._grouper.names)
- result_index_names = groupby_keys + grouped_index_name
-
- drop_columns = [
- key
- for key in self._grouper.names
- if key not in self.obj.index.names or key is None
- ]
-
- if len(drop_columns) != len(groupby_keys):
- # Our result will have still kept the column in the result
- result = result.drop(columns=drop_columns, errors="ignore")
-
- codes = self._grouper.codes
- levels = copy.copy(self._grouper.levels)
-
- group_indices = self._grouper.indices.values()
- if group_indices:
- indexer = np.concatenate(list(group_indices))
- else:
- indexer = np.array([], dtype=np.intp)
- codes = [c.take(indexer) for c in codes]
-
- # if the index of the original dataframe needs to be preserved, append
- # this index (but reordered) to the codes/levels from the groupby
- if grouped_object_index is not None:
- idx = grouped_object_index.take(indexer)
- if not isinstance(idx, MultiIndex):
- idx = MultiIndex.from_arrays([idx])
- codes.extend(list(idx.codes))
- levels.extend(list(idx.levels))
-
- result_index = MultiIndex(
- levels, codes, names=result_index_names, verify_integrity=False
- )
-
- result.index = result_index
- if not self._as_index:
- result = result.reset_index(level=list(range(len(groupby_keys))))
- return result
-
- def _apply_pairwise(
- self,
- target: DataFrame | Series,
- other: DataFrame | Series | None,
- pairwise: bool | None,
- func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
- numeric_only: bool,
- ) -> DataFrame | Series:
- """
- Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
- """
- # Manually drop the grouping column first
- target = target.drop(columns=self._grouper.names, errors="ignore")
- result = super()._apply_pairwise(target, other, pairwise, func, numeric_only)
- # 1) Determine the levels + codes of the groupby levels
- if other is not None and not all(
- len(group) == len(other) for group in self._grouper.indices.values()
- ):
- # GH 42915
- # len(other) != len(any group), so must reindex (expand) the result
- # from flex_binary_moment to a "transform"-like result
- # per groupby combination
- old_result_len = len(result)
- result = concat(
- [
- result.take(gb_indices).reindex(result.index)
- for gb_indices in self._grouper.indices.values()
- ]
- )
-
- gb_pairs = (
- com.maybe_make_list(pair) for pair in self._grouper.indices.keys()
- )
- groupby_codes = []
- groupby_levels = []
- # e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]]
- for gb_level_pair in map(list, zip(*gb_pairs)):
- labels = np.repeat(np.array(gb_level_pair), old_result_len)
- codes, levels = factorize(labels)
- groupby_codes.append(codes)
- groupby_levels.append(levels)
- else:
- # pairwise=True or len(other) == len(each group), so repeat
- # the groupby labels by the number of columns in the original object
- groupby_codes = self._grouper.codes
- # error: Incompatible types in assignment (expression has type
- # "List[Index]", variable has type "List[Union[ndarray, Index]]")
- groupby_levels = self._grouper.levels # type: ignore[assignment]
-
- group_indices = self._grouper.indices.values()
- if group_indices:
- indexer = np.concatenate(list(group_indices))
- else:
- indexer = np.array([], dtype=np.intp)
-
- if target.ndim == 1:
- repeat_by = 1
- else:
- repeat_by = len(target.columns)
- groupby_codes = [
- np.repeat(c.take(indexer), repeat_by) for c in groupby_codes
- ]
- # 2) Determine the levels + codes of the result from super()._apply_pairwise
- if isinstance(result.index, MultiIndex):
- result_codes = list(result.index.codes)
- result_levels = list(result.index.levels)
- result_names = list(result.index.names)
- else:
- idx_codes, idx_levels = factorize(result.index)
- result_codes = [idx_codes]
- result_levels = [idx_levels]
- result_names = [result.index.name]
-
- # 3) Create the resulting index by combining 1) + 2)
- result_codes = groupby_codes + result_codes
- result_levels = groupby_levels + result_levels
- result_names = self._grouper.names + result_names
-
- result_index = MultiIndex(
- result_levels, result_codes, names=result_names, verify_integrity=False
- )
- result.index = result_index
- return result
-
- def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
- """
- Split data into blocks & return conformed data.
- """
- # Ensure the object we're rolling over is monotonically sorted relative
- # to the groups
- # GH 36197
- if not obj.empty:
- groupby_order = np.concatenate(list(self._grouper.indices.values())).astype(
- np.int64
- )
- obj = obj.take(groupby_order)
- return super()._create_data(obj, numeric_only)
-
- def _gotitem(self, key, ndim, subset=None):
- # we are setting the index on the actual object
- # here so our index is carried through to the selected obj
- # when we do the splitting for the groupby
- if self.on is not None:
- # GH 43355
- subset = self.obj.set_index(self._on)
- return super()._gotitem(key, ndim, subset=subset)
-
-
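
The index reconstruction in ``BaseWindowGroupby._apply`` is what produces the familiar two-level result below; a small sketch of the observable behaviour (illustrative data):

```python
import pandas as pd

df = pd.DataFrame({"g": ["a", "a", "b", "b"], "x": [1.0, 2.0, 3.0, 4.0]})

# Outer level: group labels; inner level: the original row index.
# The grouping column itself is dropped from the values (GH 32262).
print(df.groupby("g").rolling(2).sum())
#          x
# g
# a 0    NaN
#   1    3.0
# b 2    NaN
#   3    7.0
```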
-class Window(BaseWindow):
- """
- Provide rolling window calculations.
-
- Parameters
- ----------
- window : int, timedelta, str, offset, or BaseIndexer subclass
- Interval of the moving window.
-
- If an integer, the delta between the start and end of each window.
- The number of points in the window depends on the ``closed`` argument.
-
- If a timedelta, str, or offset, the time period of each window. Each
- window will be variable sized based on the observations included in
- the time-period. This is only valid for datetimelike indexes.
- To learn more about the offsets & frequency strings, please see
- :ref:`this link <timeseries.offset_aliases>`.
-
- If a BaseIndexer subclass, the window boundaries are computed
- based on the defined ``get_window_bounds`` method. Additional rolling
- keyword arguments, namely ``min_periods``, ``center``, ``closed`` and
- ``step`` will be passed to ``get_window_bounds``.
-
- min_periods : int, default None
- Minimum number of observations in window required to have a value;
- otherwise, result is ``np.nan``.
-
- For a window that is specified by an offset, ``min_periods`` will default to 1.
-
- For a window that is specified by an integer, ``min_periods`` will default
- to the size of the window.
-
- center : bool, default False
- If False, set the window labels as the right edge of the window index.
-
- If True, set the window labels as the center of the window index.
-
- win_type : str, default None
- If ``None``, all points are evenly weighted.
-
- If a string, it must be a valid `scipy.signal window function
- <https://docs.scipy.org/doc/scipy/reference/signal.windows.html#module-scipy.signal.windows>`__.
-
- Certain Scipy window types require additional parameters to be passed
- in the aggregation function. The additional parameters must match
- the keywords specified in the Scipy window type method signature.
-
- on : str, optional
- For a DataFrame, a column label or Index level on which
- to calculate the rolling window, rather than the DataFrame's index.
-
- A provided integer column is ignored and excluded from the result,
- since an integer index is not used to calculate the rolling window.
-
- closed : str, default None
- Determines the inclusivity of points in the window.
-
- If ``'right'``, uses the window (first, last] meaning the last point
- is included in the calculations.
-
- If ``'left'``, uses the window [first, last) meaning the first point
- is included in the calculations.
-
- If ``'both'``, uses the window [first, last] meaning all points in
- the window are included in the calculations.
-
- If ``'neither'``, uses the window (first, last) meaning the first
- and last points in the window are excluded from calculations.
-
- () and [] are referencing open and closed set
- notation respectively.
-
- Default ``None`` (``'right'``).
-
- step : int, default None
- Evaluate the window at every ``step`` result, equivalent to slicing as
- ``[::step]``. ``window`` must be an integer. Using a step argument other
- than None or 1 will produce a result with a different shape than the input.
-
- .. versionadded:: 1.5.0
-
- method : str {'single', 'table'}, default 'single'
-
- .. versionadded:: 1.3.0
-
- Execute the rolling operation per single column or row (``'single'``)
- or over the entire object (``'table'``).
-
- This argument is only implemented when specifying ``engine='numba'``
- in the method call.
-
- Returns
- -------
- pandas.api.typing.Window or pandas.api.typing.Rolling
- An instance of Window is returned if ``win_type`` is passed. Otherwise,
- an instance of Rolling is returned.
-
- See Also
- --------
- expanding : Provides expanding transformations.
- ewm : Provides exponential weighted functions.
-
- Notes
- -----
- See :ref:`Windowing Operations <window.overview>` for further usage details
- and examples.
-
- Examples
- --------
- >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
- >>> df
- B
- 0 0.0
- 1 1.0
- 2 2.0
- 3 NaN
- 4 4.0
-
- **window**
-
- Rolling sum with a window length of 2 observations.
-
- >>> df.rolling(2).sum()
- B
- 0 NaN
- 1 1.0
- 2 3.0
- 3 NaN
- 4 NaN
-
- Rolling sum with a window span of 2 seconds.
-
- >>> df_time = pd.DataFrame(
- ... {"B": [0, 1, 2, np.nan, 4]},
- ... index=[
- ... pd.Timestamp("20130101 09:00:00"),
- ... pd.Timestamp("20130101 09:00:02"),
- ... pd.Timestamp("20130101 09:00:03"),
- ... pd.Timestamp("20130101 09:00:05"),
- ... pd.Timestamp("20130101 09:00:06"),
- ... ],
- ... )
-
- >>> df_time
- B
- 2013-01-01 09:00:00 0.0
- 2013-01-01 09:00:02 1.0
- 2013-01-01 09:00:03 2.0
- 2013-01-01 09:00:05 NaN
- 2013-01-01 09:00:06 4.0
-
- >>> df_time.rolling("2s").sum()
- B
- 2013-01-01 09:00:00 0.0
- 2013-01-01 09:00:02 1.0
- 2013-01-01 09:00:03 3.0
- 2013-01-01 09:00:05 NaN
- 2013-01-01 09:00:06 4.0
-
- Rolling sum with forward looking windows with 2 observations.
-
- >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
- >>> df.rolling(window=indexer, min_periods=1).sum()
- B
- 0 1.0
- 1 3.0
- 2 2.0
- 3 4.0
- 4 4.0
-
- **min_periods**
-
- Rolling sum with a window length of 2 observations, but only needs a minimum of 1
- observation to calculate a value.
-
- >>> df.rolling(2, min_periods=1).sum()
- B
- 0 0.0
- 1 1.0
- 2 3.0
- 3 2.0
- 4 4.0
-
- **center**
-
- Rolling sum with the result assigned to the center of the window index.
-
- >>> df.rolling(3, min_periods=1, center=True).sum()
- B
- 0 1.0
- 1 3.0
- 2 3.0
- 3 6.0
- 4 4.0
-
- >>> df.rolling(3, min_periods=1, center=False).sum()
- B
- 0 0.0
- 1 1.0
- 2 3.0
- 3 3.0
- 4 6.0
-
- **step**
-
- Rolling sum with a window length of 2 observations, minimum of 1 observation to
- calculate a value, and a step of 2.
-
- >>> df.rolling(2, min_periods=1, step=2).sum()
- B
- 0 0.0
- 2 3.0
- 4 4.0
-
- **win_type**
-
- Rolling sum with a window length of 2, using the Scipy ``'gaussian'``
- window type. ``std`` is required in the aggregation function.
-
- >>> df.rolling(2, win_type="gaussian").sum(std=3)
- B
- 0 NaN
- 1 0.986207
- 2 2.958621
- 3 NaN
- 4 NaN
-
- **on**
-
- Rolling sum with a window length of 2 days.
-
- >>> df = pd.DataFrame(
- ... {
- ... "A": [
- ... pd.to_datetime("2020-01-01"),
- ... pd.to_datetime("2020-01-01"),
- ... pd.to_datetime("2020-01-02"),
- ... ],
- ... "B": [1, 2, 3],
- ... },
- ... index=pd.date_range("2020", periods=3),
- ... )
-
- >>> df
- A B
- 2020-01-01 2020-01-01 1
- 2020-01-02 2020-01-01 2
- 2020-01-03 2020-01-02 3
-
- >>> df.rolling("2D", on="A").sum()
- A B
- 2020-01-01 2020-01-01 1.0
- 2020-01-02 2020-01-01 3.0
- 2020-01-03 2020-01-02 6.0
- """
-
- _attributes = [
- "window",
- "min_periods",
- "center",
- "win_type",
- "on",
- "closed",
- "step",
- "method",
- ]
-
- def _validate(self) -> None:
- super()._validate()
-
- if not isinstance(self.win_type, str):
- raise ValueError(f"Invalid win_type {self.win_type}")
- signal = import_optional_dependency(
- "scipy.signal.windows", extra="Scipy is required to generate window weight."
- )
- self._scipy_weight_generator = getattr(signal, self.win_type, None)
- if self._scipy_weight_generator is None:
- raise ValueError(f"Invalid win_type {self.win_type}")
-
- if isinstance(self.window, BaseIndexer):
- raise NotImplementedError(
- "BaseIndexer subclasses not implemented with win_types."
- )
- if not is_integer(self.window) or self.window < 0:
- raise ValueError("window must be an integer 0 or greater")
-
- if self.method != "single":
- raise NotImplementedError("'single' is the only supported method type.")
-
- def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray:
- """
- Center the result in the window for weighted rolling aggregations.
- """
- if offset > 0:
- lead_indexer = [slice(offset, None)]
- result = np.copy(result[tuple(lead_indexer)])
- return result
-
- def _apply(
- self,
- func: Callable[[np.ndarray, int, int], np.ndarray],
- name: str,
- numeric_only: bool = False,
- numba_args: tuple[Any, ...] = (),
- **kwargs,
- ):
- """
- Rolling with weights statistical measure using supplied function.
-
- Designed to be used with passed-in Cython array-based functions.
-
- Parameters
- ----------
- func : callable function to apply
- name : str,
- numeric_only : bool, default False
- Whether to only operate on bool, int, and float columns
- numba_args : tuple
- unused
- **kwargs
- additional arguments for scipy windows if necessary
-
- Returns
- -------
- y : type of input
- """
- # "None" not callable [misc]
- window = self._scipy_weight_generator( # type: ignore[misc]
- self.window, **kwargs
- )
- offset = (len(window) - 1) // 2 if self.center else 0
-
- def homogeneous_func(values: np.ndarray):
- # calculation function
-
- if values.size == 0:
- return values.copy()
-
- def calc(x):
- additional_nans = np.full(offset, np.nan)
- x = np.concatenate((x, additional_nans))
- return func(
- x,
- window,
- self.min_periods if self.min_periods is not None else len(window),
- )
-
- with np.errstate(all="ignore"):
- # Our weighted aggregations return memoryviews
- result = np.asarray(calc(values))
-
- if self.center:
- result = self._center_window(result, offset)
-
- return result
-
- return self._apply_columnwise(homogeneous_func, name, numeric_only)[
- :: self.step
- ]
-
- @doc(
- _shared_docs["aggregate"],
- see_also=dedent(
- """
- See Also
- --------
- DataFrame.aggregate : Similar DataFrame method.
- Series.aggregate : Similar Series method.
- """
- ),
- examples=dedent(
- """
- Examples
- --------
- >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
- >>> df
- A B C
- 0 1 4 7
- 1 2 5 8
- 2 3 6 9
-
- >>> df.rolling(2, win_type="boxcar").agg("mean")
- A B C
- 0 NaN NaN NaN
- 1 1.5 4.5 7.5
- 2 2.5 5.5 8.5
- """
- ),
- klass="Series/DataFrame",
- axis="",
- )
- def aggregate(self, func=None, *args, **kwargs):
- result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
- if result is None:
- # these must apply directly
- result = func(self)
-
- return result
-
- agg = aggregate
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- kwargs_scipy,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """\
- >>> ser = pd.Series([0, 1, 5, 2, 8])
-
- To get an instance of :class:`~pandas.core.window.rolling.Window` we need
- to pass the parameter `win_type`.
-
- >>> type(ser.rolling(2, win_type='gaussian'))
- <class 'pandas.core.window.rolling.Window'>
-
-
- In order to use the `SciPy` Gaussian window we need to provide the parameters
- `M` and `std`. The parameter `M` corresponds to 2 in our example.
- We pass the second parameter `std` as a parameter of the following method
- (`sum` in this case):
-
- >>> ser.rolling(2, win_type='gaussian').sum(std=3)
- 0 NaN
- 1 0.986207
- 2 5.917243
- 3 6.903450
- 4 9.862071
- dtype: float64
- """
- ),
- window_method="rolling",
- aggregation_description="weighted window sum",
- agg_method="sum",
- )
- def sum(self, numeric_only: bool = False, **kwargs):
- window_func = window_aggregations.roll_weighted_sum
- # error: Argument 1 to "_apply" of "Window" has incompatible type
- # "Callable[[ndarray, ndarray, int], ndarray]"; expected
- # "Callable[[ndarray, int, int], ndarray]"
- return self._apply(
- window_func, # type: ignore[arg-type]
- name="sum",
- numeric_only=numeric_only,
- **kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- kwargs_scipy,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """\
- >>> ser = pd.Series([0, 1, 5, 2, 8])
-
- To get an instance of :class:`~pandas.core.window.rolling.Window` we need
- to pass the parameter `win_type`.
-
- >>> type(ser.rolling(2, win_type='gaussian'))
- <class 'pandas.core.window.rolling.Window'>
-
-
- In order to use the `SciPy` Gaussian window we need to provide the parameters
- `M` and `std`. The parameter `M` corresponds to 2 in our example.
- We pass the second parameter `std` as a parameter of the following method:
-
- >>> ser.rolling(2, win_type='gaussian').mean(std=3)
- 0 NaN
- 1 0.5
- 2 3.0
- 3 3.5
- 4 5.0
- dtype: float64
- """
- ),
- window_method="rolling",
- aggregation_description="weighted window mean",
- agg_method="mean",
- )
- def mean(self, numeric_only: bool = False, **kwargs):
- window_func = window_aggregations.roll_weighted_mean
- # error: Argument 1 to "_apply" of "Window" has incompatible type
- # "Callable[[ndarray, ndarray, int], ndarray]"; expected
- # "Callable[[ndarray, int, int], ndarray]"
- return self._apply(
- window_func, # type: ignore[arg-type]
- name="mean",
- numeric_only=numeric_only,
- **kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- dedent(
- """
- ddof : int, default 1
- Delta Degrees of Freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- kwargs_scipy,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """\
- >>> ser = pd.Series([0, 1, 5, 2, 8])
-
- To get an instance of :class:`~pandas.core.window.rolling.Window` we need
- to pass the parameter `win_type`.
-
- >>> type(ser.rolling(2, win_type='gaussian'))
- <class 'pandas.core.window.rolling.Window'>
-
-
- In order to use the `SciPy` Gaussian window we need to provide the parameters
- `M` and `std`. The parameter `M` corresponds to 2 in our example.
- We pass the second parameter `std` as a parameter of the following method:
-
- >>> ser.rolling(2, win_type='gaussian').var(std=3)
- 0 NaN
- 1 0.5
- 2 8.0
- 3 4.5
- 4 18.0
- dtype: float64
- """
- ),
- window_method="rolling",
- aggregation_description="weighted window variance",
- agg_method="var",
- )
- def var(self, ddof: int = 1, numeric_only: bool = False, **kwargs):
- window_func = partial(window_aggregations.roll_weighted_var, ddof=ddof)
- kwargs.pop("name", None)
- return self._apply(window_func, name="var", numeric_only=numeric_only, **kwargs)
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- dedent(
- """
- ddof : int, default 1
- Delta Degrees of Freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- kwargs_scipy,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """\
- >>> ser = pd.Series([0, 1, 5, 2, 8])
-
- To get an instance of :class:`~pandas.core.window.rolling.Window` we need
- to pass the parameter `win_type`.
-
- >>> type(ser.rolling(2, win_type='gaussian'))
- <class 'pandas.core.window.rolling.Window'>
-
-
- In order to use the `SciPy` Gaussian window we need to provide the parameters
- `M` and `std`. The parameter `M` corresponds to 2 in our example.
- We pass the second parameter `std` as a parameter of the following method:
-
- >>> ser.rolling(2, win_type='gaussian').std(std=3)
- 0 NaN
- 1 0.707107
- 2 2.828427
- 3 2.121320
- 4 4.242641
- dtype: float64
- """
- ),
- window_method="rolling",
- aggregation_description="weighted window standard deviation",
- agg_method="std",
- )
- def std(self, ddof: int = 1, numeric_only: bool = False, **kwargs):
- return zsqrt(
- self.var(ddof=ddof, name="std", numeric_only=numeric_only, **kwargs)
- )
-
-
-class RollingAndExpandingMixin(BaseWindow):
- def count(self, numeric_only: bool = False):
- window_func = window_aggregations.roll_sum
- return self._apply(window_func, name="count", numeric_only=numeric_only)
-
- def apply(
- self,
- func: Callable[..., Any],
- raw: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- args: tuple[Any, ...] | None = None,
- kwargs: dict[str, Any] | None = None,
- ):
- if args is None:
- args = ()
- if kwargs is None:
- kwargs = {}
-
- if not is_bool(raw):
- raise ValueError("raw parameter must be `True` or `False`")
-
- numba_args: tuple[Any, ...] = ()
- if maybe_use_numba(engine):
- if raw is False:
- raise ValueError("raw must be `True` when using the numba engine")
- numba_args, kwargs = prepare_function_arguments(
- func, args, kwargs, num_required_args=1
- )
- if self.method == "single":
- apply_func = generate_numba_apply_func(
- func, **get_jit_arguments(engine_kwargs)
- )
- else:
- apply_func = generate_numba_table_func(
- func, **get_jit_arguments(engine_kwargs)
- )
- elif engine in ("cython", None):
- if engine_kwargs is not None:
- raise ValueError("cython engine does not accept engine_kwargs")
- apply_func = self._generate_cython_apply_func(args, kwargs, raw, func)
- else:
- raise ValueError("engine must be either 'numba' or 'cython'")
-
- return self._apply(
- apply_func,
- name="apply",
- numba_args=numba_args,
- )
-
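
As the branching above enforces, the numba engine only accepts ``raw=True``, i.e. the UDF sees each window as a bare ndarray rather than a Series. A hedged usage sketch (the cython engine is the default; ``engine="numba"`` additionally requires numba to be installed):

```python
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])

# raw=True: the callable receives a numpy array per window.
print(s.rolling(2).apply(lambda a: a[-1] - a[0], raw=True))
# 0    NaN
# 1    1.0
# 2    1.0
# 3    1.0
# dtype: float64
```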
- def _generate_cython_apply_func(
- self,
- args: tuple[Any, ...],
- kwargs: dict[str, Any],
- raw: bool | np.bool_,
- function: Callable[..., Any],
- ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]:
- from pandas import Series
-
- window_func = partial(
- window_aggregations.roll_apply,
- args=args,
- kwargs=kwargs,
- raw=bool(raw),
- function=function,
- )
-
- def apply_func(values, begin, end, min_periods, raw=raw):
- if not raw:
- # GH 45912
- values = Series(values, index=self._on, copy=False)
- return window_func(values, begin, end, min_periods)
-
- return apply_func
-
- @overload
- def pipe(
- self,
- func: Callable[Concatenate[Self, P], T],
- *args: P.args,
- **kwargs: P.kwargs,
- ) -> T: ...
-
- @overload
- def pipe(
- self,
- func: tuple[Callable[..., T], str],
- *args: Any,
- **kwargs: Any,
- ) -> T: ...
-
- def pipe(
- self,
- func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
- *args: Any,
- **kwargs: Any,
- ) -> T:
- return com.pipe(self, func, *args, **kwargs)
-
- def sum(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- if maybe_use_numba(engine):
- if self.method == "table":
- func = generate_manual_numpy_nan_agg_with_axis(np.nansum)
- return self.apply(
- func,
- raw=True,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
- else:
- from pandas.core._numba.kernels import sliding_sum
-
- return self._numba_apply(sliding_sum, engine_kwargs)
- window_func = window_aggregations.roll_sum
- return self._apply(window_func, name="sum", numeric_only=numeric_only)
-
- def max(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- if maybe_use_numba(engine):
- if self.method == "table":
- func = generate_manual_numpy_nan_agg_with_axis(np.nanmax)
- return self.apply(
- func,
- raw=True,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
- else:
- from pandas.core._numba.kernels import sliding_min_max
-
- return self._numba_apply(sliding_min_max, engine_kwargs, is_max=True)
- window_func = window_aggregations.roll_max
- return self._apply(window_func, name="max", numeric_only=numeric_only)
-
- def min(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- if maybe_use_numba(engine):
- if self.method == "table":
- func = generate_manual_numpy_nan_agg_with_axis(np.nanmin)
- return self.apply(
- func,
- raw=True,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
- else:
- from pandas.core._numba.kernels import sliding_min_max
-
- return self._numba_apply(sliding_min_max, engine_kwargs, is_max=False)
- window_func = window_aggregations.roll_min
- return self._apply(window_func, name="min", numeric_only=numeric_only)
-
- def mean(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- if maybe_use_numba(engine):
- if self.method == "table":
- func = generate_manual_numpy_nan_agg_with_axis(np.nanmean)
- return self.apply(
- func,
- raw=True,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
- else:
- from pandas.core._numba.kernels import sliding_mean
-
- return self._numba_apply(sliding_mean, engine_kwargs)
- window_func = window_aggregations.roll_mean
- return self._apply(window_func, name="mean", numeric_only=numeric_only)
-
- def median(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- if maybe_use_numba(engine):
- if self.method == "table":
- func = generate_manual_numpy_nan_agg_with_axis(np.nanmedian)
- else:
- func = np.nanmedian
-
- return self.apply(
- func,
- raw=True,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
- window_func = window_aggregations.roll_median_c
- return self._apply(window_func, name="median", numeric_only=numeric_only)
-
- def std(
- self,
- ddof: int = 1,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- if maybe_use_numba(engine):
- if self.method == "table":
- raise NotImplementedError("std not supported with method='table'")
- from pandas.core._numba.kernels import sliding_var
-
- return zsqrt(self._numba_apply(sliding_var, engine_kwargs, ddof=ddof))
- window_func = window_aggregations.roll_var
-
- def zsqrt_func(values, begin, end, min_periods):
- return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof))
-
- return self._apply(
- zsqrt_func,
- name="std",
- numeric_only=numeric_only,
- )
-
- def var(
- self,
- ddof: int = 1,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- if maybe_use_numba(engine):
- if self.method == "table":
- raise NotImplementedError("var not supported with method='table'")
- from pandas.core._numba.kernels import sliding_var
-
- return self._numba_apply(sliding_var, engine_kwargs, ddof=ddof)
- window_func = partial(window_aggregations.roll_var, ddof=ddof)
- return self._apply(
- window_func,
- name="var",
- numeric_only=numeric_only,
- )
-
- def skew(self, numeric_only: bool = False):
- window_func = window_aggregations.roll_skew
- return self._apply(
- window_func,
- name="skew",
- numeric_only=numeric_only,
- )
-
- def sem(self, ddof: int = 1, numeric_only: bool = False):
- # Raise here so error message says sem instead of std
- self._validate_numeric_only("sem", numeric_only)
- return self.std(numeric_only=numeric_only) / (
- self.count(numeric_only=numeric_only) - ddof
- ).pow(0.5)
-
- def kurt(self, numeric_only: bool = False):
- window_func = window_aggregations.roll_kurt
- return self._apply(
- window_func,
- name="kurt",
- numeric_only=numeric_only,
- )
-
- def first(self, numeric_only: bool = False):
- window_func = window_aggregations.roll_first
- return self._apply(
- window_func,
- name="first",
- numeric_only=numeric_only,
- )
-
- def last(self, numeric_only: bool = False):
- window_func = window_aggregations.roll_last
- return self._apply(
- window_func,
- name="last",
- numeric_only=numeric_only,
- )
-
- def quantile(
- self,
- q: float,
- interpolation: QuantileInterpolation = "linear",
- numeric_only: bool = False,
- ):
- if q == 1.0:
- window_func = window_aggregations.roll_max
- elif q == 0.0:
- window_func = window_aggregations.roll_min
- else:
- window_func = partial(
- window_aggregations.roll_quantile,
- quantile=q,
- interpolation=interpolation,
- )
-
- return self._apply(window_func, name="quantile", numeric_only=numeric_only)
-
- def rank(
- self,
- method: WindowingRankType = "average",
- ascending: bool = True,
- pct: bool = False,
- numeric_only: bool = False,
- ):
- window_func = partial(
- window_aggregations.roll_rank,
- method=method,
- ascending=ascending,
- percentile=pct,
- )
-
- return self._apply(window_func, name="rank", numeric_only=numeric_only)
-
- def nunique(
- self,
- numeric_only: bool = False,
- ):
- window_func = partial(
- window_aggregations.roll_nunique,
- )
-
- return self._apply(window_func, name="nunique", numeric_only=numeric_only)
-
- def cov(
- self,
- other: DataFrame | Series | None = None,
- pairwise: bool | None = None,
- ddof: int = 1,
- numeric_only: bool = False,
- ):
- if self.step is not None:
- raise NotImplementedError("step not implemented for cov")
- self._validate_numeric_only("cov", numeric_only)
-
- from pandas import Series
-
- def cov_func(x, y):
- x_array = self._prep_values(x)
- y_array = self._prep_values(y)
- window_indexer = self._get_window_indexer()
- min_periods = (
- self.min_periods
- if self.min_periods is not None
- else window_indexer.window_size
- )
- start, end = window_indexer.get_window_bounds(
- num_values=len(x_array),
- min_periods=min_periods,
- center=self.center,
- closed=self.closed,
- step=self.step,
- )
- self._check_window_bounds(start, end, len(x_array))
-
- with np.errstate(all="ignore"):
- mean_x_y = window_aggregations.roll_mean(
- x_array * y_array, start, end, min_periods
- )
- mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
- mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
- count_x_y = window_aggregations.roll_sum(
- notna(x_array + y_array).astype(np.float64), start, end, 0
- )
- result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof))
- return Series(result, index=x.index, name=x.name, copy=False)
-
- return self._apply_pairwise(
- self._selected_obj, other, pairwise, cov_func, numeric_only
- )
-
- def corr(
- self,
- other: DataFrame | Series | None = None,
- pairwise: bool | None = None,
- ddof: int = 1,
- numeric_only: bool = False,
- ):
- if self.step is not None:
- raise NotImplementedError("step not implemented for corr")
- self._validate_numeric_only("corr", numeric_only)
-
- from pandas import Series
-
- def corr_func(x, y):
- x_array = self._prep_values(x)
- y_array = self._prep_values(y)
- window_indexer = self._get_window_indexer()
- min_periods = (
- self.min_periods
- if self.min_periods is not None
- else window_indexer.window_size
- )
- start, end = window_indexer.get_window_bounds(
- num_values=len(x_array),
- min_periods=min_periods,
- center=self.center,
- closed=self.closed,
- step=self.step,
- )
- self._check_window_bounds(start, end, len(x_array))
-
- with np.errstate(all="ignore"):
- mean_x_y = window_aggregations.roll_mean(
- x_array * y_array, start, end, min_periods
- )
- mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
- mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
- count_x_y = window_aggregations.roll_sum(
- notna(x_array + y_array).astype(np.float64), start, end, 0
- )
- x_var = window_aggregations.roll_var(
- x_array, start, end, min_periods, ddof
- )
- y_var = window_aggregations.roll_var(
- y_array, start, end, min_periods, ddof
- )
- numerator = (mean_x_y - mean_x * mean_y) * (
- count_x_y / (count_x_y - ddof)
- )
- denominator = (x_var * y_var) ** 0.5
- result = numerator / denominator
- return Series(result, index=x.index, name=x.name, copy=False)
-
- return self._apply_pairwise(
- self._selected_obj, other, pairwise, corr_func, numeric_only
- )
-
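
``corr_func`` is the textbook estimator: bias-corrected rolling covariance over the product of the rolling standard deviations. A quick numeric check for one fully-formed window (illustrative data):

```python
import numpy as np
import pandas as pd

x = pd.Series([1.0, 2.0, 4.0])
y = pd.Series([1.0, 3.0, 2.0])

# The last entry of a size-3 rolling corr spans the whole series,
# so it must equal the global Pearson correlation.
assert np.isclose(x.rolling(3).corr(y).iloc[-1], np.corrcoef(x, y)[0, 1])
```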
-
-class Rolling(RollingAndExpandingMixin):
- _attributes: list[str] = [
- "window",
- "min_periods",
- "center",
- "win_type",
- "on",
- "closed",
- "step",
- "method",
- ]
-
- def _validate(self) -> None:
- super()._validate()
-
- # we allow rolling on a datetimelike index
- if (
- self.obj.empty
- or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
- or (isinstance(self._on.dtype, ArrowDtype) and self._on.dtype.kind in "mM")
- ) and isinstance(self.window, (str, BaseOffset, timedelta)):
- self._validate_datetimelike_monotonic()
-
- # this will raise ValueError on non-fixed freqs
- try:
- freq = to_offset(self.window)
- except (TypeError, ValueError) as err:
- raise ValueError(
- f"passed window {self.window} is not "
- "compatible with a datetimelike index"
- ) from err
- if isinstance(self._on, PeriodIndex):
- # error: Incompatible types in assignment (expression has type
- # "float", variable has type "Optional[int]")
- self._win_freq_i8 = freq.nanos / ( # type: ignore[assignment]
- self._on.freq.nanos / self._on.freq.n
- )
- else:
- try:
- unit = dtype_to_unit(self._on.dtype) # type: ignore[arg-type]
- except TypeError:
- # if not a datetime dtype, e.g. for empty dataframes
- unit = "ns"
- self._win_freq_i8 = Timedelta(freq.nanos).as_unit(unit)._value
-
- # min_periods must be an integer
- if self.min_periods is None:
- self.min_periods = 1
-
- if self.step is not None:
- raise NotImplementedError(
- "step is not supported with frequency windows"
- )
-
- elif isinstance(self.window, BaseIndexer):
- # Passed BaseIndexer subclass should handle all other rolling kwargs
- pass
- elif not is_integer(self.window) or self.window < 0:
- raise ValueError("window must be an integer 0 or greater")
-
- def _validate_datetimelike_monotonic(self) -> None:
- """
- Validate self._on is monotonic (increasing or decreasing) and has
- no NaT values for frequency windows.
- """
- if self._on.hasnans:
- self._raise_monotonic_error("values must not have NaT")
- if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
- self._raise_monotonic_error("values must be monotonic")
-
- def _raise_monotonic_error(self, msg: str):
- on = self.on
- if on is None:
- on = "index"
- raise ValueError(f"{on} {msg}")
-
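
Frequency windows therefore reject an unsorted datetime axis up front; a small sketch of the failure mode (illustrative data):

```python
import pandas as pd

idx = pd.to_datetime(["2024-01-02", "2024-01-01"])  # not monotonic
s = pd.Series([1.0, 2.0], index=idx)

try:
    s.rolling("1D").sum()
except ValueError as err:
    print(err)  # index values must be monotonic
```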
- @doc(
- _shared_docs["aggregate"],
- see_also=dedent(
- """
- See Also
- --------
- Series.rolling : Calling object with Series data.
- DataFrame.rolling : Calling object with DataFrame data.
- """
- ),
- examples=dedent(
- """
- Examples
- --------
- >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
- >>> df
- A B C
- 0 1 4 7
- 1 2 5 8
- 2 3 6 9
-
- >>> df.rolling(2).sum()
- A B C
- 0 NaN NaN NaN
- 1 3.0 9.0 15.0
- 2 5.0 11.0 17.0
-
- >>> df.rolling(2).agg({"A": "sum", "B": "min"})
- A B
- 0 NaN NaN
- 1 3.0 4.0
- 2 5.0 5.0
- """
- ),
- klass="Series/Dataframe",
- axis="",
- )
- def aggregate(self, func=None, *args, **kwargs):
- return super().aggregate(func, *args, **kwargs)
-
- agg = aggregate
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """
- >>> s = pd.Series([2, 3, np.nan, 10])
- >>> s.rolling(2).count()
- 0 NaN
- 1 2.0
- 2 1.0
- 3 1.0
- dtype: float64
- >>> s.rolling(3).count()
- 0 NaN
- 1 NaN
- 2 2.0
- 3 2.0
- dtype: float64
- >>> s.rolling(4).count()
- 0 NaN
- 1 NaN
- 2 NaN
- 3 3.0
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="count of non NaN observations",
- agg_method="count",
- )
- def count(self, numeric_only: bool = False):
- return super().count(numeric_only)
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- window_apply_parameters,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """\
- >>> ser = pd.Series([1, 6, 5, 4])
- >>> ser.rolling(2).apply(lambda s: s.sum() - s.min())
- 0 NaN
- 1 6.0
- 2 6.0
- 3 5.0
- dtype: float64
- """
- ),
- window_method="rolling",
- aggregation_description="custom aggregation function",
- agg_method="apply",
- )
- def apply(
- self,
- func: Callable[..., Any],
- raw: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- args: tuple[Any, ...] | None = None,
- kwargs: dict[str, Any] | None = None,
- ):
- return super().apply(
- func,
- raw=raw,
- engine=engine,
- engine_kwargs=engine_kwargs,
- args=args,
- kwargs=kwargs,
- )
-
- @overload
- def pipe(
- self,
- func: Callable[Concatenate[Self, P], T],
- *args: P.args,
- **kwargs: P.kwargs,
- ) -> T: ...
-
- @overload
- def pipe(
- self,
- func: tuple[Callable[..., T], str],
- *args: Any,
- **kwargs: Any,
- ) -> T: ...
-
- @final
- @Substitution(
- klass="Rolling",
- examples="""
- >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
- ... index=pd.date_range('2012-08-02', periods=4))
- >>> df
- A
- 2012-08-02 1
- 2012-08-03 2
- 2012-08-04 3
- 2012-08-05 4
-
- To get the difference between each rolling 2-day window's maximum and minimum
- value in one pass, you can do
-
- >>> df.rolling('2D').pipe(lambda x: x.max() - x.min())
- A
- 2012-08-02 0.0
- 2012-08-03 1.0
- 2012-08-04 1.0
- 2012-08-05 1.0""",
- )
- @Appender(template_pipe)
- def pipe(
- self,
- func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
- *args: Any,
- **kwargs: Any,
- ) -> T:
- return super().pipe(func, *args, **kwargs)
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- window_agg_numba_parameters(),
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Notes"),
- numba_notes,
- create_section_header("Examples"),
- dedent(
- """
- >>> s = pd.Series([1, 2, 3, 4, 5])
- >>> s
- 0 1
- 1 2
- 2 3
- 3 4
- 4 5
- dtype: int64
-
- >>> s.rolling(3).sum()
- 0 NaN
- 1 NaN
- 2 6.0
- 3 9.0
- 4 12.0
- dtype: float64
-
- >>> s.rolling(3, center=True).sum()
- 0 NaN
- 1 6.0
- 2 9.0
- 3 12.0
- 4 NaN
- dtype: float64
-
- For DataFrame, each sum is computed column-wise.
-
- >>> df = pd.DataFrame({{"A": s, "B": s ** 2}})
- >>> df
- A B
- 0 1 1
- 1 2 4
- 2 3 9
- 3 4 16
- 4 5 25
-
- >>> df.rolling(3).sum()
- A B
- 0 NaN NaN
- 1 NaN NaN
- 2 6.0 14.0
- 3 9.0 29.0
- 4 12.0 50.0
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="sum",
- agg_method="sum",
- )
- def sum(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- return super().sum(
- numeric_only=numeric_only,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- dedent(
- """
- *args : iterable, optional
- Positional arguments passed into ``func``.\n
- """
- ).replace("\n", "", 1),
- window_agg_numba_parameters(),
- dedent(
- """
- **kwargs : mapping, optional
- A dictionary of keyword arguments passed into ``func``.\n
- """
- ).replace("\n", "", 1),
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Notes"),
- numba_notes,
- create_section_header("Examples"),
- dedent(
- """\
- >>> ser = pd.Series([1, 2, 3, 4])
- >>> ser.rolling(2).max()
- 0 NaN
- 1 2.0
- 2 3.0
- 3 4.0
- dtype: float64
- """
- ),
- window_method="rolling",
- aggregation_description="maximum",
- agg_method="max",
- )
- def max(
- self,
- numeric_only: bool = False,
- *args,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- **kwargs,
- ):
- return super().max(
- numeric_only=numeric_only,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- window_agg_numba_parameters(),
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Notes"),
- numba_notes,
- create_section_header("Examples"),
- dedent(
- """
- Performing a rolling minimum with a window size of 3.
-
- >>> s = pd.Series([4, 3, 5, 2, 6])
- >>> s.rolling(3).min()
- 0 NaN
- 1 NaN
- 2 3.0
- 3 2.0
- 4 2.0
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="minimum",
- agg_method="min",
- )
- def min(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- return super().min(
- numeric_only=numeric_only,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- window_agg_numba_parameters(),
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Notes"),
- numba_notes,
- create_section_header("Examples"),
- dedent(
- """
- The below examples will show rolling mean calculations with window sizes of
- two and three, respectively.
-
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s.rolling(2).mean()
- 0 NaN
- 1 1.5
- 2 2.5
- 3 3.5
- dtype: float64
-
- >>> s.rolling(3).mean()
- 0 NaN
- 1 NaN
- 2 2.0
- 3 3.0
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="mean",
- agg_method="mean",
- )
- def mean(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- return super().mean(
- numeric_only=numeric_only,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- window_agg_numba_parameters(),
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Notes"),
- numba_notes,
- create_section_header("Examples"),
- dedent(
- """
- Compute the rolling median of a series with a window size of 3.
-
- >>> s = pd.Series([0, 1, 2, 3, 4])
- >>> s.rolling(3).median()
- 0 NaN
- 1 NaN
- 2 1.0
- 3 2.0
- 4 3.0
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="median",
- agg_method="median",
- )
- def median(
- self,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- return super().median(
- numeric_only=numeric_only,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- dedent(
- """
- ddof : int, default 1
- Delta Degrees of Freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- window_agg_numba_parameters("1.4"),
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- "numpy.std : Equivalent method for NumPy array.\n",
- template_see_also,
- create_section_header("Notes"),
- dedent(
- """
- The default ``ddof`` of 1 used in :meth:`Series.std` is different
- than the default ``ddof`` of 0 in :func:`numpy.std`.
-
- A minimum of one period is required for the rolling calculation.\n
- """
- ).replace("\n", "", 1),
- create_section_header("Examples"),
- dedent(
- """
- >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
- >>> s.rolling(3).std()
- 0 NaN
- 1 NaN
- 2 0.577350
- 3 1.000000
- 4 1.000000
- 5 1.154701
- 6 0.000000
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="standard deviation",
- agg_method="std",
- )
- def std(
- self,
- ddof: int = 1,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- return super().std(
- ddof=ddof,
- numeric_only=numeric_only,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- dedent(
- """
- ddof : int, default 1
- Delta Degrees of Freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- window_agg_numba_parameters("1.4"),
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- "numpy.var : Equivalent method for NumPy array.\n",
- template_see_also,
- create_section_header("Notes"),
- dedent(
- """
- The default ``ddof`` of 1 used in :meth:`Series.var` is different
- than the default ``ddof`` of 0 in :func:`numpy.var`.
-
- A minimum of one period is required for the rolling calculation.\n
- """
- ).replace("\n", "", 1),
- create_section_header("Examples"),
- dedent(
- """
- >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
- >>> s.rolling(3).var()
- 0 NaN
- 1 NaN
- 2 0.333333
- 3 1.000000
- 4 1.000000
- 5 1.333333
- 6 0.000000
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="variance",
- agg_method="var",
- )
- def var(
- self,
- ddof: int = 1,
- numeric_only: bool = False,
- engine: Literal["cython", "numba"] | None = None,
- engine_kwargs: dict[str, bool] | None = None,
- ):
- return super().var(
- ddof=ddof,
- numeric_only=numeric_only,
- engine=engine,
- engine_kwargs=engine_kwargs,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- "scipy.stats.skew : Third moment of a probability density.\n",
- template_see_also,
- create_section_header("Notes"),
- dedent(
- """
- A minimum of three periods is required for the rolling calculation.\n
- """
- ),
- create_section_header("Examples"),
- dedent(
- """\
- >>> ser = pd.Series([1, 5, 2, 7, 15, 6])
- >>> ser.rolling(3).skew().round(6)
- 0 NaN
- 1 NaN
- 2 1.293343
- 3 -0.585583
- 4 0.670284
- 5 1.652317
- dtype: float64
- """
- ),
- window_method="rolling",
- aggregation_description="unbiased skewness",
- agg_method="skew",
- )
- def skew(self, numeric_only: bool = False):
- return super().skew(numeric_only=numeric_only)
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- dedent(
- """
- ddof : int, default 1
- Delta Degrees of Freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Notes"),
- "A minimum of one period is required for the calculation.\n\n",
- create_section_header("Examples"),
- dedent(
- """
- >>> s = pd.Series([0, 1, 2, 3])
- >>> s.rolling(2, min_periods=1).sem()
- 0 NaN
- 1 0.707107
- 2 0.707107
- 3 0.707107
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="standard error of mean",
- agg_method="sem",
- )
- def sem(self, ddof: int = 1, numeric_only: bool = False):
- # Raise here so error message says sem instead of std
- self._validate_numeric_only("sem", numeric_only)
- return self.std(numeric_only=numeric_only) / (
- self.count(numeric_only) - ddof
- ).pow(0.5)
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- "scipy.stats.kurtosis : Reference SciPy method.\n",
- template_see_also,
- create_section_header("Notes"),
- "A minimum of four periods is required for the calculation.\n\n",
- create_section_header("Examples"),
- dedent(
- """
- The example below will show a rolling calculation with a window size of
- four matching the equivalent function call using `scipy.stats`.
-
- >>> arr = [1, 2, 3, 4, 999]
- >>> import scipy.stats
- >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
- -1.200000
- >>> print(f"{{scipy.stats.kurtosis(arr[1:], bias=False):.6f}}")
- 3.999946
- >>> s = pd.Series(arr)
- >>> s.rolling(4).kurt()
- 0 NaN
- 1 NaN
- 2 NaN
- 3 -1.200000
- 4 3.999946
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="Fisher's definition of kurtosis without bias",
- agg_method="kurt",
- )
- def kurt(self, numeric_only: bool = False):
- return super().kurt(numeric_only=numeric_only)
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- dedent(
- """
- GroupBy.first : Similar method for GroupBy objects.
- Rolling.last : Method to get the last element in each window.\n
- """
- ).replace("\n", "", 1),
- create_section_header("Examples"),
- dedent(
- """
- The example below will show a rolling calculation with a window size of
- three.
-
- >>> s = pd.Series(range(5))
- >>> s.rolling(3).first()
- 0 NaN
- 1 NaN
- 2 0.0
- 3 1.0
- 4 2.0
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="First (left-most) element of the window",
- agg_method="first",
- )
- def first(self, numeric_only: bool = False):
- return super().first(numeric_only=numeric_only)
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- dedent(
- """
- GroupBy.last : Similar method for GroupBy objects.
- Rolling.first : Method to get the first element in each window.\n
- """
- ).replace("\n", "", 1),
- create_section_header("Examples"),
- dedent(
- """
- The example below will show a rolling calculation with a window size of
- three.
-
- >>> s = pd.Series(range(5))
- >>> s.rolling(3).last()
- 0 NaN
- 1 NaN
- 2 2.0
- 3 3.0
- 4 4.0
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="Last (right-most) element of the window",
- agg_method="last",
- )
- def last(self, numeric_only: bool = False):
- return super().last(numeric_only=numeric_only)
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- dedent(
- """
- q : float
- Quantile to compute. 0 <= quantile <= 1.
-
- .. deprecated:: 2.1.0
- This was renamed from 'quantile' to 'q' in version 2.1.0.
- interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
- This optional parameter specifies the interpolation method to use,
- when the desired quantile lies between two data points `i` and `j`:
-
- * linear: `i + (j - i) * fraction`, where `fraction` is the
- fractional part of the index surrounded by `i` and `j`.
- * lower: `i`.
- * higher: `j`.
- * nearest: `i` or `j` whichever is nearest.
- * midpoint: (`i` + `j`) / 2.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """
- >>> s = pd.Series([1, 2, 3, 4])
- >>> s.rolling(2).quantile(.4, interpolation='lower')
- 0 NaN
- 1 1.0
- 2 2.0
- 3 3.0
- dtype: float64
-
- >>> s.rolling(2).quantile(.4, interpolation='midpoint')
- 0 NaN
- 1 1.5
- 2 2.5
- 3 3.5
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="quantile",
- agg_method="quantile",
- )
- def quantile(
- self,
- q: float,
- interpolation: QuantileInterpolation = "linear",
- numeric_only: bool = False,
- ):
- return super().quantile(
- q=q,
- interpolation=interpolation,
- numeric_only=numeric_only,
- )
-
- @doc(
- template_header,
- ".. versionadded:: 1.4.0 \n\n",
- create_section_header("Parameters"),
- dedent(
- """
- method : {{'average', 'min', 'max'}}, default 'average'
- How to rank the group of records that have the same value (i.e. ties):
-
- * average: average rank of the group
- * min: lowest rank in the group
- * max: highest rank in the group
-
- ascending : bool, default True
- Whether or not the elements should be ranked in ascending order.
- pct : bool, default False
- Whether or not to display the returned rankings in percentile
- form.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """
- >>> s = pd.Series([1, 4, 2, 3, 5, 3])
- >>> s.rolling(3).rank()
- 0 NaN
- 1 NaN
- 2 2.0
- 3 2.0
- 4 3.0
- 5 1.5
- dtype: float64
-
- >>> s.rolling(3).rank(method="max")
- 0 NaN
- 1 NaN
- 2 2.0
- 3 2.0
- 4 3.0
- 5 2.0
- dtype: float64
-
- >>> s.rolling(3).rank(method="min")
- 0 NaN
- 1 NaN
- 2 2.0
- 3 2.0
- 4 3.0
- 5 1.0
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="rank",
- agg_method="rank",
- )
- def rank(
- self,
- method: WindowingRankType = "average",
- ascending: bool = True,
- pct: bool = False,
- numeric_only: bool = False,
- ):
- return super().rank(
- method=method,
- ascending=ascending,
- pct=pct,
- numeric_only=numeric_only,
- )
-
- @doc(
- template_header,
- ".. versionadded:: 3.0.0 \n\n",
- create_section_header("Parameters"),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """
- >>> s = pd.Series([1, 4, 2, np.nan, 3, 3, 4, 5])
- >>> s.rolling(3).nunique()
- 0 NaN
- 1 NaN
- 2 3.0
- 3 NaN
- 4 NaN
- 5 NaN
- 6 2.0
- 7 3.0
- dtype: float64
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="nunique",
- agg_method="nunique",
- )
- def nunique(
- self,
- numeric_only: bool = False,
- ):
- return super().nunique(
- numeric_only=numeric_only,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- dedent(
- """
- other : Series or DataFrame, optional
- If not supplied then will default to self and produce pairwise
- output.
- pairwise : bool, default None
- If False then only matching columns between self and other will be
- used and the output will be a DataFrame.
- If True then all pairwise combinations will be calculated and the
- output will be a MultiIndexed DataFrame in the case of DataFrame
- inputs. In the case of missing elements, only complete pairwise
- observations will be used.
- ddof : int, default 1
- Delta Degrees of Freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- template_see_also,
- create_section_header("Examples"),
- dedent(
- """\
- >>> ser1 = pd.Series([1, 2, 3, 4])
- >>> ser2 = pd.Series([1, 4, 5, 8])
- >>> ser1.rolling(2).cov(ser2)
- 0 NaN
- 1 1.5
- 2 0.5
- 3 1.5
- dtype: float64
- """
- ),
- window_method="rolling",
- aggregation_description="sample covariance",
- agg_method="cov",
- )
- def cov(
- self,
- other: DataFrame | Series | None = None,
- pairwise: bool | None = None,
- ddof: int = 1,
- numeric_only: bool = False,
- ):
- return super().cov(
- other=other,
- pairwise=pairwise,
- ddof=ddof,
- numeric_only=numeric_only,
- )
-
- @doc(
- template_header,
- create_section_header("Parameters"),
- dedent(
- """
- other : Series or DataFrame, optional
- If not supplied then will default to self and produce pairwise
- output.
- pairwise : bool, default None
- If False then only matching columns between self and other will be
- used and the output will be a DataFrame.
- If True then all pairwise combinations will be calculated and the
- output will be a MultiIndexed DataFrame in the case of DataFrame
- inputs. In the case of missing elements, only complete pairwise
- observations will be used.
- ddof : int, default 1
- Delta Degrees of Freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- """
- ).replace("\n", "", 1),
- kwargs_numeric_only,
- create_section_header("Returns"),
- template_returns,
- create_section_header("See Also"),
- dedent(
- """
- cov : Similar method to calculate covariance.
- numpy.corrcoef : NumPy Pearson's correlation calculation.
- """
- ).replace("\n", "", 1),
- template_see_also,
- create_section_header("Notes"),
- dedent(
- """
- This function uses Pearson's definition of correlation
- (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).
-
- When `other` is not specified, the output will be self correlation (e.g.
- all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
- set to `True`.
-
- Function will return ``NaN`` for correlations of equal valued sequences;
- this is the result of a 0/0 division error.
-
- When `pairwise` is set to `False`, only matching columns between `self` and
- `other` will be used.
-
- When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
- with the original index on the first level, and the `other` DataFrame
- columns on the second level.
-
- In the case of missing elements, only complete pairwise observations
- will be used.\n
- """
- ).replace("\n", "", 1),
- create_section_header("Examples"),
- dedent(
- """
- The below example shows a rolling calculation with a window size of
- four matching the equivalent function call using :meth:`numpy.corrcoef`.
-
- >>> v1 = [3, 3, 3, 5, 8]
- >>> v2 = [3, 4, 4, 4, 8]
- >>> np.corrcoef(v1[:-1], v2[:-1])
- array([[1. , 0.33333333],
- [0.33333333, 1. ]])
- >>> np.corrcoef(v1[1:], v2[1:])
- array([[1. , 0.9169493],
- [0.9169493, 1. ]])
- >>> s1 = pd.Series(v1)
- >>> s2 = pd.Series(v2)
- >>> s1.rolling(4).corr(s2)
- 0 NaN
- 1 NaN
- 2 NaN
- 3 0.333333
- 4 0.916949
- dtype: float64
-
- The below example shows a similar rolling calculation on a
- DataFrame using the pairwise option.
-
- >>> matrix = np.array([[51., 35.],
- ... [49., 30.],
- ... [47., 32.],
- ... [46., 31.],
- ... [50., 36.]])
- >>> np.corrcoef(matrix[:-1, 0], matrix[:-1, 1])
- array([[1. , 0.6263001],
- [0.6263001, 1. ]])
- >>> np.corrcoef(matrix[1:, 0], matrix[1:, 1])
- array([[1. , 0.55536811],
- [0.55536811, 1. ]])
- >>> df = pd.DataFrame(matrix, columns=['X', 'Y'])
- >>> df
- X Y
- 0 51.0 35.0
- 1 49.0 30.0
- 2 47.0 32.0
- 3 46.0 31.0
- 4 50.0 36.0
- >>> df.rolling(4).corr(pairwise=True)
- X Y
- 0 X NaN NaN
- Y NaN NaN
- 1 X NaN NaN
- Y NaN NaN
- 2 X NaN NaN
- Y NaN NaN
- 3 X 1.000000 0.626300
- Y 0.626300 1.000000
- 4 X 1.000000 0.555368
- Y 0.555368 1.000000
- """
- ).replace("\n", "", 1),
- window_method="rolling",
- aggregation_description="correlation",
- agg_method="corr",
- )
- def corr(
- self,
- other: DataFrame | Series | None = None,
- pairwise: bool | None = None,
- ddof: int = 1,
- numeric_only: bool = False,
- ):
- return super().corr(
- other=other,
- pairwise=pairwise,
- ddof=ddof,
- numeric_only=numeric_only,
- )
-
-
-Rolling.__doc__ = Window.__doc__
-
-
-class RollingGroupby(BaseWindowGroupby, Rolling):
- """
- Provide a rolling groupby implementation.
- """
-
- _attributes = Rolling._attributes + BaseWindowGroupby._attributes
-
- def _get_window_indexer(self) -> GroupbyIndexer:
- """
- Return an indexer class that will compute the window start and end bounds
-
- Returns
- -------
- GroupbyIndexer
- """
- rolling_indexer: type[BaseIndexer]
- indexer_kwargs: dict[str, Any] | None = None
- index_array = self._index_array
- if isinstance(self.window, BaseIndexer):
- rolling_indexer = type(self.window)
- indexer_kwargs = self.window.__dict__.copy()
- assert isinstance(indexer_kwargs, dict) # for mypy
- # We'll be using the index of each group later
- indexer_kwargs.pop("index_array", None)
- window = self.window
- elif self._win_freq_i8 is not None:
- rolling_indexer = VariableWindowIndexer
- # error: Incompatible types in assignment (expression has type
- # "int", variable has type "BaseIndexer")
- window = self._win_freq_i8 # type: ignore[assignment]
- else:
- rolling_indexer = FixedWindowIndexer
- window = self.window
- window_indexer = GroupbyIndexer(
- index_array=index_array,
- window_size=window,
- groupby_indices=self._grouper.indices,
- window_indexer=rolling_indexer,
- indexer_kwargs=indexer_kwargs,
- )
- return window_indexer
-
- def _validate_datetimelike_monotonic(self) -> None:
- """
- Validate that each group in self._on is monotonic
- """
- # GH 46061
- if self._on.hasnans:
- self._raise_monotonic_error("values must not have NaT")
- for group_indices in self._grouper.indices.values():
- group_on = self._on.take(group_indices)
- if not (
- group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing
- ):
- on = "index" if self.on is None else self.on
- raise ValueError(
- f"Each group within {on} must be monotonic. "
- f"Sort the values in {on} first."
- )
+"""
+Provide a generic structure to support window functions,
+similar to how we have a Groupby object.
+"""
+
+from __future__ import annotations
+
+import copy
+from datetime import timedelta
+from functools import partial
+import inspect
+from textwrap import dedent
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Literal,
+ final,
+ overload,
+)
+
+import numpy as np
+
+from pandas._libs.tslibs import (
+ BaseOffset,
+ Timedelta,
+ to_offset,
+)
+import pandas._libs.window.aggregations as window_aggregations
+from pandas.compat._optional import import_optional_dependency
+from pandas.errors import DataError
+from pandas.util._decorators import (
+ Appender,
+ Substitution,
+ doc,
+)
+
+from pandas.core.dtypes.common import (
+ ensure_float64,
+ is_bool,
+ is_integer,
+ is_numeric_dtype,
+ needs_i8_conversion,
+)
+from pandas.core.dtypes.dtypes import ArrowDtype
+from pandas.core.dtypes.generic import (
+ ABCDataFrame,
+ ABCSeries,
+)
+from pandas.core.dtypes.missing import notna
+
+from pandas import (
+ DataFrame,
+ Series,
+)
+from pandas.core._numba import executor
+from pandas.core.algorithms import factorize
+from pandas.core.apply import (
+ ResamplerWindowApply,
+ reconstruct_func,
+)
+from pandas.core.arrays import ExtensionArray
+from pandas.core.base import SelectionMixin
+import pandas.core.common as com
+from pandas.core.indexers.objects import (
+ BaseIndexer,
+ FixedWindowIndexer,
+ GroupbyIndexer,
+ VariableWindowIndexer,
+)
+from pandas.core.indexes.api import (
+ DatetimeIndex,
+ Index,
+ MultiIndex,
+ PeriodIndex,
+ TimedeltaIndex,
+)
+from pandas.core.reshape.concat import concat
+from pandas.core.util.numba_ import (
+ get_jit_arguments,
+ maybe_use_numba,
+ prepare_function_arguments,
+)
+from pandas.core.window.common import (
+ flex_binary_moment,
+ zsqrt,
+)
+from pandas.core.window.doc import (
+ _shared_docs,
+ create_section_header,
+ kwargs_numeric_only,
+ kwargs_scipy,
+ numba_notes,
+ template_header,
+ template_pipe,
+ template_returns,
+ template_see_also,
+ window_agg_numba_parameters,
+ window_apply_parameters,
+)
+from pandas.core.window.numba_ import (
+ generate_manual_numpy_nan_agg_with_axis,
+ generate_numba_apply_func,
+ generate_numba_table_func,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Callable,
+        Hashable,
+        Iterator,
+        Sized,
+    )
+
+ from pandas._typing import (
+ ArrayLike,
+ Concatenate,
+ NDFrameT,
+        P,
+        QuantileInterpolation,
+ Self,
+ T,
+ WindowingRankType,
+ npt,
+ )
+
+ from pandas.core.generic import NDFrame
+ from pandas.core.groupby.ops import BaseGrouper
+
+from pandas.core.arrays.datetimelike import dtype_to_unit
+
+
+class BaseWindow(SelectionMixin):
+ """Provides utilities for performing windowing operations."""
+
+ _attributes: list[str] = []
+ exclusions: frozenset[Hashable] = frozenset()
+ _on: Index
+
+ def __init__(
+ self,
+ obj: NDFrame,
+ window=None,
+ min_periods: int | None = None,
+ center: bool | None = False,
+ win_type: str | None = None,
+ on: str | Index | None = None,
+ closed: str | None = None,
+ step: int | None = None,
+ method: str = "single",
+ *,
+ selection=None,
+ ) -> None:
+ self.obj = obj
+ self.on = on
+ self.closed = closed
+ self.step = step
+ self.window = window
+ self.min_periods = min_periods
+ self.center = center
+ self.win_type = win_type
+ self.method = method
+ self._win_freq_i8: int | None = None
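+        # Resolve the axis to roll over: the object's index by default, a
+        # passed Index, or a DataFrame column named by ``on``.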
+ if self.on is None:
+ self._on = self.obj.index
+ elif isinstance(self.on, Index):
+ self._on = self.on
+ elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
+ self._on = Index(self.obj[self.on])
+ else:
+ raise ValueError(
+ f"invalid on specified as {self.on}, "
+ "must be a column (of DataFrame), an Index or None"
+ )
+
+ self._selection = selection
+ self._validate()
+
+ def _validate(self) -> None:
+ if self.center is not None and not is_bool(self.center):
+ raise ValueError("center must be a boolean")
+ if self.min_periods is not None:
+ if not is_integer(self.min_periods):
+ raise ValueError("min_periods must be an integer")
+ if self.min_periods < 0:
+ raise ValueError("min_periods must be >= 0")
+ if is_integer(self.window) and self.min_periods > self.window:
+ raise ValueError(
+ f"min_periods {self.min_periods} must be <= window {self.window}"
+ )
+ if self.closed is not None and self.closed not in [
+ "right",
+ "both",
+ "left",
+ "neither",
+ ]:
+ raise ValueError("closed must be 'right', 'left', 'both' or 'neither'")
+ if not isinstance(self.obj, (ABCSeries, ABCDataFrame)):
+ raise TypeError(f"invalid type: {type(self)}")
+ if isinstance(self.window, BaseIndexer):
+ # Validate that the passed BaseIndexer subclass has
+ # a get_window_bounds with the correct signature.
+ get_window_bounds_signature = inspect.signature(
+ self.window.get_window_bounds
+ ).parameters.keys()
+ expected_signature = inspect.signature(
+ BaseIndexer().get_window_bounds
+ ).parameters.keys()
+ if get_window_bounds_signature != expected_signature:
+ raise ValueError(
+ f"{type(self.window).__name__} does not implement "
+ f"the correct signature for get_window_bounds"
+ )
+ if self.method not in ["table", "single"]:
+ raise ValueError("method must be 'table' or 'single")
+ if self.step is not None:
+ if not is_integer(self.step):
+ raise ValueError("step must be an integer")
+ if self.step < 0:
+ raise ValueError("step must be >= 0")
+
+ def _check_window_bounds(
+ self, start: np.ndarray, end: np.ndarray, num_vals: int
+ ) -> None:
+ if len(start) != len(end):
+ raise ValueError(
+ f"start ({len(start)}) and end ({len(end)}) bounds must be the "
+ f"same length"
+ )
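+        # The expected number of windows is ceil(num_vals / step); a step of
+        # None is treated as 1, i.e. one window per value.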
+ if len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1):
+ raise ValueError(
+ f"start and end bounds ({len(start)}) must be the same length "
+ f"as the object ({num_vals}) divided by the step ({self.step}) "
+ f"if given and rounded up"
+ )
+
+ def _slice_axis_for_step(self, index: Index, result: Sized | None = None) -> Index:
+ """
+ Slices the index for a given result and the preset step.
+ """
+ return (
+ index
+ if result is None or len(result) == len(index)
+ else index[:: self.step]
+ )
+
+ def _validate_numeric_only(self, name: str, numeric_only: bool) -> None:
+ """
+ Validate numeric_only argument, raising if invalid for the input.
+
+ Parameters
+ ----------
+ name : str
+ Name of the operator (kernel).
+ numeric_only : bool
+ Value passed by user.
+ """
+ if (
+ self._selected_obj.ndim == 1
+ and numeric_only
+ and not is_numeric_dtype(self._selected_obj.dtype)
+ ):
+ raise NotImplementedError(
+ f"{type(self).__name__}.{name} does not implement numeric_only"
+ )
+
+ def _make_numeric_only(self, obj: NDFrameT) -> NDFrameT:
+ """Subset DataFrame to numeric columns.
+
+ Parameters
+ ----------
+ obj : DataFrame
+
+ Returns
+ -------
+ obj subset to numeric-only columns.
+ """
+ result = obj.select_dtypes(include=["number"], exclude=["timedelta"])
+ return result
+
+ def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
+ """
+ Split data into blocks & return conformed data.
+ """
+ # filter out the on from the object
+ if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2:
+ obj = obj.reindex(columns=obj.columns.difference([self.on], sort=False))
+ if obj.ndim > 1 and numeric_only:
+ obj = self._make_numeric_only(obj)
+ return obj
+
+ def _gotitem(self, key, ndim, subset=None):
+ """
+ Sub-classes to define. Return a sliced object.
+
+ Parameters
+ ----------
+ key : str / list of selections
+ ndim : {1, 2}
+ requested ndim of result
+ subset : object, default None
+ subset to act on
+ """
+ # create a new object to prevent aliasing
+ if subset is None:
+ subset = self.obj
+
+ # we need to make a shallow copy of ourselves
+ # with the same groupby
+ kwargs = {attr: getattr(self, attr) for attr in self._attributes}
+
+ selection = self._infer_selection(key, subset)
+ new_win = type(self)(subset, selection=selection, **kwargs)
+ return new_win
+
+ def __getattr__(self, attr: str):
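+        # Delegate unknown attributes to column selection, so e.g.
+        # ``df.rolling(2).A`` selects column "A".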
+ if attr in self._internal_names_set:
+ return object.__getattribute__(self, attr)
+ if attr in self.obj:
+ return self[attr]
+
+ raise AttributeError(
+ f"'{type(self).__name__}' object has no attribute '{attr}'"
+ )
+
+ def _dir_additions(self):
+ return self.obj._dir_additions()
+
+ def __repr__(self) -> str:
+ """
+ Provide a nice str repr of our rolling object.
+ """
+ attrs_list = (
+ f"{attr_name}={getattr(self, attr_name)}"
+ for attr_name in self._attributes
+ if getattr(self, attr_name, None) is not None and attr_name[0] != "_"
+ )
+ attrs = ",".join(attrs_list)
+ return f"{type(self).__name__} [{attrs}]"
+
+ def __iter__(self) -> Iterator:
+ obj = self._selected_obj.set_axis(self._on)
+ obj = self._create_data(obj)
+ indexer = self._get_window_indexer()
+
+ start, end = indexer.get_window_bounds(
+ num_values=len(obj),
+ min_periods=self.min_periods,
+ center=self.center,
+ closed=self.closed,
+ step=self.step,
+ )
+ self._check_window_bounds(start, end, len(obj))
+
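+        # Yield each window as a positional slice [start, end) of the data.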
+ for s, e in zip(start, end):
+ result = obj.iloc[slice(s, e)]
+ yield result
+
+ def _prep_values(self, values: ArrayLike) -> np.ndarray:
+ """Convert input to numpy arrays for Cython routines"""
+ if needs_i8_conversion(values.dtype):
+ raise NotImplementedError(
+ f"ops for {type(self).__name__} for this "
+ f"dtype {values.dtype} are not implemented"
+ )
+ # GH #12373 : rolling functions error on float32 data
+ # make sure the data is coerced to float64
+ try:
+ if isinstance(values, ExtensionArray):
+ values = values.to_numpy(np.float64, na_value=np.nan)
+ else:
+ values = ensure_float64(values)
+ except (ValueError, TypeError) as err:
+ raise TypeError(f"cannot handle this type -> {values.dtype}") from err
+
+ # Convert inf to nan for C funcs
+ inf = np.isinf(values)
+ if inf.any():
+ values = np.where(inf, np.nan, values)
+
+ return values
+
+ def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None:
+ # if we have an 'on' column we want to put it back into
+ # the results in the same location
+ from pandas import Series
+
+ if self.on is not None and not self._on.equals(obj.index):
+ name = self._on.name
+ extra_col = Series(self._on, index=self.obj.index, name=name, copy=False)
+ if name in result.columns:
+ # TODO: sure we want to overwrite results?
+ result[name] = extra_col
+ elif name in result.index.names:
+ pass
+ elif name in self._selected_obj.columns:
+ # insert in the same location as we had in _selected_obj
+ old_cols = self._selected_obj.columns
+ new_cols = result.columns
+ old_loc = old_cols.get_loc(name)
+ overlap = new_cols.intersection(old_cols[:old_loc])
+ new_loc = len(overlap)
+ result.insert(new_loc, name, extra_col)
+ else:
+ # insert at the end
+ result[name] = extra_col
+
+ @property
+ def _index_array(self) -> npt.NDArray[np.int64] | None:
+ # TODO: why do we get here with e.g. MultiIndex?
+ if isinstance(self._on, (PeriodIndex, DatetimeIndex, TimedeltaIndex)):
+ return self._on.asi8
+ elif isinstance(self._on.dtype, ArrowDtype) and self._on.dtype.kind in "mM":
+ return self._on.to_numpy(dtype=np.int64)
+ return None
+
+ def _resolve_output(self, out: DataFrame, obj: DataFrame) -> DataFrame:
+ """Validate and finalize result."""
+ if out.shape[1] == 0 and obj.shape[1] > 0:
+ raise DataError("No numeric types to aggregate")
+ if out.shape[1] == 0:
+ return obj.astype("float64")
+
+ self._insert_on_column(out, obj)
+ return out
+
+ def _get_window_indexer(self) -> BaseIndexer:
+ """
+ Return an indexer class that will compute the window start and end bounds
+ """
+ if isinstance(self.window, BaseIndexer):
+ return self.window
+ if self._win_freq_i8 is not None:
+ return VariableWindowIndexer(
+ index_array=self._index_array,
+ window_size=self._win_freq_i8,
+ center=self.center,
+ )
+ return FixedWindowIndexer(window_size=self.window)
+
+ def _apply_series(
+ self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None
+ ) -> Series:
+ """
+ Series version of _apply_columnwise
+ """
+ obj = self._create_data(self._selected_obj)
+
+ if name == "count":
+ # GH 12541: Special case for count where we support date-like types
+ obj = notna(obj).astype(int)
+ try:
+ values = self._prep_values(obj._values)
+ except (TypeError, NotImplementedError) as err:
+ raise DataError("No numeric types to aggregate") from err
+
+ result = homogeneous_func(values)
+ index = self._slice_axis_for_step(obj.index, result)
+ return obj._constructor(result, index=index, name=obj.name)
+
+ def _apply_columnwise(
+ self,
+ homogeneous_func: Callable[..., ArrayLike],
+ name: str,
+ numeric_only: bool = False,
+ ) -> DataFrame | Series:
+ """
+ Apply the given function to the DataFrame broken down into homogeneous
+ sub-frames.
+ """
+ self._validate_numeric_only(name, numeric_only)
+ if self._selected_obj.ndim == 1:
+ return self._apply_series(homogeneous_func, name)
+
+ obj = self._create_data(self._selected_obj, numeric_only)
+ if name == "count":
+ # GH 12541: Special case for count where we support date-like types
+ obj = notna(obj).astype(int)
+ obj._mgr = obj._mgr.consolidate()
+
+ taker = []
+ res_values = []
+ for i, arr in enumerate(obj._iter_column_arrays()):
+ # GH#42736 operate column-wise instead of block-wise
+ # As of 2.0, hfunc will raise for nuisance columns
+ try:
+ arr = self._prep_values(arr)
+ except (TypeError, NotImplementedError) as err:
+ raise DataError(
+ f"Cannot aggregate non-numeric type: {arr.dtype}"
+ ) from err
+ res = homogeneous_func(arr)
+ res_values.append(res)
+ taker.append(i)
+
+ index = self._slice_axis_for_step(
+ obj.index, res_values[0] if len(res_values) > 0 else None
+ )
+ df = type(obj)._from_arrays(
+ res_values,
+ index=index,
+ columns=obj.columns.take(taker),
+ verify_integrity=False,
+ )
+
+ return self._resolve_output(df, obj)
+
+ def _apply_tablewise(
+ self,
+ homogeneous_func: Callable[..., ArrayLike],
+ name: str | None = None,
+ numeric_only: bool = False,
+ ) -> DataFrame | Series:
+ """
+ Apply the given function to the DataFrame across the entire object
+ """
+ if self._selected_obj.ndim == 1:
+ raise ValueError("method='table' not applicable for Series objects.")
+ obj = self._create_data(self._selected_obj, numeric_only)
+ values = self._prep_values(obj.to_numpy())
+ result = homogeneous_func(values)
+ index = self._slice_axis_for_step(obj.index, result)
+ columns = (
+ obj.columns
+ if result.shape[1] == len(obj.columns)
+ else obj.columns[:: self.step]
+ )
+ out = obj._constructor(result, index=index, columns=columns)
+
+ return self._resolve_output(out, obj)
+
+ def _apply_pairwise(
+ self,
+ target: DataFrame | Series,
+ other: DataFrame | Series | None,
+ pairwise: bool | None,
+ func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
+ numeric_only: bool,
+ ) -> DataFrame | Series:
+ """
+ Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
+ """
+ target = self._create_data(target, numeric_only)
+ if other is None:
+ other = target
+ # only default unset
+ pairwise = True if pairwise is None else pairwise
+ elif not isinstance(other, (ABCDataFrame, ABCSeries)):
+ raise ValueError("other must be a DataFrame or Series")
+ elif other.ndim == 2 and numeric_only:
+ other = self._make_numeric_only(other)
+
+ return flex_binary_moment(target, other, func, pairwise=bool(pairwise))
+
+ def _apply(
+ self,
+ func: Callable[..., Any],
+ name: str,
+ numeric_only: bool = False,
+ numba_args: tuple[Any, ...] = (),
+ **kwargs,
+ ):
+ """
+ Rolling statistical measure using supplied function.
+
+ Designed to be used with passed-in Cython array-based functions.
+
+ Parameters
+ ----------
+ func : callable function to apply
+ name : str,
+ numba_args : tuple
+ args to be passed when func is a numba func
+ **kwargs
+ additional arguments for rolling function and window function
+
+ Returns
+ -------
+ y : type of input
+ """
+ window_indexer = self._get_window_indexer()
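+        # min_periods defaults to the full window size when not specified.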
+ min_periods = (
+ self.min_periods
+ if self.min_periods is not None
+ else window_indexer.window_size
+ )
+
+ def homogeneous_func(values: np.ndarray):
+ # calculation function
+
+ if values.size == 0:
+ return values.copy()
+
+ def calc(x):
+ start, end = window_indexer.get_window_bounds(
+ num_values=len(x),
+ min_periods=min_periods,
+ center=self.center,
+ closed=self.closed,
+ step=self.step,
+ )
+ self._check_window_bounds(start, end, len(x))
+
+ return func(x, start, end, min_periods, *numba_args)
+
+ with np.errstate(all="ignore"):
+ result = calc(values)
+
+ return result
+
+ if self.method == "single":
+ return self._apply_columnwise(homogeneous_func, name, numeric_only)
+ else:
+ return self._apply_tablewise(homogeneous_func, name, numeric_only)
+
+ def _numba_apply(
+ self,
+ func: Callable[..., Any],
+ engine_kwargs: dict[str, bool] | None = None,
+ **func_kwargs,
+ ):
+ window_indexer = self._get_window_indexer()
+ min_periods = (
+ self.min_periods
+ if self.min_periods is not None
+ else window_indexer.window_size
+ )
+ obj = self._create_data(self._selected_obj)
+ values = self._prep_values(obj.to_numpy())
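+        # The shared numba aggregator expects 2D input, so promote Series
+        # values to a single column.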
+ if values.ndim == 1:
+ values = values.reshape(-1, 1)
+ start, end = window_indexer.get_window_bounds(
+ num_values=len(values),
+ min_periods=min_periods,
+ center=self.center,
+ closed=self.closed,
+ step=self.step,
+ )
+ self._check_window_bounds(start, end, len(values))
+ # For now, map everything to float to match the Cython impl
+ # even though it is wrong
+ # TODO: Could preserve correct dtypes in future
+ # xref #53214
+ dtype_mapping = executor.float_dtype_mapping
+ aggregator = executor.generate_shared_aggregator(
+ func,
+ dtype_mapping,
+ is_grouped_kernel=False,
+ **get_jit_arguments(engine_kwargs),
+ )
+ result = aggregator(
+ values.T, start=start, end=end, min_periods=min_periods, **func_kwargs
+ ).T
+ index = self._slice_axis_for_step(obj.index, result)
+ if obj.ndim == 1:
+ result = result.squeeze()
+ out = obj._constructor(result, index=index, name=obj.name)
+ return out
+ else:
+ columns = self._slice_axis_for_step(obj.columns, result.T)
+ out = obj._constructor(result, index=index, columns=columns)
+ return self._resolve_output(out, obj)
+
+ def aggregate(self, func=None, *args, **kwargs):
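+        # Support named aggregation (relabeling), mirroring DataFrame.agg.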
+ relabeling, func, columns, order = reconstruct_func(func, **kwargs)
+ result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
+ if isinstance(result, ABCDataFrame) and relabeling:
+ result = result.iloc[:, order]
+ result.columns = columns # type: ignore[union-attr]
+ if result is None:
+ return self.apply(func, raw=False, args=args, kwargs=kwargs)
+ return result
+
+ agg = aggregate
+
+
+class BaseWindowGroupby(BaseWindow):
+ """
+ Provide the groupby windowing facilities.
+ """
+
+ _grouper: BaseGrouper
+ _as_index: bool
+ _attributes: list[str] = ["_grouper"]
+
+ def __init__(
+ self,
+ obj: DataFrame | Series,
+ *args,
+ _grouper: BaseGrouper,
+ _as_index: bool = True,
+ **kwargs,
+ ) -> None:
+ from pandas.core.groupby.ops import BaseGrouper
+
+ if not isinstance(_grouper, BaseGrouper):
+ raise ValueError("Must pass a BaseGrouper object.")
+ self._grouper = _grouper
+ self._as_index = _as_index
+ # GH 32262: It's convention to keep the grouping column in
+        # groupby.<agg>, but unexpected to users in
+        # groupby.rolling.<agg>
+ obj = obj.drop(columns=self._grouper.names, errors="ignore")
+ # GH 15354
+ if kwargs.get("step") is not None:
+ raise NotImplementedError("step not implemented for groupby")
+ super().__init__(obj, *args, **kwargs)
+
+ def _apply(
+ self,
+ func: Callable[..., Any],
+ name: str,
+ numeric_only: bool = False,
+ numba_args: tuple[Any, ...] = (),
+ **kwargs,
+ ) -> DataFrame | Series:
+ result = super()._apply(
+ func,
+ name,
+ numeric_only,
+ numba_args,
+ **kwargs,
+ )
+ # Reconstruct the resulting MultiIndex
+ # 1st set of levels = group by labels
+ # 2nd set of levels = original DataFrame/Series index
+ grouped_object_index = self.obj.index
+ grouped_index_name = [*grouped_object_index.names]
+ groupby_keys = copy.copy(self._grouper.names)
+ result_index_names = groupby_keys + grouped_index_name
+
+ drop_columns = [
+ key
+ for key in self._grouper.names
+ if key not in self.obj.index.names or key is None
+ ]
+
+ if len(drop_columns) != len(groupby_keys):
+            # The result still carries the grouping column(s); drop them here
+ result = result.drop(columns=drop_columns, errors="ignore")
+
+ codes = self._grouper.codes
+ levels = copy.copy(self._grouper.levels)
+
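+        # Reorder the codes to match the group-wise order the values were
+        # processed in.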
+ group_indices = self._grouper.indices.values()
+ if group_indices:
+ indexer = np.concatenate(list(group_indices))
+ else:
+ indexer = np.array([], dtype=np.intp)
+ codes = [c.take(indexer) for c in codes]
+
+ # if the index of the original dataframe needs to be preserved, append
+ # this index (but reordered) to the codes/levels from the groupby
+ if grouped_object_index is not None:
+ idx = grouped_object_index.take(indexer)
+ if not isinstance(idx, MultiIndex):
+ idx = MultiIndex.from_arrays([idx])
+ codes.extend(list(idx.codes))
+ levels.extend(list(idx.levels))
+
+ result_index = MultiIndex(
+ levels, codes, names=result_index_names, verify_integrity=False
+ )
+
+ result.index = result_index
+ if not self._as_index:
+ result = result.reset_index(level=list(range(len(groupby_keys))))
+ return result
+
+ def _apply_pairwise(
+ self,
+ target: DataFrame | Series,
+ other: DataFrame | Series | None,
+ pairwise: bool | None,
+ func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
+ numeric_only: bool,
+ ) -> DataFrame | Series:
+ """
+ Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
+ """
+ # Manually drop the grouping column first
+ target = target.drop(columns=self._grouper.names, errors="ignore")
+ result = super()._apply_pairwise(target, other, pairwise, func, numeric_only)
+ # 1) Determine the levels + codes of the groupby levels
+ if other is not None and not all(
+ len(group) == len(other) for group in self._grouper.indices.values()
+ ):
+ # GH 42915
+ # len(other) != len(any group), so must reindex (expand) the result
+ # from flex_binary_moment to a "transform"-like result
+ # per groupby combination
+ old_result_len = len(result)
+ result = concat(
+ [
+ result.take(gb_indices).reindex(result.index)
+ for gb_indices in self._grouper.indices.values()
+ ]
+ )
+
+ gb_pairs = (
+ com.maybe_make_list(pair) for pair in self._grouper.indices.keys()
+ )
+ groupby_codes = []
+ groupby_levels = []
+ # e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]]
+ for gb_level_pair in map(list, zip(*gb_pairs)):
+ labels = np.repeat(np.array(gb_level_pair), old_result_len)
+ codes, levels = factorize(labels)
+ groupby_codes.append(codes)
+ groupby_levels.append(levels)
+ else:
+ # pairwise=True or len(other) == len(each group), so repeat
+ # the groupby labels by the number of columns in the original object
+ groupby_codes = self._grouper.codes
+ # error: Incompatible types in assignment (expression has type
+ # "List[Index]", variable has type "List[Union[ndarray, Index]]")
+ groupby_levels = self._grouper.levels # type: ignore[assignment]
+
+ group_indices = self._grouper.indices.values()
+ if group_indices:
+ indexer = np.concatenate(list(group_indices))
+ else:
+ indexer = np.array([], dtype=np.intp)
+
+ if target.ndim == 1:
+ repeat_by = 1
+ else:
+ repeat_by = len(target.columns)
+ groupby_codes = [
+ np.repeat(c.take(indexer), repeat_by) for c in groupby_codes
+ ]
+ # 2) Determine the levels + codes of the result from super()._apply_pairwise
+ if isinstance(result.index, MultiIndex):
+ result_codes = list(result.index.codes)
+ result_levels = list(result.index.levels)
+ result_names = list(result.index.names)
+ else:
+ idx_codes, idx_levels = factorize(result.index)
+ result_codes = [idx_codes]
+ result_levels = [idx_levels]
+ result_names = [result.index.name]
+
+ # 3) Create the resulting index by combining 1) + 2)
+ result_codes = groupby_codes + result_codes
+ result_levels = groupby_levels + result_levels
+ result_names = self._grouper.names + result_names
+
+ result_index = MultiIndex(
+ result_levels, result_codes, names=result_names, verify_integrity=False
+ )
+ result.index = result_index
+ return result
+
+ def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
+ """
+ Split data into blocks & return conformed data.
+ """
+ # Ensure the object we're rolling over is monotonically sorted relative
+ # to the groups
+ # GH 36197
+ if not obj.empty:
+ groupby_order = np.concatenate(list(self._grouper.indices.values())).astype(
+ np.int64
+ )
+ obj = obj.take(groupby_order)
+ return super()._create_data(obj, numeric_only)
+
+ def _gotitem(self, key, ndim, subset=None):
+ # we are setting the index on the actual object
+ # here so our index is carried through to the selected obj
+ # when we do the splitting for the groupby
+ if self.on is not None:
+ # GH 43355
+ subset = self.obj.set_index(self._on)
+ return super()._gotitem(key, ndim, subset=subset)
+
+
+class Window(BaseWindow):
+ """
+ Provide rolling window calculations.
+
+ Parameters
+ ----------
+ window : int, timedelta, str, offset, or BaseIndexer subclass
+ Interval of the moving window.
+
+ If an integer, the delta between the start and end of each window.
+ The number of points in the window depends on the ``closed`` argument.
+
+ If a timedelta, str, or offset, the time period of each window. Each
+        window will be variable sized, based on the observations included in
+ the time-period. This is only valid for datetimelike indexes.
+ To learn more about the offsets & frequency strings, please see
+        :ref:`this link <timeseries.offset_aliases>`.
+
+        If a BaseIndexer subclass, the window boundaries are computed based
+        on the defined ``get_window_bounds`` method. Additional rolling
+ keyword arguments, namely ``min_periods``, ``center``, ``closed`` and
+ ``step`` will be passed to ``get_window_bounds``.
+
+ min_periods : int, default None
+ Minimum number of observations in window required to have a value;
+ otherwise, result is ``np.nan``.
+
+ For a window that is specified by an offset, ``min_periods`` will default to 1.
+
+ For a window that is specified by an integer, ``min_periods`` will default
+ to the size of the window.
+
+ center : bool, default False
+ If False, set the window labels as the right edge of the window index.
+
+ If True, set the window labels as the center of the window index.
+
+ win_type : str, default None
+ If ``None``, all points are evenly weighted.
+
+ If a string, it must be a valid `scipy.signal window function
+        <https://docs.scipy.org/doc/scipy/reference/signal.windows.html#module-scipy.signal.windows>`__.
+
+ Certain Scipy window types require additional parameters to be passed
+ in the aggregation function. The additional parameters must match
+ the keywords specified in the Scipy window type method signature.
+
+ on : str, optional
+ For a DataFrame, a column label or Index level on which
+ to calculate the rolling window, rather than the DataFrame's index.
+
+        A provided integer column is ignored and excluded from the result since
+ an integer index is not used to calculate the rolling window.
+
+ closed : str, default None
+        Determines the inclusivity of points in the window.
+
+ If ``'right'``, uses the window (first, last] meaning the last point
+ is included in the calculations.
+
+ If ``'left'``, uses the window [first, last) meaning the first point
+ is included in the calculations.
+
+ If ``'both'``, uses the window [first, last] meaning all points in
+ the window are included in the calculations.
+
+ If ``'neither'``, uses the window (first, last) meaning the first
+ and last points in the window are excluded from calculations.
+
+        () and [] reference open and closed interval
+        notation, respectively.
+
+ Default ``None`` (``'right'``).
+
+ step : int, default None
+ Evaluate the window at every ``step`` result, equivalent to slicing as
+ ``[::step]``. ``window`` must be an integer. Using a step argument other
+ than None or 1 will produce a result with a different shape than the input.
+
+ .. versionadded:: 1.5.0
+
+ method : str {'single', 'table'}, default 'single'
+
+ .. versionadded:: 1.3.0
+
+ Execute the rolling operation per single column or row (``'single'``)
+ or over the entire object (``'table'``).
+
+ This argument is only implemented when specifying ``engine='numba'``
+ in the method call.
+
+ Returns
+ -------
+ pandas.api.typing.Window or pandas.api.typing.Rolling
+ An instance of Window is returned if ``win_type`` is passed. Otherwise,
+ an instance of Rolling is returned.
+
+ See Also
+ --------
+ expanding : Provides expanding transformations.
+ ewm : Provides exponential weighted functions.
+
+ Notes
+ -----
+    See :ref:`Windowing Operations <window.overview>` for further usage details
+ and examples.
+
+ Examples
+ --------
+ >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
+ >>> df
+ B
+ 0 0.0
+ 1 1.0
+ 2 2.0
+ 3 NaN
+ 4 4.0
+
+ **window**
+
+ Rolling sum with a window length of 2 observations.
+
+ >>> df.rolling(2).sum()
+ B
+ 0 NaN
+ 1 1.0
+ 2 3.0
+ 3 NaN
+ 4 NaN
+
+ Rolling sum with a window span of 2 seconds.
+
+ >>> df_time = pd.DataFrame(
+ ... {"B": [0, 1, 2, np.nan, 4]},
+ ... index=[
+ ... pd.Timestamp("20130101 09:00:00"),
+ ... pd.Timestamp("20130101 09:00:02"),
+ ... pd.Timestamp("20130101 09:00:03"),
+ ... pd.Timestamp("20130101 09:00:05"),
+ ... pd.Timestamp("20130101 09:00:06"),
+ ... ],
+ ... )
+
+ >>> df_time
+ B
+ 2013-01-01 09:00:00 0.0
+ 2013-01-01 09:00:02 1.0
+ 2013-01-01 09:00:03 2.0
+ 2013-01-01 09:00:05 NaN
+ 2013-01-01 09:00:06 4.0
+
+ >>> df_time.rolling("2s").sum()
+ B
+ 2013-01-01 09:00:00 0.0
+ 2013-01-01 09:00:02 1.0
+ 2013-01-01 09:00:03 3.0
+ 2013-01-01 09:00:05 NaN
+ 2013-01-01 09:00:06 4.0
+
+ Rolling sum with forward looking windows with 2 observations.
+
+ >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
+ >>> df.rolling(window=indexer, min_periods=1).sum()
+ B
+ 0 1.0
+ 1 3.0
+ 2 2.0
+ 3 4.0
+ 4 4.0
+
+ **min_periods**
+
+ Rolling sum with a window length of 2 observations, but only needs a minimum of 1
+ observation to calculate a value.
+
+ >>> df.rolling(2, min_periods=1).sum()
+ B
+ 0 0.0
+ 1 1.0
+ 2 3.0
+ 3 2.0
+ 4 4.0
+
+ **center**
+
+ Rolling sum with the result assigned to the center of the window index.
+
+ >>> df.rolling(3, min_periods=1, center=True).sum()
+ B
+ 0 1.0
+ 1 3.0
+ 2 3.0
+ 3 6.0
+ 4 4.0
+
+ >>> df.rolling(3, min_periods=1, center=False).sum()
+ B
+ 0 0.0
+ 1 1.0
+ 2 3.0
+ 3 3.0
+ 4 6.0
+
+ **step**
+
+ Rolling sum with a window length of 2 observations, minimum of 1 observation to
+ calculate a value, and a step of 2.
+
+ >>> df.rolling(2, min_periods=1, step=2).sum()
+ B
+ 0 0.0
+ 2 3.0
+ 4 4.0
+
+ **win_type**
+
+ Rolling sum with a window length of 2, using the Scipy ``'gaussian'``
+ window type. ``std`` is required in the aggregation function.
+
+ >>> df.rolling(2, win_type="gaussian").sum(std=3)
+ B
+ 0 NaN
+ 1 0.986207
+ 2 2.958621
+ 3 NaN
+ 4 NaN
+
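+ **closed**
+
+ As an illustration of ``closed``, a rolling sum with the window closed
+ on the left side excludes the current (right-most) point from each
+ window; with ``min_periods=1`` only the empty first window is ``NaN``.
+
+ >>> df.rolling(2, min_periods=1, closed="left").sum()
+ B
+ 0 NaN
+ 1 0.0
+ 2 1.0
+ 3 3.0
+ 4 2.0
+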
+ **on**
+
+ Rolling sum with a window length of 2 days.
+
+ >>> df = pd.DataFrame(
+ ... {
+ ... "A": [
+ ... pd.to_datetime("2020-01-01"),
+ ... pd.to_datetime("2020-01-01"),
+ ... pd.to_datetime("2020-01-02"),
+ ... ],
+ ... "B": [1, 2, 3],
+ ... },
+ ... index=pd.date_range("2020", periods=3),
+ ... )
+
+ >>> df
+ A B
+ 2020-01-01 2020-01-01 1
+ 2020-01-02 2020-01-01 2
+ 2020-01-03 2020-01-02 3
+
+ >>> df.rolling("2D", on="A").sum()
+ A B
+ 2020-01-01 2020-01-01 1.0
+ 2020-01-02 2020-01-01 3.0
+ 2020-01-03 2020-01-02 6.0
+ """
+
+ _attributes = [
+ "window",
+ "min_periods",
+ "center",
+ "win_type",
+ "on",
+ "closed",
+ "step",
+ "method",
+ ]
+
+ def _validate(self) -> None:
+ super()._validate()
+
+ if not isinstance(self.win_type, str):
+ raise ValueError(f"Invalid win_type {self.win_type}")
+ signal = import_optional_dependency(
+ "scipy.signal.windows", extra="Scipy is required to generate window weights."
+ )
+ self._scipy_weight_generator = getattr(signal, self.win_type, None)
+ if self._scipy_weight_generator is None:
+ raise ValueError(f"Invalid win_type {self.win_type}")
+
+ if isinstance(self.window, BaseIndexer):
+ raise NotImplementedError(
+ "BaseIndexer subclasses not implemented with win_types."
+ )
+ if not is_integer(self.window) or self.window < 0:
+ raise ValueError("window must be an integer 0 or greater")
+
+ if self.method != "single":
+ raise NotImplementedError("'single' is the only supported method type.")
+
+ def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray:
+ """
+ Center the result in the window for weighted rolling aggregations.
+ """
+ if offset > 0:
+ lead_indexer = [slice(offset, None)]
+ result = np.copy(result[tuple(lead_indexer)])
+ return result
+
+ def _apply(
+ self,
+ func: Callable[[np.ndarray, int, int], np.ndarray],
+ name: str,
+ numeric_only: bool = False,
+ numba_args: tuple[Any, ...] = (),
+ **kwargs,
+ ):
+ """
+ Rolling with weights statistical measure using supplied function.
+
+ Designed to be used with passed-in Cython array-based functions.
+
+ Parameters
+ ----------
+ func : callable
+ Function to apply to each window.
+ name : str
+ numeric_only : bool, default False
+ Whether to only operate on bool, int, and float columns.
+ numba_args : tuple
+ Unused.
+ **kwargs
+ Additional arguments for scipy windows if necessary.
+
+ Returns
+ -------
+ y : type of input
+ """
+ # "None" not callable [misc]
+ window = self._scipy_weight_generator( # type: ignore[misc]
+ self.window, **kwargs
+ )
+ offset = (len(window) - 1) // 2 if self.center else 0
+
+ def homogeneous_func(values: np.ndarray):
+ # calculation function
+
+ if values.size == 0:
+ return values.copy()
+
+ def calc(x):
+ additional_nans = np.full(offset, np.nan)
+ x = np.concatenate((x, additional_nans))
+ return func(
+ x,
+ window,
+ self.min_periods if self.min_periods is not None else len(window),
+ )
+
+ with np.errstate(all="ignore"):
+ # Our weighted aggregations return memoryviews
+ result = np.asarray(calc(values))
+
+ if self.center:
+ result = self._center_window(result, offset)
+
+ return result
+
+ result = self._apply_columnwise(homogeneous_func, name, numeric_only)
+ if self.step is not None and self.step > 1:
+ if isinstance(result, Series):
+ result = result.iloc[:: self.step]
+ elif isinstance(result, DataFrame):
+ result = result.iloc[:: self.step, :]
+ return result
+
+ @doc(
+ _shared_docs["aggregate"],
+ see_also=dedent(
+ """
+ See Also
+ --------
+ DataFrame.aggregate : Similar DataFrame method.
+ Series.aggregate : Similar Series method.
+ """
+ ),
+ examples=dedent(
+ """
+ Examples
+ --------
+ >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
+ >>> df
+ A B C
+ 0 1 4 7
+ 1 2 5 8
+ 2 3 6 9
+
+ >>> df.rolling(2, win_type="boxcar").agg("mean")
+ A B C
+ 0 NaN NaN NaN
+ 1 1.5 4.5 7.5
+ 2 2.5 5.5 8.5
+ """
+ ),
+ klass="Series/DataFrame",
+ axis="",
+ )
+ def aggregate(self, func=None, *args, **kwargs):
+ result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
+ if result is None:
+ # these must apply directly
+ result = func(self)
+
+ return result
+
+ agg = aggregate
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ kwargs_scipy,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """\
+ >>> ser = pd.Series([0, 1, 5, 2, 8])
+
+ To get an instance of :class:`~pandas.core.window.rolling.Window` we need
+ to pass the parameter `win_type`.
+
+ >>> type(ser.rolling(2, win_type='gaussian'))
+ <class 'pandas.core.window.rolling.Window'>
+
+ In order to use the `SciPy` Gaussian window we need to provide the parameters
+ `M` and `std`. The parameter `M` corresponds to 2 in our example.
+ We pass the second parameter `std` as a keyword argument to the
+ aggregation method (`sum` in this case):
+
+ >>> ser.rolling(2, win_type='gaussian').sum(std=3)
+ 0 NaN
+ 1 0.986207
+ 2 5.917243
+ 3 6.903450
+ 4 9.862071
+ dtype: float64
+ """
+ ),
+ window_method="rolling",
+ aggregation_description="weighted window sum",
+ agg_method="sum",
+ )
+ def sum(self, numeric_only: bool = False, **kwargs):
+ window_func = window_aggregations.roll_weighted_sum
+ # error: Argument 1 to "_apply" of "Window" has incompatible type
+ # "Callable[[ndarray, ndarray, int], ndarray]"; expected
+ # "Callable[[ndarray, int, int], ndarray]"
+ return self._apply(
+ window_func, # type: ignore[arg-type]
+ name="sum",
+ numeric_only=numeric_only,
+ **kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ kwargs_scipy,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """\
+ >>> ser = pd.Series([0, 1, 5, 2, 8])
+
+ To get an instance of :class:`~pandas.core.window.rolling.Window` we need
+ to pass the parameter `win_type`.
+
+ >>> type(ser.rolling(2, win_type='gaussian'))
+ <class 'pandas.core.window.rolling.Window'>
+
+ In order to use the `SciPy` Gaussian window we need to provide the parameters
+ `M` and `std`. The parameter `M` corresponds to 2 in our example.
+ We pass the second parameter `std` as a keyword argument to the
+ aggregation method:
+
+ >>> ser.rolling(2, win_type='gaussian').mean(std=3)
+ 0 NaN
+ 1 0.5
+ 2 3.0
+ 3 3.5
+ 4 5.0
+ dtype: float64
+ """
+ ),
+ window_method="rolling",
+ aggregation_description="weighted window mean",
+ agg_method="mean",
+ )
+ def mean(self, numeric_only: bool = False, **kwargs):
+ window_func = window_aggregations.roll_weighted_mean
+ # error: Argument 1 to "_apply" of "Window" has incompatible type
+ # "Callable[[ndarray, ndarray, int], ndarray]"; expected
+ # "Callable[[ndarray, int, int], ndarray]"
+ return self._apply(
+ window_func, # type: ignore[arg-type]
+ name="mean",
+ numeric_only=numeric_only,
+ **kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ dedent(
+ """
+ ddof : int, default 1
+ Delta Degrees of Freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ kwargs_scipy,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """\
+ >>> ser = pd.Series([0, 1, 5, 2, 8])
+
+ To get an instance of :class:`~pandas.core.window.rolling.Window` we need
+ to pass the parameter `win_type`.
+
+ >>> type(ser.rolling(2, win_type='gaussian'))
+ <class 'pandas.core.window.rolling.Window'>
+
+ In order to use the `SciPy` Gaussian window we need to provide the parameters
+ `M` and `std`. The parameter `M` corresponds to 2 in our example.
+ We pass the second parameter `std` as a keyword argument to the
+ aggregation method:
+
+ >>> ser.rolling(2, win_type='gaussian').var(std=3)
+ 0 NaN
+ 1 0.5
+ 2 8.0
+ 3 4.5
+ 4 18.0
+ dtype: float64
+ """
+ ),
+ window_method="rolling",
+ aggregation_description="weighted window variance",
+ agg_method="var",
+ )
+ def var(self, ddof: int = 1, numeric_only: bool = False, **kwargs):
+ window_func = partial(window_aggregations.roll_weighted_var, ddof=ddof)
+ kwargs.pop("name", None)
+ return self._apply(window_func, name="var", numeric_only=numeric_only, **kwargs)
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ dedent(
+ """
+ ddof : int, default 1
+ Delta Degrees of Freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ kwargs_scipy,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """\
+ >>> ser = pd.Series([0, 1, 5, 2, 8])
+
+ To get an instance of :class:`~pandas.core.window.rolling.Window` we need
+ to pass the parameter `win_type`.
+
+ >>> type(ser.rolling(2, win_type='gaussian'))
+ <class 'pandas.core.window.rolling.Window'>
+
+ In order to use the `SciPy` Gaussian window we need to provide the parameters
+ `M` and `std`. The parameter `M` corresponds to 2 in our example.
+ We pass the second parameter `std` as a keyword argument to the
+ aggregation method:
+
+ >>> ser.rolling(2, win_type='gaussian').std(std=3)
+ 0 NaN
+ 1 0.707107
+ 2 2.828427
+ 3 2.121320
+ 4 4.242641
+ dtype: float64
+ """
+ ),
+ window_method="rolling",
+ aggregation_description="weighted window standard deviation",
+ agg_method="std",
+ )
+ def std(self, ddof: int = 1, numeric_only: bool = False, **kwargs):
+ return zsqrt(
+ self.var(ddof=ddof, name="std", numeric_only=numeric_only, **kwargs)
+ )
+
+
+class RollingAndExpandingMixin(BaseWindow):
+ def count(self, numeric_only: bool = False):
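+ # count is computed as a rolling sum over the notna mask; the values
+ # are converted to that mask upstream in the shared _apply machinery.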
+ window_func = window_aggregations.roll_sum
+ return self._apply(window_func, name="count", numeric_only=numeric_only)
+
+ def apply(
+ self,
+ func: Callable[..., Any],
+ raw: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ args: tuple[Any, ...] | None = None,
+ kwargs: dict[str, Any] | None = None,
+ ):
+ if args is None:
+ args = ()
+ if kwargs is None:
+ kwargs = {}
+
+ if not is_bool(raw):
+ raise ValueError("raw parameter must be `True` or `False`")
+
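+ # Engine dispatch: numba JIT-compiles ``func`` (and requires raw=True),
+ # while the default cython path wraps ``func`` for roll_apply.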
+ numba_args: tuple[Any, ...] = ()
+ if maybe_use_numba(engine):
+ if raw is False:
+ raise ValueError("raw must be `True` when using the numba engine")
+ numba_args, kwargs = prepare_function_arguments(
+ func, args, kwargs, num_required_args=1
+ )
+ if self.method == "single":
+ apply_func = generate_numba_apply_func(
+ func, **get_jit_arguments(engine_kwargs)
+ )
+ else:
+ apply_func = generate_numba_table_func(
+ func, **get_jit_arguments(engine_kwargs)
+ )
+ elif engine in ("cython", None):
+ if engine_kwargs is not None:
+ raise ValueError("cython engine does not accept engine_kwargs")
+ apply_func = self._generate_cython_apply_func(args, kwargs, raw, func)
+ else:
+ raise ValueError("engine must be either 'numba' or 'cython'")
+
+ return self._apply(
+ apply_func,
+ name="apply",
+ numba_args=numba_args,
+ )
+
+ def _generate_cython_apply_func(
+ self,
+ args: tuple[Any, ...],
+ kwargs: dict[str, Any],
+ raw: bool | np.bool_,
+ function: Callable[..., Any],
+ ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]:
+ from pandas import Series
+
+ window_func = partial(
+ window_aggregations.roll_apply,
+ args=args,
+ kwargs=kwargs,
+ raw=bool(raw),
+ function=function,
+ )
+
+ def apply_func(values, begin, end, min_periods, raw=raw):
+ if not raw:
+ # GH 45912
+ values = Series(values, index=self._on, copy=False)
+ return window_func(values, begin, end, min_periods)
+
+ return apply_func
+
+ @overload
+ def pipe(
+ self,
+ func: Callable[Concatenate[Self, P], T],
+ *args: P.args,
+ **kwargs: P.kwargs,
+ ) -> T: ...
+
+ @overload
+ def pipe(
+ self,
+ func: tuple[Callable[..., T], str],
+ *args: Any,
+ **kwargs: Any,
+ ) -> T: ...
+
+ def pipe(
+ self,
+ func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
+ *args: Any,
+ **kwargs: Any,
+ ) -> T:
+ return com.pipe(self, func, *args, **kwargs)
+
+ def sum(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ if maybe_use_numba(engine):
+ if self.method == "table":
+ func = generate_manual_numpy_nan_agg_with_axis(np.nansum)
+ return self.apply(
+ func,
+ raw=True,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+ else:
+ from pandas.core._numba.kernels import sliding_sum
+
+ return self._numba_apply(sliding_sum, engine_kwargs)
+ window_func = window_aggregations.roll_sum
+ return self._apply(window_func, name="sum", numeric_only=numeric_only)
+
+ def max(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ if maybe_use_numba(engine):
+ if self.method == "table":
+ func = generate_manual_numpy_nan_agg_with_axis(np.nanmax)
+ return self.apply(
+ func,
+ raw=True,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+ else:
+ from pandas.core._numba.kernels import sliding_min_max
+
+ return self._numba_apply(sliding_min_max, engine_kwargs, is_max=True)
+ window_func = window_aggregations.roll_max
+ return self._apply(window_func, name="max", numeric_only=numeric_only)
+
+ def min(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ if maybe_use_numba(engine):
+ if self.method == "table":
+ func = generate_manual_numpy_nan_agg_with_axis(np.nanmin)
+ return self.apply(
+ func,
+ raw=True,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+ else:
+ from pandas.core._numba.kernels import sliding_min_max
+
+ return self._numba_apply(sliding_min_max, engine_kwargs, is_max=False)
+ window_func = window_aggregations.roll_min
+ return self._apply(window_func, name="min", numeric_only=numeric_only)
+
+ def mean(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ if maybe_use_numba(engine):
+ if self.method == "table":
+ func = generate_manual_numpy_nan_agg_with_axis(np.nanmean)
+ return self.apply(
+ func,
+ raw=True,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+ else:
+ from pandas.core._numba.kernels import sliding_mean
+
+ return self._numba_apply(sliding_mean, engine_kwargs)
+ window_func = window_aggregations.roll_mean
+ return self._apply(window_func, name="mean", numeric_only=numeric_only)
+
+ def median(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ if maybe_use_numba(engine):
+ if self.method == "table":
+ func = generate_manual_numpy_nan_agg_with_axis(np.nanmedian)
+ else:
+ func = np.nanmedian
+
+ return self.apply(
+ func,
+ raw=True,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+ window_func = window_aggregations.roll_median_c
+ return self._apply(window_func, name="median", numeric_only=numeric_only)
+
+ def std(
+ self,
+ ddof: int = 1,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ if maybe_use_numba(engine):
+ if self.method == "table":
+ raise NotImplementedError("std not supported with method='table'")
+ from pandas.core._numba.kernels import sliding_var
+
+ return zsqrt(self._numba_apply(sliding_var, engine_kwargs, ddof=ddof))
+ window_func = window_aggregations.roll_var
+
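+ # zsqrt is a square root that maps small negative variances (from
+ # floating-point error) to 0.0 instead of NaN.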
+ def zsqrt_func(values, begin, end, min_periods):
+ return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof))
+
+ return self._apply(
+ zsqrt_func,
+ name="std",
+ numeric_only=numeric_only,
+ )
+
+ def var(
+ self,
+ ddof: int = 1,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ if maybe_use_numba(engine):
+ if self.method == "table":
+ raise NotImplementedError("var not supported with method='table'")
+ from pandas.core._numba.kernels import sliding_var
+
+ return self._numba_apply(sliding_var, engine_kwargs, ddof=ddof)
+ window_func = partial(window_aggregations.roll_var, ddof=ddof)
+ return self._apply(
+ window_func,
+ name="var",
+ numeric_only=numeric_only,
+ )
+
+ def skew(self, numeric_only: bool = False):
+ window_func = window_aggregations.roll_skew
+ return self._apply(
+ window_func,
+ name="skew",
+ numeric_only=numeric_only,
+ )
+
+ def sem(self, ddof: int = 1, numeric_only: bool = False):
+ # Raise here so error message says sem instead of std
+ self._validate_numeric_only("sem", numeric_only)
+ return self.std(numeric_only=numeric_only) / (
+ self.count(numeric_only=numeric_only) - ddof
+ ).pow(0.5)
+
+ def kurt(self, numeric_only: bool = False):
+ window_func = window_aggregations.roll_kurt
+ return self._apply(
+ window_func,
+ name="kurt",
+ numeric_only=numeric_only,
+ )
+
+ def first(self, numeric_only: bool = False):
+ window_func = window_aggregations.roll_first
+ return self._apply(
+ window_func,
+ name="first",
+ numeric_only=numeric_only,
+ )
+
+ def last(self, numeric_only: bool = False):
+ window_func = window_aggregations.roll_last
+ return self._apply(
+ window_func,
+ name="last",
+ numeric_only=numeric_only,
+ )
+
+ def quantile(
+ self,
+ q: float,
+ interpolation: QuantileInterpolation = "linear",
+ numeric_only: bool = False,
+ ):
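+ # Fast paths: the 0th and 100th percentiles are exactly the rolling
+ # min and max.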
+ if q == 1.0:
+ window_func = window_aggregations.roll_max
+ elif q == 0.0:
+ window_func = window_aggregations.roll_min
+ else:
+ window_func = partial(
+ window_aggregations.roll_quantile,
+ quantile=q,
+ interpolation=interpolation,
+ )
+
+ return self._apply(window_func, name="quantile", numeric_only=numeric_only)
+
+ def rank(
+ self,
+ method: WindowingRankType = "average",
+ ascending: bool = True,
+ pct: bool = False,
+ numeric_only: bool = False,
+ ):
+ window_func = partial(
+ window_aggregations.roll_rank,
+ method=method,
+ ascending=ascending,
+ percentile=pct,
+ )
+
+ return self._apply(window_func, name="rank", numeric_only=numeric_only)
+
+ def nunique(
+ self,
+ numeric_only: bool = False,
+ ):
+ window_func = window_aggregations.roll_nunique
+
+ return self._apply(window_func, name="nunique", numeric_only=numeric_only)
+
+ def cov(
+ self,
+ other: DataFrame | Series | None = None,
+ pairwise: bool | None = None,
+ ddof: int = 1,
+ numeric_only: bool = False,
+ ):
+ if self.step is not None:
+ raise NotImplementedError("step not implemented for cov")
+ self._validate_numeric_only("cov", numeric_only)
+
+ from pandas import Series
+
+ def cov_func(x, y):
+ x_array = self._prep_values(x)
+ y_array = self._prep_values(y)
+ window_indexer = self._get_window_indexer()
+ min_periods = (
+ self.min_periods
+ if self.min_periods is not None
+ else window_indexer.window_size
+ )
+ start, end = window_indexer.get_window_bounds(
+ num_values=len(x_array),
+ min_periods=min_periods,
+ center=self.center,
+ closed=self.closed,
+ step=self.step,
+ )
+ self._check_window_bounds(start, end, len(x_array))
+
+ with np.errstate(all="ignore"):
+ mean_x_y = window_aggregations.roll_mean(
+ x_array * y_array, start, end, min_periods
+ )
+ mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
+ mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
+ count_x_y = window_aggregations.roll_sum(
+ notna(x_array + y_array).astype(np.float64), start, end, 0
+ )
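+ # Sample covariance: (E[XY] - E[X]E[Y]) rescaled by n / (n - ddof).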
+ result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof))
+ return Series(result, index=x.index, name=x.name, copy=False)
+
+ return self._apply_pairwise(
+ self._selected_obj, other, pairwise, cov_func, numeric_only
+ )
+
+ def corr(
+ self,
+ other: DataFrame | Series | None = None,
+ pairwise: bool | None = None,
+ ddof: int = 1,
+ numeric_only: bool = False,
+ ):
+ if self.step is not None:
+ raise NotImplementedError("step not implemented for corr")
+ self._validate_numeric_only("corr", numeric_only)
+
+ from pandas import Series
+
+ def corr_func(x, y):
+ x_array = self._prep_values(x)
+ y_array = self._prep_values(y)
+ window_indexer = self._get_window_indexer()
+ min_periods = (
+ self.min_periods
+ if self.min_periods is not None
+ else window_indexer.window_size
+ )
+ start, end = window_indexer.get_window_bounds(
+ num_values=len(x_array),
+ min_periods=min_periods,
+ center=self.center,
+ closed=self.closed,
+ step=self.step,
+ )
+ self._check_window_bounds(start, end, len(x_array))
+
+ with np.errstate(all="ignore"):
+ mean_x_y = window_aggregations.roll_mean(
+ x_array * y_array, start, end, min_periods
+ )
+ mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
+ mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
+ count_x_y = window_aggregations.roll_sum(
+ notna(x_array + y_array).astype(np.float64), start, end, 0
+ )
+ x_var = window_aggregations.roll_var(
+ x_array, start, end, min_periods, ddof
+ )
+ y_var = window_aggregations.roll_var(
+ y_array, start, end, min_periods, ddof
+ )
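+ # Pearson correlation: rolling cov(x, y) / sqrt(var(x) * var(y)).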
+ numerator = (mean_x_y - mean_x * mean_y) * (
+ count_x_y / (count_x_y - ddof)
+ )
+ denominator = (x_var * y_var) ** 0.5
+ result = numerator / denominator
+ return Series(result, index=x.index, name=x.name, copy=False)
+
+ return self._apply_pairwise(
+ self._selected_obj, other, pairwise, corr_func, numeric_only
+ )
+
+
+class Rolling(RollingAndExpandingMixin):
+ _attributes: list[str] = [
+ "window",
+ "min_periods",
+ "center",
+ "win_type",
+ "on",
+ "closed",
+ "step",
+ "method",
+ ]
+
+ def _validate(self) -> None:
+ super()._validate()
+
+ # we allow rolling on a datetimelike index
+ if (
+ self.obj.empty
+ or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
+ or (isinstance(self._on.dtype, ArrowDtype) and self._on.dtype.kind in "mM")
+ ) and isinstance(self.window, (str, BaseOffset, timedelta)):
+ self._validate_datetimelike_monotonic()
+
+ # this will raise ValueError on non-fixed freqs
+ try:
+ freq = to_offset(self.window)
+ except (TypeError, ValueError) as err:
+ raise ValueError(
+ f"passed window {self.window} is not "
+ "compatible with a datetimelike index"
+ ) from err
+ if isinstance(self._on, PeriodIndex):
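+ # Window bounds for a PeriodIndex are computed on period ordinals,
+ # so express the frequency window as a number of index periods.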
+ # error: Incompatible types in assignment (expression has type
+ # "float", variable has type "Optional[int]")
+ self._win_freq_i8 = freq.nanos / ( # type: ignore[assignment]
+ self._on.freq.nanos / self._on.freq.n
+ )
+ else:
+ try:
+ unit = dtype_to_unit(self._on.dtype) # type: ignore[arg-type]
+ except TypeError:
+ # if not a datetime dtype, eg for empty dataframes
+ unit = "ns"
+ self._win_freq_i8 = Timedelta(freq.nanos).as_unit(unit)._value
+
+ # min_periods must be an integer
+ if self.min_periods is None:
+ self.min_periods = 1
+
+ if self.step is not None:
+ raise NotImplementedError(
+ "step is not supported with frequency windows"
+ )
+
+ elif isinstance(self.window, BaseIndexer):
+ # Passed BaseIndexer subclass should handle all other rolling kwargs
+ pass
+ elif not is_integer(self.window) or self.window < 0:
+ raise ValueError("window must be an integer 0 or greater")
+
+ def _validate_datetimelike_monotonic(self) -> None:
+ """
+ Validate self._on is monotonic (increasing or decreasing) and has
+ no NaT values for frequency windows.
+ """
+ if self._on.hasnans:
+ self._raise_monotonic_error("values must not have NaT")
+ if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
+ self._raise_monotonic_error("values must be monotonic")
+
+ def _raise_monotonic_error(self, msg: str):
+ on = self.on
+ if on is None:
+ on = "index"
+ raise ValueError(f"{on} {msg}")
+
+ @doc(
+ _shared_docs["aggregate"],
+ see_also=dedent(
+ """
+ See Also
+ --------
+ Series.rolling : Calling object with Series data.
+ DataFrame.rolling : Calling object with DataFrame data.
+ """
+ ),
+ examples=dedent(
+ """
+ Examples
+ --------
+ >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
+ >>> df
+ A B C
+ 0 1 4 7
+ 1 2 5 8
+ 2 3 6 9
+
+ >>> df.rolling(2).sum()
+ A B C
+ 0 NaN NaN NaN
+ 1 3.0 9.0 15.0
+ 2 5.0 11.0 17.0
+
+ >>> df.rolling(2).agg({"A": "sum", "B": "min"})
+ A B
+ 0 NaN NaN
+ 1 3.0 4.0
+ 2 5.0 5.0
+ """
+ ),
+ klass="Series/Dataframe",
+ axis="",
+ )
+ def aggregate(self, func=None, *args, **kwargs):
+ return super().aggregate(func, *args, **kwargs)
+
+ agg = aggregate
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """
+ >>> s = pd.Series([2, 3, np.nan, 10])
+ >>> s.rolling(2).count()
+ 0 NaN
+ 1 2.0
+ 2 1.0
+ 3 1.0
+ dtype: float64
+ >>> s.rolling(3).count()
+ 0 NaN
+ 1 NaN
+ 2 2.0
+ 3 2.0
+ dtype: float64
+ >>> s.rolling(4).count()
+ 0 NaN
+ 1 NaN
+ 2 NaN
+ 3 3.0
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="count of non NaN observations",
+ agg_method="count",
+ )
+ def count(self, numeric_only: bool = False):
+ return super().count(numeric_only)
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ window_apply_parameters,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """\
+ >>> ser = pd.Series([1, 6, 5, 4])
+ >>> ser.rolling(2).apply(lambda s: s.sum() - s.min())
+ 0 NaN
+ 1 6.0
+ 2 6.0
+ 3 5.0
+ dtype: float64
+ """
+ ),
+ window_method="rolling",
+ aggregation_description="custom aggregation function",
+ agg_method="apply",
+ )
+ def apply(
+ self,
+ func: Callable[..., Any],
+ raw: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ args: tuple[Any, ...] | None = None,
+ kwargs: dict[str, Any] | None = None,
+ ):
+ return super().apply(
+ func,
+ raw=raw,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ args=args,
+ kwargs=kwargs,
+ )
+
+ @overload
+ def pipe(
+ self,
+ func: Callable[Concatenate[Self, P], T],
+ *args: P.args,
+ **kwargs: P.kwargs,
+ ) -> T: ...
+
+ @overload
+ def pipe(
+ self,
+ func: tuple[Callable[..., T], str],
+ *args: Any,
+ **kwargs: Any,
+ ) -> T: ...
+
+ @final
+ @Substitution(
+ klass="Rolling",
+ examples="""
+ >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
+ ... index=pd.date_range('2012-08-02', periods=4))
+ >>> df
+ A
+ 2012-08-02 1
+ 2012-08-03 2
+ 2012-08-04 3
+ 2012-08-05 4
+
+ To get the difference between each rolling 2-day window's maximum and minimum
+ value in one pass, you can do
+
+ >>> df.rolling('2D').pipe(lambda x: x.max() - x.min())
+ A
+ 2012-08-02 0.0
+ 2012-08-03 1.0
+ 2012-08-04 1.0
+ 2012-08-05 1.0""",
+ )
+ @Appender(template_pipe)
+ def pipe(
+ self,
+ func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
+ *args: Any,
+ **kwargs: Any,
+ ) -> T:
+ return super().pipe(func, *args, **kwargs)
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ window_agg_numba_parameters(),
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Notes"),
+ numba_notes,
+ create_section_header("Examples"),
+ dedent(
+ """
+ >>> s = pd.Series([1, 2, 3, 4, 5])
+ >>> s
+ 0 1
+ 1 2
+ 2 3
+ 3 4
+ 4 5
+ dtype: int64
+
+ >>> s.rolling(3).sum()
+ 0 NaN
+ 1 NaN
+ 2 6.0
+ 3 9.0
+ 4 12.0
+ dtype: float64
+
+ >>> s.rolling(3, center=True).sum()
+ 0 NaN
+ 1 6.0
+ 2 9.0
+ 3 12.0
+ 4 NaN
+ dtype: float64
+
+ For DataFrame, each sum is computed column-wise.
+
+ >>> df = pd.DataFrame({{"A": s, "B": s ** 2}})
+ >>> df
+ A B
+ 0 1 1
+ 1 2 4
+ 2 3 9
+ 3 4 16
+ 4 5 25
+
+ >>> df.rolling(3).sum()
+ A B
+ 0 NaN NaN
+ 1 NaN NaN
+ 2 6.0 14.0
+ 3 9.0 29.0
+ 4 12.0 50.0
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="sum",
+ agg_method="sum",
+ )
+ def sum(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ return super().sum(
+ numeric_only=numeric_only,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ dedent(
+ """
+ *args : iterable, optional
+ Positional arguments passed into ``func``.\n
+ """
+ ).replace("\n", "", 1),
+ window_agg_numba_parameters(),
+ dedent(
+ """
+ **kwargs : mapping, optional
+ A dictionary of keyword arguments passed into ``func``.\n
+ """
+ ).replace("\n", "", 1),
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Notes"),
+ numba_notes,
+ create_section_header("Examples"),
+ dedent(
+ """\
+ >>> ser = pd.Series([1, 2, 3, 4])
+ >>> ser.rolling(2).max()
+ 0 NaN
+ 1 2.0
+ 2 3.0
+ 3 4.0
+ dtype: float64
+ """
+ ),
+ window_method="rolling",
+ aggregation_description="maximum",
+ agg_method="max",
+ )
+ def max(
+ self,
+ numeric_only: bool = False,
+ *args,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ **kwargs,
+ ):
+ return super().max(
+ numeric_only=numeric_only,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ window_agg_numba_parameters(),
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Notes"),
+ numba_notes,
+ create_section_header("Examples"),
+ dedent(
+ """
+ Performing a rolling minimum with a window size of 3.
+
+ >>> s = pd.Series([4, 3, 5, 2, 6])
+ >>> s.rolling(3).min()
+ 0 NaN
+ 1 NaN
+ 2 3.0
+ 3 2.0
+ 4 2.0
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="minimum",
+ agg_method="min",
+ )
+ def min(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ return super().min(
+ numeric_only=numeric_only,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ window_agg_numba_parameters(),
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Notes"),
+ numba_notes,
+ create_section_header("Examples"),
+ dedent(
+ """
+ The below examples will show rolling mean calculations with window sizes of
+ two and three, respectively.
+
+ >>> s = pd.Series([1, 2, 3, 4])
+ >>> s.rolling(2).mean()
+ 0 NaN
+ 1 1.5
+ 2 2.5
+ 3 3.5
+ dtype: float64
+
+ >>> s.rolling(3).mean()
+ 0 NaN
+ 1 NaN
+ 2 2.0
+ 3 3.0
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="mean",
+ agg_method="mean",
+ )
+ def mean(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ return super().mean(
+ numeric_only=numeric_only,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ window_agg_numba_parameters(),
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Notes"),
+ numba_notes,
+ create_section_header("Examples"),
+ dedent(
+ """
+ Compute the rolling median of a series with a window size of 3.
+
+ >>> s = pd.Series([0, 1, 2, 3, 4])
+ >>> s.rolling(3).median()
+ 0 NaN
+ 1 NaN
+ 2 1.0
+ 3 2.0
+ 4 3.0
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="median",
+ agg_method="median",
+ )
+ def median(
+ self,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ return super().median(
+ numeric_only=numeric_only,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ dedent(
+ """
+ ddof : int, default 1
+ Delta Degrees of Freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ window_agg_numba_parameters("1.4"),
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ "numpy.std : Equivalent method for NumPy array.\n",
+ template_see_also,
+ create_section_header("Notes"),
+ dedent(
+ """
+ The default ``ddof`` of 1 used in :meth:`Series.std` is different
+ than the default ``ddof`` of 0 in :func:`numpy.std`.
+
+ A minimum of one period is required for the rolling calculation.\n
+ """
+ ).replace("\n", "", 1),
+ create_section_header("Examples"),
+ dedent(
+ """
+ >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
+ >>> s.rolling(3).std()
+ 0 NaN
+ 1 NaN
+ 2 0.577350
+ 3 1.000000
+ 4 1.000000
+ 5 1.154701
+ 6 0.000000
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="standard deviation",
+ agg_method="std",
+ )
+ def std(
+ self,
+ ddof: int = 1,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ return super().std(
+ ddof=ddof,
+ numeric_only=numeric_only,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ dedent(
+ """
+ ddof : int, default 1
+ Delta Degrees of Freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ window_agg_numba_parameters("1.4"),
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ "numpy.var : Equivalent method for NumPy array.\n",
+ template_see_also,
+ create_section_header("Notes"),
+ dedent(
+ """
+ The default ``ddof`` of 1 used in :meth:`Series.var` is different
+ than the default ``ddof`` of 0 in :func:`numpy.var`.
+
+ A minimum of one period is required for the rolling calculation.\n
+ """
+ ).replace("\n", "", 1),
+ create_section_header("Examples"),
+ dedent(
+ """
+ >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
+ >>> s.rolling(3).var()
+ 0 NaN
+ 1 NaN
+ 2 0.333333
+ 3 1.000000
+ 4 1.000000
+ 5 1.333333
+ 6 0.000000
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="variance",
+ agg_method="var",
+ )
+ def var(
+ self,
+ ddof: int = 1,
+ numeric_only: bool = False,
+ engine: Literal["cython", "numba"] | None = None,
+ engine_kwargs: dict[str, bool] | None = None,
+ ):
+ return super().var(
+ ddof=ddof,
+ numeric_only=numeric_only,
+ engine=engine,
+ engine_kwargs=engine_kwargs,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ "scipy.stats.skew : Third moment of a probability density.\n",
+ template_see_also,
+ create_section_header("Notes"),
+ dedent(
+ """
+ A minimum of three periods is required for the rolling calculation.\n
+ """
+ ),
+ create_section_header("Examples"),
+ dedent(
+ """\
+ >>> ser = pd.Series([1, 5, 2, 7, 15, 6])
+ >>> ser.rolling(3).skew().round(6)
+ 0 NaN
+ 1 NaN
+ 2 1.293343
+ 3 -0.585583
+ 4 0.670284
+ 5 1.652317
+ dtype: float64
+ """
+ ),
+ window_method="rolling",
+ aggregation_description="unbiased skewness",
+ agg_method="skew",
+ )
+ def skew(self, numeric_only: bool = False):
+ return super().skew(numeric_only=numeric_only)
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ dedent(
+ """
+ ddof : int, default 1
+ Delta Degrees of Freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Notes"),
+ "A minimum of one period is required for the calculation.\n\n",
+ create_section_header("Examples"),
+ dedent(
+ """
+ >>> s = pd.Series([0, 1, 2, 3])
+ >>> s.rolling(2, min_periods=1).sem()
+ 0 NaN
+ 1 0.707107
+ 2 0.707107
+ 3 0.707107
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="standard error of mean",
+ agg_method="sem",
+ )
+ def sem(self, ddof: int = 1, numeric_only: bool = False):
+ # Raise here so error message says sem instead of std
+ self._validate_numeric_only("sem", numeric_only)
+ return self.std(numeric_only=numeric_only) / (
+ self.count(numeric_only) - ddof
+ ).pow(0.5)
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ "scipy.stats.kurtosis : Reference SciPy method.\n",
+ template_see_also,
+ create_section_header("Notes"),
+ "A minimum of four periods is required for the calculation.\n\n",
+ create_section_header("Examples"),
+ dedent(
+ """
+ The example below will show a rolling calculation with a window size of
+ four matching the equivalent function call using `scipy.stats`.
+
+ >>> arr = [1, 2, 3, 4, 999]
+ >>> import scipy.stats
+ >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
+ -1.200000
+ >>> print(f"{{scipy.stats.kurtosis(arr[1:], bias=False):.6f}}")
+ 3.999946
+ >>> s = pd.Series(arr)
+ >>> s.rolling(4).kurt()
+ 0 NaN
+ 1 NaN
+ 2 NaN
+ 3 -1.200000
+ 4 3.999946
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="Fisher's definition of kurtosis without bias",
+ agg_method="kurt",
+ )
+ def kurt(self, numeric_only: bool = False):
+ return super().kurt(numeric_only=numeric_only)
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ dedent(
+ """
+ GroupBy.first : Similar method for GroupBy objects.
+ Rolling.last : Method to get the last element in each window.\n
+ """
+ ).replace("\n", "", 1),
+ create_section_header("Examples"),
+ dedent(
+ """
+ The example below will show a rolling calculation with a window size of
+ three.
+
+ >>> s = pd.Series(range(5))
+ >>> s.rolling(3).first()
+ 0 NaN
+ 1 NaN
+ 2 0.0
+ 3 1.0
+ 4 2.0
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="First (left-most) element of the window",
+ agg_method="first",
+ )
+ def first(self, numeric_only: bool = False):
+ return super().first(numeric_only=numeric_only)
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ dedent(
+ """
+ GroupBy.last : Similar method for GroupBy objects.
+ Rolling.first : Method to get the first element in each window.\n
+ """
+ ).replace("\n", "", 1),
+ create_section_header("Examples"),
+ dedent(
+ """
+ The example below will show a rolling calculation with a window size of
+ three.
+
+ >>> s = pd.Series(range(5))
+ >>> s.rolling(3).last()
+ 0 NaN
+ 1 NaN
+ 2 2.0
+ 3 3.0
+ 4 4.0
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="Last (right-most) element of the window",
+ agg_method="last",
+ )
+ def last(self, numeric_only: bool = False):
+ return super().last(numeric_only=numeric_only)
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ dedent(
+ """
+ q : float
+ Quantile to compute. 0 <= quantile <= 1.
+
+ .. deprecated:: 2.1.0
+ This was renamed from 'quantile' to 'q' in version 2.1.0.
+ interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
+ This optional parameter specifies the interpolation method to use,
+ when the desired quantile lies between two data points `i` and `j`:
+
+ * linear: `i + (j - i) * fraction`, where `fraction` is the
+ fractional part of the index surrounded by `i` and `j`.
+ * lower: `i`.
+ * higher: `j`.
+ * nearest: `i` or `j` whichever is nearest.
+ * midpoint: (`i` + `j`) / 2.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """
+ >>> s = pd.Series([1, 2, 3, 4])
+ >>> s.rolling(2).quantile(.4, interpolation='lower')
+ 0 NaN
+ 1 1.0
+ 2 2.0
+ 3 3.0
+ dtype: float64
+
+ >>> s.rolling(2).quantile(.4, interpolation='midpoint')
+ 0 NaN
+ 1 1.5
+ 2 2.5
+ 3 3.5
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="quantile",
+ agg_method="quantile",
+ )
+ def quantile(
+ self,
+ q: float,
+ interpolation: QuantileInterpolation = "linear",
+ numeric_only: bool = False,
+ ):
+ return super().quantile(
+ q=q,
+ interpolation=interpolation,
+ numeric_only=numeric_only,
+ )
+
+ @doc(
+ template_header,
+ ".. versionadded:: 1.4.0 \n\n",
+ create_section_header("Parameters"),
+ dedent(
+ """
+ method : {{'average', 'min', 'max'}}, default 'average'
+ How to rank the group of records that have the same value (i.e. ties):
+
+ * average: average rank of the group
+ * min: lowest rank in the group
+ * max: highest rank in the group
+
+ ascending : bool, default True
+ Whether or not the elements should be ranked in ascending order.
+ pct : bool, default False
+ Whether or not to display the returned rankings in percentile
+ form.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """
+ >>> s = pd.Series([1, 4, 2, 3, 5, 3])
+ >>> s.rolling(3).rank()
+ 0 NaN
+ 1 NaN
+ 2 2.0
+ 3 2.0
+ 4 3.0
+ 5 1.5
+ dtype: float64
+
+ >>> s.rolling(3).rank(method="max")
+ 0 NaN
+ 1 NaN
+ 2 2.0
+ 3 2.0
+ 4 3.0
+ 5 2.0
+ dtype: float64
+
+ >>> s.rolling(3).rank(method="min")
+ 0 NaN
+ 1 NaN
+ 2 2.0
+ 3 2.0
+ 4 3.0
+ 5 1.0
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="rank",
+ agg_method="rank",
+ )
+ def rank(
+ self,
+ method: WindowingRankType = "average",
+ ascending: bool = True,
+ pct: bool = False,
+ numeric_only: bool = False,
+ ):
+ return super().rank(
+ method=method,
+ ascending=ascending,
+ pct=pct,
+ numeric_only=numeric_only,
+ )
+
+ @doc(
+ template_header,
+ ".. versionadded:: 3.0.0 \n\n",
+ create_section_header("Parameters"),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """
+ >>> s = pd.Series([1, 4, 2, np.nan, 3, 3, 4, 5])
+ >>> s.rolling(3).nunique()
+ 0 NaN
+ 1 NaN
+ 2 3.0
+ 3 NaN
+ 4 NaN
+ 5 NaN
+ 6 2.0
+ 7 3.0
+ dtype: float64
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="nunique",
+ agg_method="nunique",
+ )
+ def nunique(
+ self,
+ numeric_only: bool = False,
+ ):
+ return super().nunique(
+ numeric_only=numeric_only,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ dedent(
+ """
+ other : Series or DataFrame, optional
+ If not supplied, defaults to ``self`` and produces pairwise
+ output.
+ pairwise : bool, default None
+ If False then only matching columns between self and other will be
+ used and the output will be a DataFrame.
+ If True then all pairwise combinations will be calculated and the
+ output will be a MultiIndexed DataFrame in the case of DataFrame
+ inputs. In the case of missing elements, only complete pairwise
+ observations will be used.
+ ddof : int, default 1
+ Delta Degrees of Freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ template_see_also,
+ create_section_header("Examples"),
+ dedent(
+ """\
+ >>> ser1 = pd.Series([1, 2, 3, 4])
+ >>> ser2 = pd.Series([1, 4, 5, 8])
+ >>> ser1.rolling(2).cov(ser2)
+ 0 NaN
+ 1 1.5
+ 2 0.5
+ 3 1.5
+ dtype: float64
+ """
+ ),
+ window_method="rolling",
+ aggregation_description="sample covariance",
+ agg_method="cov",
+ )
+ def cov(
+ self,
+ other: DataFrame | Series | None = None,
+ pairwise: bool | None = None,
+ ddof: int = 1,
+ numeric_only: bool = False,
+ ):
+ return super().cov(
+ other=other,
+ pairwise=pairwise,
+ ddof=ddof,
+ numeric_only=numeric_only,
+ )
+
+ @doc(
+ template_header,
+ create_section_header("Parameters"),
+ dedent(
+ """
+ other : Series or DataFrame, optional
+ If not supplied, defaults to ``self`` and produces pairwise
+ output.
+ pairwise : bool, default None
+ If False then only matching columns between self and other will be
+ used and the output will be a DataFrame.
+ If True then all pairwise combinations will be calculated and the
+ output will be a MultiIndexed DataFrame in the case of DataFrame
+ inputs. In the case of missing elements, only complete pairwise
+ observations will be used.
+ ddof : int, default 1
+ Delta Degrees of Freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ """
+ ).replace("\n", "", 1),
+ kwargs_numeric_only,
+ create_section_header("Returns"),
+ template_returns,
+ create_section_header("See Also"),
+ dedent(
+ """
+ cov : Similar method to calculate covariance.
+ numpy.corrcoef : NumPy Pearson's correlation calculation.
+ """
+ ).replace("\n", "", 1),
+ template_see_also,
+ create_section_header("Notes"),
+ dedent(
+ """
+ This function uses Pearson's definition of correlation
+ (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).
+
+ When `other` is not specified, the output will be self correlation (e.g.
+ all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
+ set to `True`.
+
+ Function will return ``NaN`` for correlations of equal valued sequences;
+ this is the result of a 0/0 division error.
+
+ When `pairwise` is set to `False`, only matching columns between `self` and
+ `other` will be used.
+
+ When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
+ with the original index on the first level, and the `other` DataFrame
+ columns on the second level.
+
+ In the case of missing elements, only complete pairwise observations
+ will be used.\n
+ """
+ ).replace("\n", "", 1),
+ create_section_header("Examples"),
+ dedent(
+ """
+ The below example shows a rolling calculation with a window size of
+ four matching the equivalent function call using :meth:`numpy.corrcoef`.
+
+ >>> v1 = [3, 3, 3, 5, 8]
+ >>> v2 = [3, 4, 4, 4, 8]
+ >>> np.corrcoef(v1[:-1], v2[:-1])
+ array([[1. , 0.33333333],
+ [0.33333333, 1. ]])
+ >>> np.corrcoef(v1[1:], v2[1:])
+ array([[1. , 0.9169493],
+ [0.9169493, 1. ]])
+ >>> s1 = pd.Series(v1)
+ >>> s2 = pd.Series(v2)
+ >>> s1.rolling(4).corr(s2)
+ 0 NaN
+ 1 NaN
+ 2 NaN
+ 3 0.333333
+ 4 0.916949
+ dtype: float64
+
+ The below example shows a similar rolling calculation on a
+ DataFrame using the pairwise option.
+
+ >>> matrix = np.array([[51., 35.],
+ ... [49., 30.],
+ ... [47., 32.],
+ ... [46., 31.],
+ ... [50., 36.]])
+ >>> np.corrcoef(matrix[:-1, 0], matrix[:-1, 1])
+ array([[1. , 0.6263001],
+ [0.6263001, 1. ]])
+ >>> np.corrcoef(matrix[1:, 0], matrix[1:, 1])
+ array([[1. , 0.55536811],
+ [0.55536811, 1. ]])
+ >>> df = pd.DataFrame(matrix, columns=['X', 'Y'])
+ >>> df
+ X Y
+ 0 51.0 35.0
+ 1 49.0 30.0
+ 2 47.0 32.0
+ 3 46.0 31.0
+ 4 50.0 36.0
+ >>> df.rolling(4).corr(pairwise=True)
+ X Y
+ 0 X NaN NaN
+ Y NaN NaN
+ 1 X NaN NaN
+ Y NaN NaN
+ 2 X NaN NaN
+ Y NaN NaN
+ 3 X 1.000000 0.626300
+ Y 0.626300 1.000000
+ 4 X 1.000000 0.555368
+ Y 0.555368 1.000000
+ """
+ ).replace("\n", "", 1),
+ window_method="rolling",
+ aggregation_description="correlation",
+ agg_method="corr",
+ )
+ def corr(
+ self,
+ other: DataFrame | Series | None = None,
+ pairwise: bool | None = None,
+ ddof: int = 1,
+ numeric_only: bool = False,
+ ):
+ return super().corr(
+ other=other,
+ pairwise=pairwise,
+ ddof=ddof,
+ numeric_only=numeric_only,
+ )
+
+
+Rolling.__doc__ = Window.__doc__
+
+
+class RollingGroupby(BaseWindowGroupby, Rolling):
+ """
+ Provide a rolling groupby implementation.
+ """
+
+ _attributes = Rolling._attributes + BaseWindowGroupby._attributes
+
+ def _get_window_indexer(self) -> GroupbyIndexer:
+ """
+ Return an indexer class that will compute the window start and end bounds
+
+ Returns
+ -------
+ GroupbyIndexer
+ """
+ rolling_indexer: type[BaseIndexer]
+ indexer_kwargs: dict[str, Any] | None = None
+ index_array = self._index_array
+ if isinstance(self.window, BaseIndexer):
+ rolling_indexer = type(self.window)
+ indexer_kwargs = self.window.__dict__.copy()
+ assert isinstance(indexer_kwargs, dict) # for mypy
+ # We'll be using the index of each group later
+ indexer_kwargs.pop("index_array", None)
+ window = self.window
+ elif self._win_freq_i8 is not None:
+ rolling_indexer = VariableWindowIndexer
+ # error: Incompatible types in assignment (expression has type
+ # "int", variable has type "BaseIndexer")
+ window = self._win_freq_i8 # type: ignore[assignment]
+ else:
+ rolling_indexer = FixedWindowIndexer
+ window = self.window
+ window_indexer = GroupbyIndexer(
+ index_array=index_array,
+ window_size=window,
+ groupby_indices=self._grouper.indices,
+ window_indexer=rolling_indexer,
+ indexer_kwargs=indexer_kwargs,
+ )
+ return window_indexer
+
+ def _validate_datetimelike_monotonic(self) -> None:
+ """
+ Validate that each group in self._on is monotonic
+ """
+ # GH 46061
+ if self._on.hasnans:
+ self._raise_monotonic_error("values must not have NaT")
+ for group_indices in self._grouper.indices.values():
+ group_on = self._on.take(group_indices)
+ if not (
+ group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing
+ ):
+ on = "index" if self.on is None else self.on
+ raise ValueError(
+ f"Each group within {on} must be monotonic. "
+ f"Sort the values in {on} first."
+ )