diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst index 14af2b8a120e0..fb89fd2a5ffb2 100644 --- a/doc/source/reference/window.rst +++ b/doc/source/reference/window.rst @@ -35,6 +35,7 @@ Rolling window functions Rolling.skew Rolling.kurt Rolling.apply + Rolling.pipe Rolling.aggregate Rolling.quantile Rolling.sem @@ -76,6 +77,7 @@ Expanding window functions Expanding.skew Expanding.kurt Expanding.apply + Expanding.pipe Expanding.aggregate Expanding.quantile Expanding.sem diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 47838d1e49d61..34df7fc2027a5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -44,6 +44,7 @@ Other enhancements - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) +- :class:`Rolling` and :class:`Expanding` now support ``pipe`` method (:issue:`57076`) - :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`) - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`) - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`) diff --git a/pandas/core/window/doc.py b/pandas/core/window/doc.py index cdb670ee218b4..6dbc52a99e70c 100644 --- a/pandas/core/window/doc.py +++ b/pandas/core/window/doc.py @@ -85,6 +85,63 @@ def create_section_header(header: str) -> str: """ ).replace("\n", "", 1) +template_pipe = """ +Apply a ``func`` with arguments to this %(klass)s object and return its result. + +Use `.pipe` when you want to improve readability by chaining together +functions that expect Series, DataFrames, GroupBy, Rolling, Expanding or Resampler +objects. +Instead of writing + +>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3 +>>> g = lambda x, arg1: x * 5 / arg1 +>>> f = lambda x: x ** 4 +>>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, index=pd.date_range('2012-08-02', periods=4)) +>>> h(g(f(df.rolling('2D')), arg1=1), arg2=2, arg3=3) # doctest: +SKIP + +You can write + +>>> (df.rolling('2D') +... .pipe(f) +... .pipe(g, arg1=1) +... .pipe(h, arg2=2, arg3=3)) # doctest: +SKIP + +which is much more readable. + +Parameters +---------- +func : callable or tuple of (callable, str) + Function to apply to this %(klass)s object or, alternatively, + a `(callable, data_keyword)` tuple where `data_keyword` is a + string indicating the keyword of `callable` that expects the + %(klass)s object. +*args : iterable, optional + Positional arguments passed into `func`. +**kwargs : dict, optional + A dictionary of keyword arguments passed into `func`. + +Returns +------- +%(klass)s + The original object with the function `func` applied. + +See Also +-------- +Series.pipe : Apply a function with arguments to a series. +DataFrame.pipe: Apply a function with arguments to a dataframe. +apply : Apply function to each group instead of to the + full %(klass)s object. + +Notes +----- +See more `here +`_ + +Examples +-------- +%(examples)s +""" + numba_notes = ( "See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for " "extended documentation and performance considerations for the Numba engine.\n\n" diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index bff3a1660eba9..6a7d0329ab6da 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -5,9 +5,15 @@ TYPE_CHECKING, Any, Literal, + final, + overload, ) -from pandas.util._decorators import doc +from pandas.util._decorators import ( + Appender, + Substitution, + doc, +) from pandas.core.indexers.objects import ( BaseIndexer, @@ -20,6 +26,7 @@ kwargs_numeric_only, numba_notes, template_header, + template_pipe, template_returns, template_see_also, window_agg_numba_parameters, @@ -34,7 +41,11 @@ from collections.abc import Callable from pandas._typing import ( + Concatenate, + P, QuantileInterpolation, + Self, + T, WindowingRankType, ) @@ -241,6 +252,54 @@ def apply( kwargs=kwargs, ) + @overload + def pipe( + self, + func: Callable[Concatenate[Self, P], T], + *args: P.args, + **kwargs: P.kwargs, + ) -> T: ... + + @overload + def pipe( + self, + func: tuple[Callable[..., T], str], + *args: Any, + **kwargs: Any, + ) -> T: ... + + @final + @Substitution( + klass="Expanding", + examples=""" + >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, + ... index=pd.date_range('2012-08-02', periods=4)) + >>> df + A + 2012-08-02 1 + 2012-08-03 2 + 2012-08-04 3 + 2012-08-05 4 + + To get the difference between each expanding window's maximum and minimum + value in one pass, you can do + + >>> df.expanding().pipe(lambda x: x.max() - x.min()) + A + 2012-08-02 0.0 + 2012-08-03 1.0 + 2012-08-04 2.0 + 2012-08-05 3.0""", + ) + @Appender(template_pipe) + def pipe( + self, + func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str], + *args: Any, + **kwargs: Any, + ) -> T: + return super().pipe(func, *args, **kwargs) + @doc( template_header, create_section_header("Parameters"), diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 385ffb901acf0..90c3cff975ff0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -14,6 +14,8 @@ TYPE_CHECKING, Any, Literal, + final, + overload, ) import numpy as np @@ -26,7 +28,11 @@ import pandas._libs.window.aggregations as window_aggregations from pandas.compat._optional import import_optional_dependency from pandas.errors import DataError -from pandas.util._decorators import doc +from pandas.util._decorators import ( + Appender, + Substitution, + doc, +) from pandas.core.dtypes.common import ( ensure_float64, @@ -81,6 +87,7 @@ kwargs_scipy, numba_notes, template_header, + template_pipe, template_returns, template_see_also, window_agg_numba_parameters, @@ -102,8 +109,12 @@ from pandas._typing import ( ArrayLike, + Concatenate, NDFrameT, QuantileInterpolation, + P, + Self, + T, WindowingRankType, npt, ) @@ -1529,6 +1540,30 @@ def apply_func(values, begin, end, min_periods, raw=raw): return apply_func + @overload + def pipe( + self, + func: Callable[Concatenate[Self, P], T], + *args: P.args, + **kwargs: P.kwargs, + ) -> T: ... + + @overload + def pipe( + self, + func: tuple[Callable[..., T], str], + *args: Any, + **kwargs: Any, + ) -> T: ... + + def pipe( + self, + func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str], + *args: Any, + **kwargs: Any, + ) -> T: + return com.pipe(self, func, *args, **kwargs) + def sum( self, numeric_only: bool = False, @@ -2044,6 +2079,54 @@ def apply( kwargs=kwargs, ) + @overload + def pipe( + self, + func: Callable[Concatenate[Self, P], T], + *args: P.args, + **kwargs: P.kwargs, + ) -> T: ... + + @overload + def pipe( + self, + func: tuple[Callable[..., T], str], + *args: Any, + **kwargs: Any, + ) -> T: ... + + @final + @Substitution( + klass="Rolling", + examples=""" + >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, + ... index=pd.date_range('2012-08-02', periods=4)) + >>> df + A + 2012-08-02 1 + 2012-08-03 2 + 2012-08-04 3 + 2012-08-05 4 + + To get the difference between each rolling 2-day window's maximum and minimum + value in one pass, you can do + + >>> df.rolling('2D').pipe(lambda x: x.max() - x.min()) + A + 2012-08-02 0.0 + 2012-08-03 1.0 + 2012-08-04 1.0 + 2012-08-05 1.0""", + ) + @Appender(template_pipe) + def pipe( + self, + func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str], + *args: Any, + **kwargs: Any, + ) -> T: + return super().pipe(func, *args, **kwargs) + @doc( template_header, create_section_header("Parameters"), diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 15eaa8c167487..877b50e37670c 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -177,6 +177,38 @@ def test_agg_nested_dicts(): r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) +@pytest.mark.parametrize( + "func,window_size", + [ + ( + "rolling", + 2, + ), + ( + "expanding", + None, + ), + ], +) +def test_pipe(func, window_size): + # Issue #57076 + df = DataFrame( + { + "B": np.random.default_rng(2).standard_normal(10), + "C": np.random.default_rng(2).standard_normal(10), + } + ) + r = getattr(df, func)(window_size) + + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_frame_equal(result, expected) + + expected = r.max() - 2 * r.min() + result = r.pipe(lambda x, k: x.max() - k * x.min(), k=2) + tm.assert_frame_equal(result, expected) + + def test_count_nonnumeric_types(step): # GH12541 cols = [