Skip to content

ENH: Support pipe() method in Rolling and Expanding #60697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Other enhancements
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
- :class:`Rolling` and :class:`Expanding` now support the ``pipe`` method (:issue:`57076`)
- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
Expand Down
57 changes: 57 additions & 0 deletions pandas/core/window/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,63 @@ def create_section_header(header: str) -> str:
"""
).replace("\n", "", 1)

# Shared docstring template for the window ``pipe`` methods. It carries the
# ``%(klass)s`` and ``%(examples)s`` placeholders, which the decorated methods
# fill in with their own class name and doctest examples. Because the text is
# %-formatted, any literal percent sign added here must be written as ``%%``.
template_pipe = """
Apply a ``func`` with arguments to this %(klass)s object and return its result.

Use `.pipe` when you want to improve readability by chaining together
functions that expect Series, DataFrames, GroupBy, Rolling, Expanding or Resampler
objects.
Instead of writing

>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
>>> g = lambda x, arg1: x * 5 / arg1
>>> f = lambda x: x ** 4
>>> df = pd.DataFrame(
...     {'A': [1, 2, 3, 4]}, index=pd.date_range('2012-08-02', periods=4)
... )
>>> h(g(f(df.rolling('2D')), arg1=1), arg2=2, arg3=3)  # doctest: +SKIP

You can write

>>> (df.rolling('2D')
...    .pipe(f)
...    .pipe(g, arg1=1)
...    .pipe(h, arg2=2, arg3=3))  # doctest: +SKIP

which is much more readable.

Parameters
----------
func : callable or tuple of (callable, str)
    Function to apply to this %(klass)s object or, alternatively,
    a `(callable, data_keyword)` tuple where `data_keyword` is a
    string indicating the keyword of `callable` that expects the
    %(klass)s object.
*args : iterable, optional
    Positional arguments passed into `func`.
**kwargs : dict, optional
    A dictionary of keyword arguments passed into `func`.

Returns
-------
object
    The result of calling `func` on this %(klass)s object.

See Also
--------
Series.pipe : Apply a function with arguments to a series.
DataFrame.pipe : Apply a function with arguments to a dataframe.
apply : Apply function to each window instead of to the
    full %(klass)s object.

Notes
-----
See more `here
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_

Examples
--------
%(examples)s
"""

numba_notes = (
"See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for "
"extended documentation and performance considerations for the Numba engine.\n\n"
Expand Down
61 changes: 60 additions & 1 deletion pandas/core/window/expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,15 @@
TYPE_CHECKING,
Any,
Literal,
final,
overload,
)

from pandas.util._decorators import doc
from pandas.util._decorators import (
Appender,
Substitution,
doc,
)

from pandas.core.indexers.objects import (
BaseIndexer,
Expand All @@ -20,6 +26,7 @@
kwargs_numeric_only,
numba_notes,
template_header,
template_pipe,
template_returns,
template_see_also,
window_agg_numba_parameters,
Expand All @@ -34,7 +41,11 @@
from collections.abc import Callable

from pandas._typing import (
Concatenate,
P,
QuantileInterpolation,
Self,
T,
WindowingRankType,
)

Expand Down Expand Up @@ -241,6 +252,54 @@ def apply(
kwargs=kwargs,
)

# Typing overload: ``func`` is a plain callable that receives this object as
# its first positional argument; the remaining arguments are checked against
# the callable's own parameters.
@overload
def pipe(
    self,
    func: Callable[Concatenate[Self, P], T],
    *args: P.args,
    **kwargs: P.kwargs,
) -> T: ...

# Typing overload: ``func`` is a ``(callable, data_keyword)`` tuple, in which
# case the extra arguments cannot be checked statically.
@overload
def pipe(
    self,
    func: tuple[Callable[..., T], str],
    *args: Any,
    **kwargs: Any,
) -> T: ...

# No literal docstring is written on purpose: the documentation comes from
# ``template_pipe`` (attached by ``Appender``) with ``klass``/``examples``
# filled in by ``Substitution``.
#
# The blank lines inside ``examples`` are significant. doctest treats every
# line after a ``>>>`` statement as expected output until it meets a blank
# line or the next prompt, so the prose sentence must be separated from the
# ``df`` output above it and from the prompt below it.
@final
@Substitution(
    klass="Expanding",
    examples="""
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
... index=pd.date_range('2012-08-02', periods=4))
>>> df
A
2012-08-02 1
2012-08-03 2
2012-08-04 3
2012-08-05 4

To get the difference between each expanding window's maximum and minimum
value in one pass, you can do

>>> df.expanding().pipe(lambda x: x.max() - x.min())
A
2012-08-02 0.0
2012-08-03 1.0
2012-08-04 2.0
2012-08-05 3.0""",
)
@Appender(template_pipe)
def pipe(
    self,
    func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
    *args: Any,
    **kwargs: Any,
) -> T:
    # The behaviour lives in the parent class; this override exists to attach
    # the Expanding-specific documentation.
    return super().pipe(func, *args, **kwargs)

@doc(
template_header,
create_section_header("Parameters"),
Expand Down
85 changes: 84 additions & 1 deletion pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
TYPE_CHECKING,
Any,
Literal,
final,
overload,
)

import numpy as np
Expand All @@ -26,7 +28,11 @@
import pandas._libs.window.aggregations as window_aggregations
from pandas.compat._optional import import_optional_dependency
from pandas.errors import DataError
from pandas.util._decorators import doc
from pandas.util._decorators import (
Appender,
Substitution,
doc,
)

from pandas.core.dtypes.common import (
ensure_float64,
Expand Down Expand Up @@ -81,6 +87,7 @@
kwargs_scipy,
numba_notes,
template_header,
template_pipe,
template_returns,
template_see_also,
window_agg_numba_parameters,
Expand All @@ -102,8 +109,12 @@

from pandas._typing import (
ArrayLike,
Concatenate,
NDFrameT,
QuantileInterpolation,
P,
Self,
T,
WindowingRankType,
npt,
)
Expand Down Expand Up @@ -1529,6 +1540,30 @@ def apply_func(values, begin, end, min_periods, raw=raw):

return apply_func

# Typing overload: ``func`` is a callable whose first positional parameter is
# this object; ``*args``/``**kwargs`` are typed against the callable's
# remaining parameters through the ``P`` parameter specification.
@overload
def pipe(
self,
func: Callable[Concatenate[Self, P], T],
*args: P.args,
**kwargs: P.kwargs,
) -> T: ...

# Typing overload: ``func`` is a ``(callable, str)`` tuple; extra arguments are
# left untyped (``Any``) in this form.
@overload
def pipe(
self,
func: tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T: ...

# Runtime implementation: hands this object, ``func`` and all extra arguments
# to ``com.pipe`` and returns whatever that call returns.
# NOTE(review): presumably this is the method that the ``super().pipe(...)``
# calls in the Rolling/Expanding overrides resolve to - confirm against the
# class hierarchy.
def pipe(
self,
func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
return com.pipe(self, func, *args, **kwargs)

def sum(
self,
numeric_only: bool = False,
Expand Down Expand Up @@ -2044,6 +2079,54 @@ def apply(
kwargs=kwargs,
)

# Typing overload: ``func`` is a callable taking this object as its first
# positional argument; remaining arguments are typed via ``P``.
@overload
def pipe(
self,
func: Callable[Concatenate[Self, P], T],
*args: P.args,
**kwargs: P.kwargs,
) -> T: ...

# Typing overload: ``func`` is a ``(callable, str)`` tuple; extra arguments are
# untyped (``Any``) in this form.
@overload
def pipe(
self,
func: tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T: ...

# The method has no literal docstring: ``Appender`` attaches ``template_pipe``
# and ``Substitution`` supplies the ``klass`` and ``examples`` values for it.
# The blank lines inside ``examples`` separate doctest output from prose and
# should be kept.
@final
@Substitution(
klass="Rolling",
examples="""
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
... index=pd.date_range('2012-08-02', periods=4))
>>> df
A
2012-08-02 1
2012-08-03 2
2012-08-04 3
2012-08-05 4

To get the difference between each rolling 2-day window's maximum and minimum
value in one pass, you can do

>>> df.rolling('2D').pipe(lambda x: x.max() - x.min())
A
2012-08-02 0.0
2012-08-03 1.0
2012-08-04 1.0
2012-08-05 1.0""",
)
@Appender(template_pipe)
def pipe(
self,
func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
# Pure delegation to the parent class's ``pipe``; nothing is added here
# beyond the decorators above.
return super().pipe(func, *args, **kwargs)

@doc(
template_header,
create_section_header("Parameters"),
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/window/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,38 @@ def test_agg_nested_dicts():
r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})


@pytest.mark.parametrize(
    "func,window_size",
    [
        ("rolling", 2),
        ("expanding", None),
    ],
)
def test_pipe(func, window_size):
    """Check that ``pipe`` on a window object equals calling the function directly.

    Covers a bare callable, extra keyword and positional arguments, and the
    ``(callable, data_keyword)`` tuple form.
    """
    # Issue #57076
    # Draw both columns from one generator: two generators built from the same
    # seed would make B and C identical and hide any column mix-up.
    rng = np.random.default_rng(2)
    df = DataFrame(
        {
            "B": rng.standard_normal(10),
            "C": rng.standard_normal(10),
        }
    )
    r = getattr(df, func)(window_size)

    expected = r.max() - r.mean()
    result = r.pipe(lambda x: x.max() - x.mean())
    tm.assert_frame_equal(result, expected)

    expected = r.max() - 2 * r.min()
    result = r.pipe(lambda x, k: x.max() - k * x.min(), k=2)
    tm.assert_frame_equal(result, expected)

    # Extra positional arguments are forwarded after the window object.
    result = r.pipe(lambda x, k: x.max() - k * x.min(), 2)
    tm.assert_frame_equal(result, expected)

    # Tuple form: the window object is injected under the named keyword, so
    # it does not have to be the callable's first parameter.
    def scaled_range(k, window):
        return window.max() - k * window.min()

    result = r.pipe((scaled_range, "window"), 2)
    tm.assert_frame_equal(result, expected)


def test_count_nonnumeric_types(step):
# GH12541
cols = [
Expand Down