Merged
Changes from 3 commits
4 changes: 2 additions & 2 deletions doc/source/user_guide/cookbook.rst
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
df

# List the size of the animals with the highest weight.
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])

`Using get_group
<https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])


expected_df = gb.apply(GrowUp, include_groups=False)
expected_df = gb.apply(GrowUp)
expected_df

`Expanding apply
8 changes: 4 additions & 4 deletions doc/source/user_guide/groupby.rst
@@ -1074,7 +1074,7 @@ missing values with the ``ffill()`` method.
).set_index("date")
df_re

df_re.groupby("group").resample("1D", include_groups=False).ffill()
df_re.groupby("group").resample("1D").ffill()

.. _groupby.filter:

@@ -1252,13 +1252,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare

.. ipython:: python

df.groupby("A", group_keys=True).apply(lambda x: x, include_groups=False)
df.groupby("A", group_keys=True).apply(lambda x: x)

with

.. ipython:: python

df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
df.groupby("A", group_keys=False).apply(lambda x: x)


Numba accelerated routines
@@ -1742,7 +1742,7 @@ column index name will be used as the name of the inserted column:
result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
return pd.Series(result, name="metrics")

result = df.groupby("a").apply(compute_metrics, include_groups=False)
result = df.groupby("a").apply(compute_metrics)

result

1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -554,6 +554,7 @@ Other Removals
- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`)
- Removed specifying ``include_groups=True`` in :class:`.DataFrameGroupBy.apply` and :class:`.Resampler.apply` (:issue:`7155`)
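
In practice (a minimal sketch assuming pandas 3.0 as changed here, with a small hypothetical frame), the grouping column is always excluded from what the applied function sees, and passing ``include_groups=True`` raises a ``ValueError``::

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})

    # The grouping column "a" is excluded from what the function receives.
    df.groupby("a").apply(lambda g: g["b"].sum())

    # Passing include_groups=True now raises a ValueError.
    # df.groupby("a").apply(lambda g: g.sum(), include_groups=True)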

.. ---------------------------------------------------------------------------
.. _whatsnew_300.performance:
89 changes: 23 additions & 66 deletions pandas/core/groupby/groupby.py
@@ -1393,7 +1393,7 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs):
# -----------------------------------------------------------------
# apply/agg/transform

def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
def apply(self, func, *args, include_groups: bool = False, **kwargs) -> NDFrameT:
"""
Apply function ``func`` group-wise and combine the results together.

@@ -1419,18 +1419,17 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
*args : tuple
Optional positional arguments to pass to ``func``.

include_groups : bool, default True
include_groups : bool, default False
When True, will attempt to apply ``func`` to the groupings in
the case that they are columns of the DataFrame. If this raises a
TypeError, the result will be computed with the groupings excluded.
When False, the groupings will be excluded when applying ``func``.

.. versionadded:: 2.2.0

.. deprecated:: 2.2.0
.. versionchanged:: 3.0.0

Setting include_groups to True is deprecated. Only the value
False will be allowed in a future version of pandas.
The default changed from True to False, and True is no longer allowed.

**kwargs : dict
Optional keyword arguments to pass to ``func``.
@@ -1520,7 +1519,7 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
each group together into a Series, including setting the index as
appropriate:

>>> g1.apply(lambda x: x.C.max() - x.B.min(), include_groups=False)
>>> g1.apply(lambda x: x.C.max() - x.B.min())
A
a 5
b 2
@@ -1529,11 +1528,13 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
Example 4: The function passed to ``apply`` returns ``None`` for one of the
groups. This group is filtered from the result:

>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x)
B C
0 1 4
1 2 6
"""
if include_groups:
raise ValueError("include_groups=True is no longer allowed.")
if isinstance(func, str):
if hasattr(self, func):
res = getattr(self, func)
@@ -1560,33 +1561,7 @@ def f(g):
else:
f = func

if not include_groups:
return self._python_apply_general(f, self._obj_with_exclusions)

try:
result = self._python_apply_general(f, self._selected_obj)
if (
not isinstance(self.obj, Series)
and self._selection is None
and self._selected_obj.shape != self._obj_with_exclusions.shape
):
warnings.warn(
message=_apply_groupings_depr.format(type(self).__name__, "apply"),
category=DeprecationWarning,
stacklevel=find_stack_level(),
)
except TypeError:
# gh-20949
# try again, with .apply acting as a filtering
# operation, by excluding the grouping column
# This would normally not be triggered
# except if the udf is trying an operation that
# fails on *some* columns, e.g. a numeric operation
# on a string grouper column

return self._python_apply_general(f, self._obj_with_exclusions)

return result
return self._python_apply_general(f, self._obj_with_exclusions)

@final
def _python_apply_general(
@@ -3424,7 +3399,9 @@ def describe(
return result

@final
def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
def resample(
self, rule, *args, include_groups: bool = False, **kwargs
) -> Resampler:
"""
Provide resampling when using a TimeGrouper.

@@ -3449,10 +3426,9 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp

.. versionadded:: 2.2.0

.. deprecated:: 2.2.0
.. versionchanged:: 3.0

Setting include_groups to True is deprecated. Only the value
False will be allowed in a future version of pandas.
The default was changed to False, and True is no longer allowed.

**kwargs
Possible arguments are `how`, `fill_method`, `limit`, `kind` and
@@ -3485,7 +3461,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
Downsample the DataFrame into 3 minute bins and sum the values of
the timestamps falling into a bin.

>>> df.groupby("a").resample("3min", include_groups=False).sum()
>>> df.groupby("a").resample("3min").sum()
b
a
0 2000-01-01 00:00:00 2
@@ -3494,7 +3470,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp

Upsample the series into 30 second bins.

>>> df.groupby("a").resample("30s", include_groups=False).sum()
>>> df.groupby("a").resample("30s").sum()
b
a
0 2000-01-01 00:00:00 1
@@ -3508,7 +3484,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp

Resample by month. Values are assigned to the month of the period.

>>> df.groupby("a").resample("ME", include_groups=False).sum()
>>> df.groupby("a").resample("ME").sum()
b
a
0 2000-01-31 3
@@ -3517,11 +3493,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
Downsample the series into 3 minute bins as above, but close the right
side of the bin interval.

>>> (
... df.groupby("a")
... .resample("3min", closed="right", include_groups=False)
... .sum()
... )
>>> (df.groupby("a").resample("3min", closed="right").sum())
b
a
0 1999-12-31 23:57:00 1
@@ -3532,11 +3504,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
the bin interval, but label each bin using the right edge instead of
the left.

>>> (
... df.groupby("a")
... .resample("3min", closed="right", label="right", include_groups=False)
... .sum()
... )
>>> (df.groupby("a").resample("3min", closed="right", label="right").sum())
b
a
0 2000-01-01 00:00:00 1
@@ -3545,11 +3513,10 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
"""
from pandas.core.resample import get_resampler_for_grouping

# mypy flags that include_groups could be specified via `*args` or `**kwargs`
# GH#54961 would resolve.
return get_resampler_for_grouping( # type: ignore[misc]
self, rule, *args, include_groups=include_groups, **kwargs
)
if include_groups:
raise ValueError("include_groups=True is no longer allowed.")

return get_resampler_for_grouping(self, rule, *args, **kwargs)

@final
def rolling(
@@ -5561,13 +5528,3 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
mi = MultiIndex(levels=levels, codes=codes, names=[idx.name, None])

return mi


# GH#7155
_apply_groupings_depr = (
"{}.{} operated on the grouping columns. This behavior is deprecated, "
"and in a future version of pandas the grouping columns will be excluded "
"from the operation. Either pass `include_groups=False` to exclude the "
"groupings or explicitly select the grouping columns after groupby to silence "
"this warning."
)
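
The removed warning message above already describes the replacement idiom: explicitly select the grouping columns after ``groupby`` when the applied function still needs them. A minimal sketch with a hypothetical frame::

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})

    # Re-select the grouping column explicitly instead of passing
    # include_groups=True, which now raises a ValueError.
    df.groupby("a")[["a", "b"]].apply(lambda g: g.sum())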