Skip to content

Commit 300d9d8

Browse files
authored
Merge branch 'main' into pytimedelta
2 parents e750856 + d538a1c commit 300d9d8

File tree

5 files changed

+320
-170
lines changed

5 files changed

+320
-170
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9797
-i "pandas.Series.dt.unit GL08" \
9898
-i "pandas.Series.pad PR01,SA01" \
9999
-i "pandas.Series.sparse.from_coo PR07,SA01" \
100-
-i "pandas.Series.sparse.npoints SA01" \
101100
-i "pandas.Timedelta.max PR02" \
102101
-i "pandas.Timedelta.min PR02" \
103102
-i "pandas.Timedelta.resolution PR02" \
@@ -127,8 +126,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
127126
-i "pandas.arrays.SparseArray PR07,SA01" \
128127
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
129128
-i "pandas.core.groupby.DataFrameGroupBy.__iter__ RT03,SA01" \
130-
-i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
131-
-i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
132129
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
133130
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
134131
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
@@ -139,8 +136,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
139136
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
140137
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
141138
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
142-
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
143-
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
144139
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
145140
-i "pandas.core.groupby.SeriesGroupBy.groups SA01" \
146141
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \

pandas/core/arrays/sparse/array.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,18 @@ def npoints(self) -> int:
708708
"""
709709
The number of non- ``fill_value`` points.
710710
711+
This property returns the number of elements in the sparse series that are
712+
not equal to the ``fill_value``. Sparse data structures store only the
713+
non-``fill_value`` elements, reducing memory usage when the majority of
714+
values are the same.
715+
716+
See Also
717+
--------
718+
Series.sparse.to_dense : Convert a Series from sparse values to dense.
719+
Series.sparse.fill_value : Elements in ``data`` that are ``fill_value`` are
720+
not stored.
721+
Series.sparse.density : The percent of non- ``fill_value`` points, as decimal.
722+
711723
Examples
712724
--------
713725
>>> from pandas.arrays import SparseArray

pandas/core/groupby/generic.py

Lines changed: 308 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@
6767
from pandas.core.groupby.groupby import (
6868
GroupBy,
6969
GroupByPlot,
70-
_agg_template_frame,
71-
_agg_template_series,
7270
_transform_template,
7371
)
7472
from pandas.core.indexes.api import (
@@ -324,8 +322,141 @@ def apply(self, func, *args, **kwargs) -> Series:
324322
"""
325323
return super().apply(func, *args, **kwargs)
326324

327-
@doc(_agg_template_series, examples=_agg_examples_doc, klass="Series")
328325
def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
326+
"""
327+
Aggregate using one or more operations.
328+
329+
The ``aggregate`` method enables flexible and efficient aggregation of grouped
330+
data using a variety of functions, including built-in, user-defined, and
331+
optimized JIT-compiled functions.
332+
333+
Parameters
334+
----------
335+
func : function, str, list, dict or None
336+
Function to use for aggregating the data. If a function, must either
337+
work when passed a Series or when passed to Series.apply.
338+
339+
Accepted combinations are:
340+
341+
- function
342+
- string function name
343+
- list of functions and/or function names, e.g. ``[np.sum, 'mean']``
344+
- None, in which case ``**kwargs`` are used with Named Aggregation. Here
345+
the output has one column for each element in ``**kwargs``. The name of
346+
the column is keyword, whereas the value determines the aggregation
347+
used to compute the values in the column.
348+
349+
Can also accept a Numba JIT function with
350+
``engine='numba'`` specified. Only passing a single function is supported
351+
with this engine.
352+
353+
If the ``'numba'`` engine is chosen, the function must be
354+
a user defined function with ``values`` and ``index`` as the
355+
first and second arguments respectively in the function signature.
356+
Each group's index will be passed to the user defined function
357+
and optionally available for use.
358+
359+
.. deprecated:: 2.1.0
360+
361+
Passing a dictionary is deprecated and will raise in a future version
362+
of pandas. Pass a list of aggregations instead.
363+
*args
364+
Positional arguments to pass to func.
365+
engine : str, default None
366+
* ``'cython'`` : Runs the function through C-extensions from cython.
367+
* ``'numba'`` : Runs the function through JIT compiled code from numba.
368+
* ``None`` : Defaults to ``'cython'`` or globally setting
369+
``compute.use_numba``
370+
371+
engine_kwargs : dict, default None
372+
* For ``'cython'`` engine, there are no accepted ``engine_kwargs``
373+
* For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
374+
and ``parallel`` dictionary keys. The values must either be ``True`` or
375+
``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
376+
``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
377+
applied to the function
378+
379+
**kwargs
380+
* If ``func`` is None, ``**kwargs`` are used to define the output names and
381+
aggregations via Named Aggregation. See ``func`` entry.
382+
* Otherwise, keyword arguments to be passed into func.
383+
384+
Returns
385+
-------
386+
Series
387+
Aggregated Series based on the grouping and the applied aggregation
388+
functions.
389+
390+
See Also
391+
--------
392+
SeriesGroupBy.apply : Apply function func group-wise
393+
and combine the results together.
394+
SeriesGroupBy.transform : Transforms the Series on each group
395+
based on the given function.
396+
Series.aggregate : Aggregate using one or more operations.
397+
398+
Notes
399+
-----
400+
When using ``engine='numba'``, there will be no "fall back" behavior internally.
401+
The group data and group index will be passed as numpy arrays to the JITed
402+
user defined function, and no alternative execution attempts will be tried.
403+
404+
Functions that mutate the passed object can produce unexpected
405+
behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
406+
for more details.
407+
408+
.. versionchanged:: 1.3.0
409+
410+
The resulting dtype will reflect the return value of the passed ``func``,
411+
see the examples below.
412+
413+
Examples
414+
--------
415+
>>> s = pd.Series([1, 2, 3, 4])
416+
417+
>>> s
418+
0 1
419+
1 2
420+
2 3
421+
3 4
422+
dtype: int64
423+
424+
>>> s.groupby([1, 1, 2, 2]).min()
425+
1 1
426+
2 3
427+
dtype: int64
428+
429+
>>> s.groupby([1, 1, 2, 2]).agg("min")
430+
1 1
431+
2 3
432+
dtype: int64
433+
434+
>>> s.groupby([1, 1, 2, 2]).agg(["min", "max"])
435+
min max
436+
1 1 2
437+
2 3 4
438+
439+
The output column names can be controlled by passing
440+
the desired column names and aggregations as keyword arguments.
441+
442+
>>> s.groupby([1, 1, 2, 2]).agg(
443+
... minimum="min",
444+
... maximum="max",
445+
... )
446+
minimum maximum
447+
1 1 2
448+
2 3 4
449+
450+
.. versionchanged:: 1.3.0
451+
452+
The resulting dtype will reflect the return value of the aggregating
453+
function.
454+
455+
>>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min())
456+
1 1.0
457+
2 3.0
458+
dtype: float64
459+
"""
329460
relabeling = func is None
330461
columns = None
331462
if relabeling:
@@ -1515,8 +1646,181 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
15151646
"""
15161647
)
15171648

1518-
@doc(_agg_template_frame, examples=_agg_examples_doc, klass="DataFrame")
15191649
def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
1650+
"""
1651+
Aggregate using one or more operations.
1652+
1653+
The ``aggregate`` function allows the application of one or more aggregation
1654+
operations on groups of data within a DataFrameGroupBy object. It supports
1655+
various aggregation methods, including user-defined functions and predefined
1656+
functions such as 'sum', 'mean', etc.
1657+
1658+
Parameters
1659+
----------
1660+
func : function, str, list, dict or None
1661+
Function to use for aggregating the data. If a function, must either
1662+
work when passed a DataFrame or when passed to DataFrame.apply.
1663+
1664+
Accepted combinations are:
1665+
1666+
- function
1667+
- string function name
1668+
- list of functions and/or function names, e.g. ``[np.sum, 'mean']``
1669+
- dict of index labels -> functions, function names or list of such.
1670+
- None, in which case ``**kwargs`` are used with Named Aggregation. Here the
1671+
output has one column for each element in ``**kwargs``. The name of the
1672+
column is keyword, whereas the value determines the aggregation used to
1673+
compute the values in the column.
1674+
1675+
Can also accept a Numba JIT function with
1676+
``engine='numba'`` specified. Only passing a single function is supported
1677+
with this engine.
1678+
1679+
If the ``'numba'`` engine is chosen, the function must be
1680+
a user defined function with ``values`` and ``index`` as the
1681+
first and second arguments respectively in the function signature.
1682+
Each group's index will be passed to the user defined function
1683+
and optionally available for use.
1684+
1685+
*args
1686+
Positional arguments to pass to func.
1687+
engine : str, default None
1688+
* ``'cython'`` : Runs the function through C-extensions from cython.
1689+
* ``'numba'`` : Runs the function through JIT compiled code from numba.
1690+
* ``None`` : Defaults to ``'cython'`` or globally setting
1691+
``compute.use_numba``
1692+
1693+
engine_kwargs : dict, default None
1694+
* For ``'cython'`` engine, there are no accepted ``engine_kwargs``
1695+
* For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
1696+
and ``parallel`` dictionary keys. The values must either be ``True`` or
1697+
``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
1698+
``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
1699+
applied to the function
1700+
1701+
**kwargs
1702+
* If ``func`` is None, ``**kwargs`` are used to define the output names and
1703+
aggregations via Named Aggregation. See ``func`` entry.
1704+
* Otherwise, keyword arguments to be passed into func.
1705+
1706+
Returns
1707+
-------
1708+
DataFrame
1709+
Aggregated DataFrame based on the grouping and the applied aggregation
1710+
functions.
1711+
1712+
See Also
1713+
--------
1714+
DataFrame.groupby.apply : Apply function func group-wise
1715+
and combine the results together.
1716+
DataFrame.groupby.transform : Transforms the DataFrame on each group
1717+
based on the given function.
1718+
DataFrame.aggregate : Aggregate using one or more operations.
1719+
1720+
Notes
1721+
-----
1722+
When using ``engine='numba'``, there will be no "fall back" behavior internally.
1723+
The group data and group index will be passed as numpy arrays to the JITed
1724+
user defined function, and no alternative execution attempts will be tried.
1725+
1726+
Functions that mutate the passed object can produce unexpected
1727+
behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
1728+
for more details.
1729+
1730+
.. versionchanged:: 1.3.0
1731+
1732+
The resulting dtype will reflect the return value of the passed ``func``,
1733+
see the examples below.
1734+
1735+
Examples
1736+
--------
1737+
>>> data = {
1738+
... "A": [1, 1, 2, 2],
1739+
... "B": [1, 2, 3, 4],
1740+
... "C": [0.362838, 0.227877, 1.267767, -0.562860],
1741+
... }
1742+
>>> df = pd.DataFrame(data)
1743+
>>> df
1744+
A B C
1745+
0 1 1 0.362838
1746+
1 1 2 0.227877
1747+
2 2 3 1.267767
1748+
3 2 4 -0.562860
1749+
1750+
The aggregation is for each column.
1751+
1752+
>>> df.groupby("A").agg("min")
1753+
B C
1754+
A
1755+
1 1 0.227877
1756+
2 3 -0.562860
1757+
1758+
Multiple aggregations
1759+
1760+
>>> df.groupby("A").agg(["min", "max"])
1761+
B C
1762+
min max min max
1763+
A
1764+
1 1 2 0.227877 0.362838
1765+
2 3 4 -0.562860 1.267767
1766+
1767+
Select a column for aggregation
1768+
1769+
>>> df.groupby("A").B.agg(["min", "max"])
1770+
min max
1771+
A
1772+
1 1 2
1773+
2 3 4
1774+
1775+
User-defined function for aggregation
1776+
1777+
>>> df.groupby("A").agg(lambda x: sum(x) + 2)
1778+
B C
1779+
A
1780+
1 5 2.590715
1781+
2 9 2.704907
1782+
1783+
Different aggregations per column
1784+
1785+
>>> df.groupby("A").agg({"B": ["min", "max"], "C": "sum"})
1786+
B C
1787+
min max sum
1788+
A
1789+
1 1 2 0.590715
1790+
2 3 4 0.704907
1791+
1792+
To control the output names with different aggregations per column,
1793+
pandas supports "named aggregation"
1794+
1795+
>>> df.groupby("A").agg(
1796+
... b_min=pd.NamedAgg(column="B", aggfunc="min"),
1797+
... c_sum=pd.NamedAgg(column="C", aggfunc="sum"),
1798+
... )
1799+
b_min c_sum
1800+
A
1801+
1 1 0.590715
1802+
2 3 0.704907
1803+
1804+
- The keywords are the *output* column names
1805+
- The values are tuples whose first element is the column to select
1806+
and the second element is the aggregation to apply to that column.
1807+
Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
1808+
``['column', 'aggfunc']`` to make it clearer what the arguments are.
1809+
As usual, the aggregation can be a callable or a string alias.
1810+
1811+
See :ref:`groupby.aggregate.named` for more.
1812+
1813+
.. versionchanged:: 1.3.0
1814+
1815+
The resulting dtype will reflect the return value of the aggregating
1816+
function.
1817+
1818+
>>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min())
1819+
B
1820+
A
1821+
1 1.0
1822+
2 3.0
1823+
"""
15201824
relabeling, func, columns, order = reconstruct_func(func, **kwargs)
15211825
func = maybe_mangle_lambdas(func)
15221826

0 commit comments

Comments
 (0)