@@ -1963,7 +1963,7 @@ def _set_name(
1963
1963
"""
1964
1964
)
1965
1965
)
1966
- @ Appender ( _shared_docs [ "groupby" ] % _shared_doc_kwargs )
1966
+
1967
1967
@deprecate_nonkeyword_arguments (
1968
1968
Pandas4Warning , allowed_args = ["self" , "by" , "level" ], name = "groupby"
1969
1969
)
@@ -1977,6 +1977,173 @@ def groupby(
1977
1977
observed : bool = True ,
1978
1978
dropna : bool = True ,
1979
1979
) -> SeriesGroupBy :
1980
+
1981
+ """
1982
+ Group Series using a mapper or by a Series of columns.
1983
+
1984
+ A groupby operation involves some combination of splitting the
1985
+ object, applying a function, and combining the results. This can be
1986
+ used to group large amounts of data and compute operations on these
1987
+ groups.
1988
+
1989
+ Parameters
1990
+ ----------
1991
+ by : mapping, function, label, pd.Grouper or list of such
1992
+ Used to determine the groups for the groupby.
1993
+ If ``by`` is a function, it's called on each value of the object's
1994
+ index. If a dict or Series is passed, the Series or dict VALUES
1995
+ will be used to determine the groups (the Series' values are first
1996
+ aligned; see ``.align()`` method). If a list or ndarray of length
1997
+ equal to the selected axis is passed (see the `groupby user guide
1998
+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
1999
+ the values are used as-is to determine the groups. A label or list
2000
+ of labels may be passed to group by the columns in ``self``.
2001
+ Notice that a tuple is interpreted as a (single) key.
2002
+ axis : {0 or 'index', 1 or 'columns'}, default 0
2003
+ Split along rows (0) or columns (1). For `Series` this parameter
2004
+ is unused and defaults to 0.
2005
+ level : int, level name, or sequence of such, default None
2006
+ If the axis is a MultiIndex (hierarchical), group by a particular
2007
+ level or levels. Do not specify both ``by`` and ``level``.
2008
+ as_index : bool, default True
2009
+ For aggregated output, return object with group labels as the
2010
+ index. Only relevant for DataFrame input. as_index=False is
2011
+ effectively "SQL-style" grouped output.
2012
+ sort : bool, default True
2013
+ Sort group keys. Get better performance by turning this off.
2014
+ Note this does not influence the order of observations within each
2015
+ group. Groupby preserves the order of rows within each group.
2016
+
2017
+ .. versionchanged:: 2.0.0
2018
+
2019
+ Specifying ``sort=False`` with an ordered categorical grouper will no
2020
+ longer sort the values.
2021
+
2022
+ group_keys : bool, default True
2023
+ When calling apply and the ``by`` argument produces a like-indexed
2024
+ (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
2025
+ index to identify pieces. By default group keys are not included
2026
+ when the result's index (and column) labels match the inputs, and
2027
+ are included otherwise.
2028
+
2029
+ .. versionchanged:: 1.5.0
2030
+
2031
+ Warns that ``group_keys`` will no longer be ignored when the
2032
+ result from ``apply`` is a like-indexed Series or DataFrame.
2033
+ Specify ``group_keys`` explicitly to include the group keys or
2034
+ not.
2035
+
2036
+ .. versionchanged:: 2.0.0
2037
+
2038
+ ``group_keys`` now defaults to ``True``.
2039
+
2040
+ observed : bool, default True
2041
+ This only applies if any of the groupers are Categoricals.
2042
+ If True: only show observed values for categorical groupers.
2043
+ If False: show all values for categorical groupers.
2044
+ dropna : bool, default True
2045
+ If True, and if group keys contain NA values, NA values together
2046
+ with row/column will be dropped.
2047
+ If False, NA values will also be treated as the key in groups.
2048
+
2049
+ .. versionadded:: 1.1.0
2050
+
2051
+ Returns
2052
+ -------
2053
+ SeriesGroupBy
2054
+ Returns a groupby object that contains information about the groups.
2055
+
2056
+ See Also
2057
+ --------
2058
+ resample : Convenience method for frequency conversion and resampling
2059
+ of time series.
2060
+
2061
+ Notes
2062
+ -----
2063
+ See the `user guide
2064
+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html>`__ for more
2065
+ detailed usage and examples, including splitting an object into groups,
2066
+ iterating through groups, selecting a group, aggregation, and more.
2067
+
2068
+ Examples
2069
+ --------
2070
+ >>> ser = pd.Series([390., 350., 30., 20.],
2071
+ ... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
2072
+ >>> ser
2073
+ Falcon 390.0
2074
+ Falcon 350.0
2075
+ Parrot 30.0
2076
+ Parrot 20.0
2077
+ Name: Max Speed, dtype: float64
2078
+ >>> ser.groupby(["a", "b", "a", "b"]).mean()
2079
+ a 210.0
2080
+ b 185.0
2081
+ Name: Max Speed, dtype: float64
2082
+ >>> ser.groupby(level=0).mean()
2083
+ Falcon 370.0
2084
+ Parrot 25.0
2085
+ Name: Max Speed, dtype: float64
2086
+ >>> ser.groupby(ser > 100).mean()
2087
+ Max Speed
2088
+ False 25.0
2089
+ True 370.0
2090
+ Name: Max Speed, dtype: float64
2091
+
2092
+ **Grouping by Indexes**
2093
+
2094
+ We can group by different levels of a hierarchical index
2095
+ using the `level` parameter:
2096
+
2097
+ >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
2098
+ ... ['Captive', 'Wild', 'Captive', 'Wild']]
2099
+ >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
2100
+ >>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
2101
+ >>> ser
2102
+ Animal Type
2103
+ Falcon Captive 390.0
2104
+ Wild 350.0
2105
+ Parrot Captive 30.0
2106
+ Wild 20.0
2107
+ Name: Max Speed, dtype: float64
2108
+ >>> ser.groupby(level=0).mean()
2109
+ Animal
2110
+ Falcon 370.0
2111
+ Parrot 25.0
2112
+ Name: Max Speed, dtype: float64
2113
+ >>> ser.groupby(level="Type").mean()
2114
+ Type
2115
+ Captive 210.0
2116
+ Wild 185.0
2117
+ Name: Max Speed, dtype: float64
2118
+
2119
+ We can also choose whether to include `NA` in group keys by setting the
2120
+ `dropna` parameter; the default setting is `True`.
2121
+
2122
+ >>> ser = pd.Series([1, 2, 3, 3], index=["a", 'a', 'b', np.nan])
2123
+ >>> ser.groupby(level=0).sum()
2124
+ a 3
2125
+ b 3
2126
+ dtype: int64
2127
+
2128
+ >>> ser.groupby(level=0, dropna=False).sum()
2129
+ a 3
2130
+ b 3
2131
+ NaN 3
2132
+ dtype: int64
2133
+
2134
+ >>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot']
2135
+ >>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed")
2136
+ >>> ser.groupby(["a", "b", "a", np.nan]).mean()
2137
+ a 210.0
2138
+ b 350.0
2139
+ Name: Max Speed, dtype: float64
2140
+
2141
+ >>> ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()
2142
+ a 210.0
2143
+ b 350.0
2144
+ NaN 20.0
2145
+ Name: Max Speed, dtype: float64"""
2146
+
1980
2147
from pandas .core .groupby .generic import SeriesGroupBy
1981
2148
1982
2149
if level is None and by is None :
0 commit comments