@@ -9350,21 +9350,140 @@ def update(
93509350
93519351 # ----------------------------------------------------------------------
93529352 # Data reshaping
9353- @Appender (
9354- dedent (
9355- """
9353+ @deprecate_nonkeyword_arguments (
9354+ Pandas4Warning , allowed_args = ["self" , "by" , "level" ], name = "groupby"
9355+ )
9356+ def groupby (
9357+ self ,
9358+ by = None ,
9359+ level : IndexLabel | None = None ,
9360+ as_index : bool = True ,
9361+ sort : bool = True ,
9362+ group_keys : bool = True ,
9363+ observed : bool = True ,
9364+ dropna : bool = True ,
9365+ ) -> DataFrameGroupBy :
9366+ """
9367+ Group DataFrame using a mapper or by a Series of columns.
9368+
9369+ A groupby operation involves some combination of splitting the
9370+ object, applying a function, and combining the results. This can be
9371+ used to group large amounts of data and compute operations on these
9372+ groups.
9373+
9374+ Parameters
9375+ ----------
9376+ by : mapping, function, label, pd.Grouper or list of such
9377+ Used to determine the groups for the groupby.
9378+ If ``by`` is a function, it's called on each value of the object's
9379+ index. If a dict or Series is passed, the Series or dict VALUES
9380+ will be used to determine the groups (the Series' values are first
9381+ aligned; see ``.align()`` method). If a list or ndarray of length
9382+ equal to the number of rows is passed (see the `groupby user guide
9383+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
9384+ the values are used as-is to determine the groups. A label or list
9385+ of labels may be passed to group by the columns in ``self``.
9386+ Notice that a tuple is interpreted as a (single) key.
9387+ level : int, level name, or sequence of such, default None
9388+ If the index is a MultiIndex (hierarchical), group by a particular
9389+ level or levels. Do not specify both ``by`` and ``level``.
9390+ as_index : bool, default True
9391+ Return object with group labels as the
9392+ index. Only relevant for DataFrame input. as_index=False is
9393+ effectively "SQL-style" grouped output. This argument has no effect
9394+ on filtrations (see the `filtrations in the user guide
9395+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9396+ such as ``head()``, ``tail()``, ``nth()`` and in transformations
9397+ (see the `transformations in the user guide
9398+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9399+ sort : bool, default True
9400+ Sort group keys. Get better performance by turning this off.
9401+ Note this does not influence the order of observations within each
9402+ group. Groupby preserves the order of rows within each group. If False,
9403+ the groups will appear in the same order as they did in the original
9404+ DataFrame.
9405+ This argument has no effect on filtrations (see the `filtrations
9406+ in the user guide
9407+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9408+ such as ``head()``, ``tail()``, ``nth()`` and in transformations
9409+ (see the `transformations in the user guide
9410+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9411+
9412+ .. versionchanged:: 2.0.0
9413+
9414+ Specifying ``sort=False`` with an ordered categorical grouper will no
9415+ longer sort the values.
9416+
9417+ group_keys : bool, default True
9418+ When calling apply and the ``by`` argument produces a like-indexed
9419+ (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
9420+ index to identify pieces. By default group keys are not included
9421+ when the result's index (and column) labels match the inputs, and
9422+ are included otherwise.
9423+
9424+ .. versionchanged:: 1.5.0
9425+
9426+ Warns that ``group_keys`` will no longer be ignored when the
9427+ result from ``apply`` is a like-indexed Series or DataFrame.
9428+ Specify ``group_keys`` explicitly to include the group keys or
9429+ not.
9430+
9431+ .. versionchanged:: 2.0.0
9432+
9433+ ``group_keys`` now defaults to ``True``.
9434+
9435+ observed : bool, default True
9436+ This only applies if any of the groupers are Categoricals.
9437+ If True: only show observed values for categorical groupers.
9438+ If False: show all values for categorical groupers.
9439+
9440+ .. versionchanged:: 3.0.0
9441+
9442+ The default value is now ``True``.
9443+
9444+ dropna : bool, default True
9445+ If True, and if group keys contain NA values, NA values together
9446+ with row/column will be dropped.
9447+ If False, NA values will also be treated as the key in groups.
9448+
9449+ Returns
9450+ -------
9451+ pandas.api.typing.DataFrameGroupBy
9452+ Returns a groupby object that contains information about the groups.
9453+
9454+ See Also
9455+ --------
9456+ resample : Convenience method for frequency conversion and resampling
9457+ of time series.
9458+
9459+ Notes
9460+ -----
9461+ See the `user guide
9462+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html>`__ for more
9463+ detailed usage and examples, including splitting an object into groups,
9464+ iterating through groups, selecting a group, aggregation, and more.
9465+
9466+ The implementation of groupby is hash-based, meaning in particular that
9467+ objects that compare as equal will be considered to be in the same group.
9468+ An exception to this is that pandas has special handling of NA values:
9469+ any NA values will be collapsed to a single group, regardless of how
9470+ they compare. See the user guide linked above for more details.
9471+
93569472 Examples
93579473 --------
9358- >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9359- ... 'Parrot', 'Parrot'],
9360- ... 'Max Speed': [380., 370., 24., 26.]})
9474+ >>> df = pd.DataFrame(
9475+ ... {
9476+ ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9477+ ... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9478+ ... }
9479+ ... )
93619480 >>> df
93629481 Animal Max Speed
93639482 0 Falcon 380.0
93649483 1 Falcon 370.0
93659484 2 Parrot 24.0
93669485 3 Parrot 26.0
9367- >>> df.groupby(['Animal']).mean()
9486+ >>> df.groupby(["Animal"]).mean()
93689487 Max Speed
93699488 Animal
93709489 Falcon 375.0
@@ -9375,11 +9494,12 @@ def update(
93759494 We can groupby different levels of a hierarchical index
93769495 using the `level` parameter:
93779496
9378- >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
9379- ... ['Captive', 'Wild', 'Captive', 'Wild']]
9380- >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
9381- >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
9382- ... index=index)
9497+ >>> arrays = [
9498+ ... ["Falcon", "Falcon", "Parrot", "Parrot"],
9499+ ... ["Captive", "Wild", "Captive", "Wild"],
9500+ ... ]
9501+ >>> index = pd.MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
9502+ >>> df = pd.DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
93839503 >>> df
93849504 Max Speed
93859505 Animal Type
@@ -9417,7 +9537,7 @@ def update(
94179537 2.0 2 5
94189538 NaN 1 4
94199539
9420- >>> arr = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
9540+ >>> arr = [["a", 12, 12], [None, 12.3, 33.0], ["b", 12.3, 123], ["a", 1, 1]]
94219541 >>> df = pd.DataFrame(arr, columns=["a", "b", "c"])
94229542
94239543 >>> df.groupby(by="a").sum()
@@ -9436,40 +9556,27 @@ def update(
94369556 When using ``.apply()``, use ``group_keys`` to include or exclude the
94379557 group keys. The ``group_keys`` argument defaults to ``True`` (include).
94389558
9439- >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9440- ... 'Parrot', 'Parrot'],
9441- ... 'Max Speed': [380., 370., 24., 26.]})
9442- >>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)
9559+ >>> df = pd.DataFrame(
9560+ ... {
9561+ ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9562+ ... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9563+ ... }
9564+ ... )
9565+ >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x)
94439566 Max Speed
94449567 Animal
94459568 Falcon 0 380.0
94469569 1 370.0
94479570 Parrot 2 24.0
94489571 3 26.0
94499572
9450- >>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)
9573+ >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x)
94519574 Max Speed
94529575 0 380.0
94539576 1 370.0
94549577 2 24.0
94559578 3 26.0
94569579 """
9457- )
9458- )
9459- @Appender (_shared_docs ["groupby" ] % _shared_doc_kwargs )
9460- @deprecate_nonkeyword_arguments (
9461- Pandas4Warning , allowed_args = ["self" , "by" , "level" ], name = "groupby"
9462- )
9463- def groupby (
9464- self ,
9465- by = None ,
9466- level : IndexLabel | None = None ,
9467- as_index : bool = True ,
9468- sort : bool = True ,
9469- group_keys : bool = True ,
9470- observed : bool = True ,
9471- dropna : bool = True ,
9472- ) -> DataFrameGroupBy :
94739580 from pandas .core .groupby .generic import DataFrameGroupBy
94749581
94759582 if level is None and by is None :
0 commit comments