Skip to content

Commit 211f479

Browse files
Merge branch 'pandas-dev:main' into update-guides-to-ref-relevant-api-doc
2 parents 04b020c + 066a4f7 commit 211f479

File tree

26 files changed

+2643
-1470
lines changed

26 files changed

+2643
-1470
lines changed

.github/workflows/codeql.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ jobs:
2828

2929
steps:
3030
- uses: actions/checkout@v5
31-
- uses: github/codeql-action/init@v3
31+
- uses: github/codeql-action/init@v4
3232
with:
3333
languages: ${{ matrix.language }}
34-
- uses: github/codeql-action/autobuild@v3
35-
- uses: github/codeql-action/analyze@v3
34+
- uses: github/codeql-action/autobuild@v4
35+
- uses: github/codeql-action/analyze@v4

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ jobs:
162162
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
163163

164164
- name: Build wheels
165-
uses: pypa/cibuildwheel@v3.2.0
165+
uses: pypa/cibuildwheel@v3.2.1
166166
with:
167167
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
168168
env:

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,7 @@ Other Deprecations
716716
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
717717
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`)
718718
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
719+
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
719720
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
720721

721722
.. ---------------------------------------------------------------------------
@@ -1016,8 +1017,8 @@ Strings
10161017
Interval
10171018
^^^^^^^^
10181019
- :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`)
1020+
- Bug in :class:`Index`, :class:`Series`, :class:`DataFrame` constructors when given a sequence of :class:`Interval` subclass objects casting them to :class:`Interval` (:issue:`46945`)
10191021
- Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`)
1020-
-
10211022

10221023
Indexing
10231024
^^^^^^^^

pandas/_libs/lib.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2255,7 +2255,8 @@ cpdef bint is_interval_array(ndarray values):
22552255
for i in range(n):
22562256
val = values[i]
22572257

2258-
if isinstance(val, Interval):
2258+
if type(val) is Interval:
2259+
# GH#46945 catch Interval exactly, excluding subclasses
22592260
if closed is None:
22602261
closed = val.closed
22612262
numeric = (

pandas/_testing/_warnings.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,9 @@ class for all warnings. To raise multiple types of exceptions,
110110
if isinstance(match, tuple)
111111
else (match,) * len(expected_warning)
112112
)
113-
for warning_type, warning_match in zip(expected_warning, match):
113+
for warning_type, warning_match in zip(
114+
expected_warning, match, strict=True
115+
):
114116
_assert_caught_expected_warnings(
115117
caught_warnings=w,
116118
expected_warning=warning_type,

pandas/_testing/asserters.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ def _raise(left, right, err_msg) -> NoReturn:
675675
)
676676

677677
diff = 0
678-
for left_arr, right_arr in zip(left, right):
678+
for left_arr, right_arr in zip(left, right, strict=True):
679679
# count up differences
680680
if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
681681
diff += 1
@@ -1447,7 +1447,7 @@ def assert_copy(iter1, iter2, **eql_kwargs) -> None:
14471447
the same object. (Does not check that items
14481448
in sequences are also not the same object)
14491449
"""
1450-
for elem1, elem2 in zip(iter1, iter2):
1450+
for elem1, elem2 in zip(iter1, iter2, strict=True):
14511451
assert_almost_equal(elem1, elem2, **eql_kwargs)
14521452
msg = (
14531453
f"Expected object {type(elem1)!r} and object {type(elem2)!r} to be "

pandas/core/config_init.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
is_text,
2929
)
3030

31+
from pandas.errors import Pandas4Warning
32+
3133
# compute
3234

3335
use_bottleneck_doc = """
@@ -899,10 +901,10 @@ def register_converter_cb(key: str) -> None:
899901
cf.register_option(
900902
"no_silent_downcasting",
901903
False,
902-
"Whether to opt-in to the future behavior which will *not* silently "
903-
"downcast results from Series and DataFrame `where`, `mask`, and `clip` "
904-
"methods. "
905-
"Silent downcasting will be removed in pandas 3.0 "
906-
"(at which point this option will be deprecated).",
904+
"This option is deprecated and will be removed in a future version. "
905+
"It has no effect.",
907906
validator=is_one_of_factory([True, False]),
908907
)
908+
909+
# GH#59502
910+
cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning)

pandas/core/frame.py

Lines changed: 141 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -9350,21 +9350,140 @@ def update(
93509350

93519351
# ----------------------------------------------------------------------
93529352
# Data reshaping
9353-
@Appender(
9354-
dedent(
9355-
"""
9353+
@deprecate_nonkeyword_arguments(
9354+
Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby"
9355+
)
9356+
def groupby(
9357+
self,
9358+
by=None,
9359+
level: IndexLabel | None = None,
9360+
as_index: bool = True,
9361+
sort: bool = True,
9362+
group_keys: bool = True,
9363+
observed: bool = True,
9364+
dropna: bool = True,
9365+
) -> DataFrameGroupBy:
9366+
"""
9367+
Group DataFrame using a mapper or by a Series of columns.
9368+
9369+
A groupby operation involves some combination of splitting the
9370+
object, applying a function, and combining the results. This can be
9371+
used to group large amounts of data and compute operations on these
9372+
groups.
9373+
9374+
Parameters
9375+
----------
9376+
by : mapping, function, label, pd.Grouper or list of such
9377+
Used to determine the groups for the groupby.
9378+
If ``by`` is a function, it's called on each value of the object's
9379+
index. If a dict or Series is passed, the Series or dict VALUES
9380+
will be used to determine the groups (the Series' values are first
9381+
aligned; see ``.align()`` method). If a list or ndarray of length
9382+
equal to the selected axis is passed (see the `groupby user guide
9383+
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
9384+
the values are used as-is to determine the groups. A label or list
9385+
of labels may be passed to group by the columns in ``self``.
9386+
Notice that a tuple is interpreted as a (single) key.
9387+
level : int, level name, or sequence of such, default None
9388+
If the axis is a MultiIndex (hierarchical), group by a particular
9389+
level or levels. Do not specify both ``by`` and ``level``.
9390+
as_index : bool, default True
9391+
Return object with group labels as the
9392+
index. Only relevant for DataFrame input. as_index=False is
9393+
effectively "SQL-style" grouped output. This argument has no effect
9394+
on filtrations (see the `filtrations in the user guide
9395+
<https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9396+
such as ``head()``, ``tail()``, ``nth()`` and in transformations
9397+
(see the `transformations in the user guide
9398+
<https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9399+
sort : bool, default True
9400+
Sort group keys. Get better performance by turning this off.
9401+
Note this does not influence the order of observations within each
9402+
group. Groupby preserves the order of rows within each group. If False,
9403+
the groups will appear in the same order as they did in the original
9404+
DataFrame.
9405+
This argument has no effect on filtrations (see the `filtrations
9406+
in the user guide
9407+
<https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9408+
such as ``head()``, ``tail()``, ``nth()`` and in transformations
9409+
(see the `transformations in the user guide
9410+
<https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9411+
9412+
.. versionchanged:: 2.0.0
9413+
9414+
Specifying ``sort=False`` with an ordered categorical grouper will no
9415+
longer sort the values.
9416+
9417+
group_keys : bool, default True
9418+
When calling apply and the ``by`` argument produces a like-indexed
9419+
(i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
9420+
index to identify pieces. By default group keys are not included
9421+
when the result's index (and column) labels match the inputs, and
9422+
are included otherwise.
9423+
9424+
.. versionchanged:: 1.5.0
9425+
9426+
Warns that ``group_keys`` will no longer be ignored when the
9427+
result from ``apply`` is a like-indexed Series or DataFrame.
9428+
Specify ``group_keys`` explicitly to include the group keys or
9429+
not.
9430+
9431+
.. versionchanged:: 2.0.0
9432+
9433+
``group_keys`` now defaults to ``True``.
9434+
9435+
observed : bool, default True
9436+
This only applies if any of the groupers are Categoricals.
9437+
If True: only show observed values for categorical groupers.
9438+
If False: show all values for categorical groupers.
9439+
9440+
.. versionchanged:: 3.0.0
9441+
9442+
The default value is now ``True``.
9443+
9444+
dropna : bool, default True
9445+
If True, and if group keys contain NA values, NA values together
9446+
with row/column will be dropped.
9447+
If False, NA values will also be treated as the key in groups.
9448+
9449+
Returns
9450+
-------
9451+
pandas.api.typing.DataFrameGroupBy
9452+
Returns a groupby object that contains information about the groups.
9453+
9454+
See Also
9455+
--------
9456+
resample : Convenience method for frequency conversion and resampling
9457+
of time series.
9458+
9459+
Notes
9460+
-----
9461+
See the `user guide
9462+
<https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
9463+
detailed usage and examples, including splitting an object into groups,
9464+
iterating through groups, selecting a group, aggregation, and more.
9465+
9466+
The implementation of groupby is hash-based, meaning in particular that
9467+
objects that compare as equal will be considered to be in the same group.
9468+
An exception to this is that pandas has special handling of NA values:
9469+
any NA values will be collapsed to a single group, regardless of how
9470+
they compare. See the user guide linked above for more details.
9471+
93569472
Examples
93579473
--------
9358-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9359-
... 'Parrot', 'Parrot'],
9360-
... 'Max Speed': [380., 370., 24., 26.]})
9474+
>>> df = pd.DataFrame(
9475+
... {
9476+
... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9477+
... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9478+
... }
9479+
... )
93619480
>>> df
93629481
Animal Max Speed
93639482
0 Falcon 380.0
93649483
1 Falcon 370.0
93659484
2 Parrot 24.0
93669485
3 Parrot 26.0
9367-
>>> df.groupby(['Animal']).mean()
9486+
>>> df.groupby(["Animal"]).mean()
93689487
Max Speed
93699488
Animal
93709489
Falcon 375.0
@@ -9375,11 +9494,12 @@ def update(
93759494
We can groupby different levels of a hierarchical index
93769495
using the `level` parameter:
93779496
9378-
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
9379-
... ['Captive', 'Wild', 'Captive', 'Wild']]
9380-
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
9381-
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
9382-
... index=index)
9497+
>>> arrays = [
9498+
... ["Falcon", "Falcon", "Parrot", "Parrot"],
9499+
... ["Captive", "Wild", "Captive", "Wild"],
9500+
... ]
9501+
>>> index = pd.MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
9502+
>>> df = pd.DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
93839503
>>> df
93849504
Max Speed
93859505
Animal Type
@@ -9417,7 +9537,7 @@ def update(
94179537
2.0 2 5
94189538
NaN 1 4
94199539
9420-
>>> arr = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
9540+
>>> arr = [["a", 12, 12], [None, 12.3, 33.0], ["b", 12.3, 123], ["a", 1, 1]]
94219541
>>> df = pd.DataFrame(arr, columns=["a", "b", "c"])
94229542
94239543
>>> df.groupby(by="a").sum()
@@ -9436,40 +9556,27 @@ def update(
94369556
When using ``.apply()``, use ``group_keys`` to include or exclude the
94379557
group keys. The ``group_keys`` argument defaults to ``True`` (include).
94389558
9439-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9440-
... 'Parrot', 'Parrot'],
9441-
... 'Max Speed': [380., 370., 24., 26.]})
9442-
>>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)
9559+
>>> df = pd.DataFrame(
9560+
... {
9561+
... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9562+
... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9563+
... }
9564+
... )
9565+
>>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x)
94439566
Max Speed
94449567
Animal
94459568
Falcon 0 380.0
94469569
1 370.0
94479570
Parrot 2 24.0
94489571
3 26.0
94499572
9450-
>>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)
9573+
>>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x)
94519574
Max Speed
94529575
0 380.0
94539576
1 370.0
94549577
2 24.0
94559578
3 26.0
94569579
"""
9457-
)
9458-
)
9459-
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
9460-
@deprecate_nonkeyword_arguments(
9461-
Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby"
9462-
)
9463-
def groupby(
9464-
self,
9465-
by=None,
9466-
level: IndexLabel | None = None,
9467-
as_index: bool = True,
9468-
sort: bool = True,
9469-
group_keys: bool = True,
9470-
observed: bool = True,
9471-
dropna: bool = True,
9472-
) -> DataFrameGroupBy:
94739580
from pandas.core.groupby.generic import DataFrameGroupBy
94749581

94759582
if level is None and by is None:

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7627,8 +7627,8 @@ def replace(
76277627
# Operate column-wise
76287628
if self.ndim == 1:
76297629
raise ValueError(
7630-
"Series.replace cannot use dict-like to_replace "
7631-
"and non-None value"
7630+
"Series.replace cannot specify both a dict-like "
7631+
"'to_replace' and a 'value'"
76327632
)
76337633
mapping = {
76347634
col: (to_rep, value) for col, to_rep in to_replace.items()

pandas/core/indexes/interval.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,8 +1056,8 @@ def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
10561056
first_nan_loc = np.arange(len(self))[self.isna()][0]
10571057
mask[first_nan_loc] = True
10581058

1059-
other_tups = set(zip(other.left, other.right))
1060-
for i, tup in enumerate(zip(self.left, self.right)):
1059+
other_tups = set(zip(other.left, other.right, strict=True))
1060+
for i, tup in enumerate(zip(self.left, self.right, strict=True)):
10611061
if tup in other_tups:
10621062
mask[i] = True
10631063

0 commit comments

Comments
 (0)