Commit 693b081

Author: Roline Stapny Saldanha
Merge branch 'main' into sroline_issue_60923
2 parents: ecabf81 + 0eaca9e

File tree

12 files changed: +125 additions, -102 deletions

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -162,7 +162,7 @@ jobs:
         run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.23.3
+        uses: pypa/cibuildwheel@v3.1.1
         with:
           package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:
```

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions

```diff
@@ -731,6 +731,7 @@ Timezones

 Numeric
 ^^^^^^^
+- Bug in :func:`api.types.infer_dtype` returning "mixed" for complex and ``pd.NA`` mix (:issue:`61976`)
 - Bug in :func:`api.types.infer_dtype` returning "mixed-integer-float" for float and ``pd.NA`` mix (:issue:`61621`)
 - Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`)
 - Bug in :meth:`DataFrame.cov` raises a ``TypeError`` instead of returning potentially incorrect results or other errors (:issue:`53115`)
@@ -941,6 +942,7 @@ Other
 - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
 - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
 - Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
+- Bug in ``Series.replace`` when the Series was created from an :class:`Index` and Copy-On-Write is enabled (:issue:`61622`)
 - Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
 - Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
 - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
```

pandas/_libs/lib.pyx

Lines changed: 4 additions & 2 deletions

```diff
@@ -1974,9 +1974,11 @@ cdef class ComplexValidator(Validator):
         return cnp.PyDataType_ISCOMPLEX(self.dtype)


-cdef bint is_complex_array(ndarray values):
+cdef bint is_complex_array(ndarray values, bint skipna=True):
     cdef:
-        ComplexValidator validator = ComplexValidator(values.size, values.dtype)
+        ComplexValidator validator = ComplexValidator(values.size,
+                                                      values.dtype,
+                                                      skipna=skipna)
     return validator.validate(values)

```
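For context, the new ``skipna`` argument lets the complex check ignore missing values when deciding whether an object array is purely complex, which is what fixes the "mixed" inference for complex plus ``pd.NA`` (GH#61976). A small sketch of the user-visible effect; the ``"complex"`` result matches the test added in pandas/tests/dtypes/test_inference.py below, while the ``skipna=False`` output is my expectation rather than part of this commit:

```python
import numpy as np
import pandas as pd
from pandas.api.types import infer_dtype

# An object array mixing complex values and pd.NA.
arr = np.array([1 + 1j, pd.NA], dtype=object)

# With skipna=True the missing value is ignored, so the complex
# entry alone determines the inferred dtype.
print(infer_dtype(arr, skipna=True))   # "complex" after this fix

# Without skipping missing values the array is genuinely mixed
# (expected output, not asserted by the commit).
print(infer_dtype(arr, skipna=False))
```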

pandas/core/frame.py

Lines changed: 29 additions & 21 deletions

```diff
@@ -7173,35 +7173,43 @@ def sort_values(
         `natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
         This can be done using
         ``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
-        which provides sorted indices according
-        to their natural order, as shown below:
+        which provides a function to generate a key
+        to sort data in their natural order:

         >>> df = pd.DataFrame(
         ...     {
-        ...         "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
-        ...         "value": [10, 20, 30, 40, 50],
+        ...         "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
+        ...         "mins": [
+        ...             "10mins",
+        ...             "40mins",
+        ...             "40mins",
+        ...             "40mins",
+        ...             "10mins",
+        ...             "10mins",
+        ...         ],
+        ...         "value": [10, 20, 30, 40, 50, 60],
         ...     }
         ... )
         >>> df
-            time  value
-        0    0hr     10
-        1  128hr     20
-        2   72hr     30
-        3   48hr     40
-        4   96hr     50
-        >>> from natsort import index_natsorted
-        >>> index_natsorted(df["time"])
-        [0, 3, 2, 4, 1]
+           hours    mins  value
+        0    0hr  10mins     10
+        1  128hr  40mins     20
+        2    0hr  40mins     30
+        3   64hr  40mins     40
+        4   64hr  10mins     50
+        5  128hr  10mins     60
+        >>> from natsort import natsort_keygen
         >>> df.sort_values(
-        ...     by="time",
-        ...     key=lambda x: np.argsort(index_natsorted(x)),
+        ...     by=["hours", "mins"],
+        ...     key=natsort_keygen(),
         ... )
-            time  value
-        0    0hr     10
-        3   48hr     40
-        2   72hr     30
-        4   96hr     50
-        1  128hr     20
+           hours    mins  value
+        0    0hr  10mins     10
+        2    0hr  40mins     30
+        4   64hr  10mins     50
+        3   64hr  40mins     40
+        5  128hr  10mins     60
+        1  128hr  40mins     20
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         axis = self._get_axis_number(axis)
```
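The revised docstring passes ``natsort_keygen()`` straight to ``key`` instead of reordering through ``np.argsort(index_natsorted(...))``, which also extends naturally to multi-column sorts. A runnable sketch of the same pattern, assuming the third-party ``natsort`` package is installed:

```python
import pandas as pd
from natsort import natsort_keygen  # third-party package, not bundled with pandas

df = pd.DataFrame(
    {
        "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
        "mins": ["10mins", "40mins", "40mins", "40mins", "10mins", "10mins"],
        "value": [10, 20, 30, 40, 50, 60],
    }
)

# natsort_keygen() produces a sort key understood by sort_values, so
# "64hr" lands between "0hr" and "128hr" instead of sorting lexicographically.
result = df.sort_values(by=["hours", "mins"], key=natsort_keygen())
print(result)
```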

pandas/core/generic.py

Lines changed: 27 additions & 16 deletions

```diff
@@ -5004,27 +5004,38 @@ def sort_values(

         >>> df = pd.DataFrame(
         ...     {
-        ...         "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
-        ...         "value": [10, 20, 30, 40, 50],
+        ...         "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
+        ...         "mins": [
+        ...             "10mins",
+        ...             "40mins",
+        ...             "40mins",
+        ...             "40mins",
+        ...             "10mins",
+        ...             "10mins",
+        ...         ],
+        ...         "value": [10, 20, 30, 40, 50, 60],
         ...     }
         ... )
         >>> df
-            time  value
-        0    0hr     10
-        1  128hr     20
-        2   72hr     30
-        3   48hr     40
-        4   96hr     50
-        >>> from natsort import index_natsorted
+           hours    mins  value
+        0    0hr  10mins     10
+        1  128hr  40mins     20
+        2    0hr  40mins     30
+        3   64hr  40mins     40
+        4   64hr  10mins     50
+        5  128hr  10mins     60
+        >>> from natsort import natsort_keygen
         >>> df.sort_values(
-        ...     by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
+        ...     by=["hours", "mins"],
+        ...     key=natsort_keygen(),
         ... )
-            time  value
-        0    0hr     10
-        3   48hr     40
-        2   72hr     30
-        4   96hr     50
-        1  128hr     20
+           hours    mins  value
+        0    0hr  10mins     10
+        2    0hr  40mins     30
+        4   64hr  10mins     50
+        3   64hr  40mins     40
+        5  128hr  10mins     60
+        1  128hr  40mins     20
         """
         raise AbstractMethodError(self)

```

pandas/core/internals/blocks.py

Lines changed: 15 additions & 8 deletions

```diff
@@ -10,7 +10,6 @@
     final,
 )
 import warnings
-import weakref

 import numpy as np

@@ -863,14 +862,22 @@ def replace_list(
                 )

                 if i != src_len:
-                    # This is ugly, but we have to get rid of intermediate refs
-                    # that did not go out of scope yet, otherwise we will trigger
-                    # many unnecessary copies
+                    # This is ugly, but we have to get rid of intermediate refs. We
+                    # can simply clear the referenced_blocks if we already copied,
+                    # otherwise we have to remove ourselves
+                    self_blk_ids = {
+                        id(b()): i for i, b in enumerate(self.refs.referenced_blocks)
+                    }
                     for b in result:
-                        ref = weakref.ref(b)
-                        b.refs.referenced_blocks.pop(
-                            b.refs.referenced_blocks.index(ref)
-                        )
+                        if b.refs is self.refs:
+                            # We are still sharing memory with self
+                            if id(b) in self_blk_ids:
+                                # Remove ourselves from the refs; we are temporary
+                                self.refs.referenced_blocks.pop(self_blk_ids[id(b)])
+                        else:
+                            # We have already copied, so we can clear the refs to avoid
+                            # future copies
+                            b.refs.referenced_blocks.clear()
                 new_rb.extend(result)
             rb = new_rb
         return rb
```
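This reference-tracking rewrite backs the ``Series.replace`` fix listed in the whatsnew entry (GH#61622): under Copy-on-Write, a Series built from an Index initially shares data with it, and ``replace`` must copy before writing rather than disturb the Index or keep stale block references around. A minimal sketch of the expected behaviour, mirroring the new test in test_replace.py below but with the default string handling rather than ``string[pyarrow]``:

```python
import pandas as pd

# Build a Series from an Index; under Copy-on-Write the two initially
# share their underlying data.
idx = pd.Index(["a", "b", "c"])
ser = pd.Series(idx)

# replace() must copy before writing, so the Index stays untouched.
result = ser.replace({"a": "d"})
print(result.tolist())  # ['d', 'b', 'c']
print(idx.tolist())     # ['a', 'b', 'c'] -- unchanged
```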

pandas/core/strings/accessor.py

Lines changed: 15 additions & 45 deletions

```diff
@@ -1242,9 +1242,9 @@ def contains(
             Flags to pass through to the re module, e.g. re.IGNORECASE.
         na : scalar, optional
             Fill value for missing values. The default depends on dtype of the
-            array. For object-dtype, ``numpy.nan`` is used. For the nullable
-            ``StringDtype``, ``pandas.NA`` is used. For the ``"str"`` dtype,
-            ``False`` is used.
+            array. For the ``"str"`` dtype, ``False`` is used. For object
+            dtype, ``numpy.nan`` is used. For the nullable ``StringDtype``,
+            ``pandas.NA`` is used.
         regex : bool, default True
             If True, assumes the pat is a regular expression.

@@ -1293,18 +1293,6 @@ def contains(
         4    False
         dtype: bool

-        Specifying `na` to be `False` instead of `NaN` replaces NaN values
-        with `False`. If Series or Index does not contain NaN values
-        the resultant dtype will be `bool`, otherwise, an `object` dtype.
-
-        >>> s1.str.contains("og", na=False, regex=True)
-        0    False
-        1     True
-        2    False
-        3    False
-        4    False
-        dtype: bool
-
         Returning 'house' or 'dog' when either expression occurs in a string.

         >>> s1.str.contains("house|dog", regex=True)
@@ -1381,9 +1369,9 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
             Regex module flags, e.g. re.IGNORECASE.
         na : scalar, optional
             Fill value for missing values. The default depends on dtype of the
-            array. For object-dtype, ``numpy.nan`` is used. For the nullable
-            ``StringDtype``, ``pandas.NA`` is used. For the ``"str"`` dtype,
-            ``False`` is used.
+            array. For the ``"str"`` dtype, ``False`` is used. For object
+            dtype, ``numpy.nan`` is used. For the nullable ``StringDtype``,
+            ``pandas.NA`` is used.

         Returns
         -------
@@ -1431,9 +1419,9 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=lib.no_default):
             Regex module flags, e.g. re.IGNORECASE.
         na : scalar, optional
             Fill value for missing values. The default depends on dtype of the
-            array. For object-dtype, ``numpy.nan`` is used. For the nullable
-            ``StringDtype``, ``pandas.NA`` is used. For the ``"str"`` dtype,
-            ``False`` is used.
+            array. For the ``"str"`` dtype, ``False`` is used. For object
+            dtype, ``numpy.nan`` is used. For the nullable ``StringDtype``,
+            ``pandas.NA`` is used.

         Returns
         -------
@@ -2671,9 +2659,9 @@ def startswith(
             accepted.
         na : scalar, optional
             Object shown if element tested is not a string. The default depends
-            on dtype of the array. For object-dtype, ``numpy.nan`` is used.
-            For the nullable ``StringDtype``, ``pandas.NA`` is used.
-            For the ``"str"`` dtype, ``False`` is used.
+            on dtype of the array. For the ``"str"`` dtype, ``False`` is used.
+            For object dtype, ``numpy.nan`` is used. For the nullable
+            ``StringDtype``, ``pandas.NA`` is used.

         Returns
         -------
@@ -2710,15 +2698,6 @@ def startswith(
         2    False
         3    False
         dtype: bool
-
-        Specifying `na` to be `False` instead of `NaN`.
-
-        >>> s.str.startswith("b", na=False)
-        0     True
-        1    False
-        2    False
-        3    False
-        dtype: bool
         """
         if not isinstance(pat, (str, tuple)):
             msg = f"expected a string or tuple, not {type(pat).__name__}"
@@ -2742,9 +2721,9 @@ def endswith(
             accepted.
         na : scalar, optional
             Object shown if element tested is not a string. The default depends
-            on dtype of the array. For object-dtype, ``numpy.nan`` is used.
-            For the nullable ``StringDtype``, ``pandas.NA`` is used.
-            For the ``"str"`` dtype, ``False`` is used.
+            on dtype of the array. For the ``"str"`` dtype, ``False`` is used.
+            For object dtype, ``numpy.nan`` is used. For the nullable
+            ``StringDtype``, ``pandas.NA`` is used.

         Returns
         -------
@@ -2781,15 +2760,6 @@ def endswith(
         2     True
         3    False
         dtype: bool
-
-        Specifying `na` to be `False` instead of `NaN`.
-
-        >>> s.str.endswith("t", na=False)
-        0     True
-        1    False
-        2    False
-        3    False
-        dtype: bool
         """
         if not isinstance(pat, (str, tuple)):
            msg = f"expected a string or tuple, not {type(pat).__name__}"
```
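The docstring edits above only reorder the description of the ``na`` default and drop the duplicated ``na=False`` examples; runtime behaviour is unchanged. A short sketch of how the default fill value differs by dtype, assuming the pandas 3.x string-dtype semantics the docstrings describe (the ``"str"`` dtype alias is an assumption for that version):

```python
import numpy as np
import pandas as pd

# "str" dtype (new default string dtype): missing entries come back as False.
print(pd.Series(["house", None, "dog"], dtype="str").str.contains("og"))

# object dtype: missing entries propagate as NaN, giving an object-dtype result.
print(pd.Series(["house", np.nan, "dog"], dtype=object).str.contains("og"))

# Nullable StringDtype: missing entries propagate as pd.NA in a boolean extension dtype.
print(pd.Series(["house", None, "dog"], dtype="string").str.contains("og"))

# An explicit na= overrides the dtype-specific default.
print(pd.Series(["house", np.nan, "dog"], dtype=object).str.contains("og", na=False))
```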

pandas/tests/dtypes/test_inference.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -1405,6 +1405,10 @@ def test_infer_dtype_numeric_with_na(self, na_value):
         ser = Series([1.0, 2.0, na_value], dtype=object)
         assert lib.infer_dtype(ser, skipna=True) == "floating"

+        # GH#61976
+        ser = Series([1 + 1j, na_value], dtype=object)
+        assert lib.infer_dtype(ser, skipna=True) == "complex"
+
     def test_infer_dtype_all_nan_nat_like(self):
         arr = np.array([np.nan, np.nan])
         assert lib.infer_dtype(arr, skipna=True) == "floating"
```

pandas/tests/extension/test_period.py

Lines changed: 4 additions & 7 deletions

```diff
@@ -25,7 +25,6 @@
     Period,
     iNaT,
 )
-from pandas.compat import is_platform_windows

 from pandas.core.dtypes.dtypes import PeriodDtype

@@ -102,12 +101,10 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
         return super().check_reduce(ser, op_name, skipna)

     @pytest.mark.parametrize("periods", [1, -2])
-    def test_diff(self, data, periods):
-        if is_platform_windows():
-            with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False):
-                super().test_diff(data, periods)
-        else:
-            super().test_diff(data, periods)
+    # NOTE: RuntimeWarning on Windows(non-ARM) platforms (in CI)
+    @pytest.mark.filterwarnings("ignore::RuntimeWarning")
+    def test_diff(self, request, data, periods):
+        super().test_diff(data, periods)

     @pytest.mark.parametrize("na_action", [None, "ignore"])
     def test_map(self, data, na_action):
```
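The new version replaces the Windows-only branch with pytest's ``filterwarnings`` marker, which simply ignores the ``RuntimeWarning`` on every platform instead of asserting it on Windows. A generic, self-contained sketch of how that marker behaves (hypothetical test, not pandas code):

```python
import warnings

import pytest


@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_noisy_computation():
    # The marker installs an "ignore" filter for this test only, so the
    # RuntimeWarning emitted below is suppressed rather than reported.
    warnings.warn("overflow encountered", RuntimeWarning)
    assert True
```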

pandas/tests/series/methods/test_replace.py

Lines changed: 11 additions & 0 deletions

```diff
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest

+import pandas.util._test_decorators as td
+
 import pandas as pd
 import pandas._testing as tm
 from pandas.core.arrays import IntervalArray
@@ -715,3 +717,12 @@ def test_replace_all_NA(self):
         result = df.replace({r"^#": "$"}, regex=True)
         expected = pd.Series([pd.NA, pd.NA])
         tm.assert_series_equal(result, expected)
+
+
+@td.skip_if_no("pyarrow")
+def test_replace_from_index():
+    # https://github.com/pandas-dev/pandas/issues/61622
+    idx = pd.Index(["a", "b", "c"], dtype="string[pyarrow]")
+    expected = pd.Series(["d", "b", "c"], dtype="string[pyarrow]")
+    result = pd.Series(idx).replace({"z": "b", "a": "d"})
+    tm.assert_series_equal(result, expected)
```
