Skip to content

Commit 60c6f84

Browse files
Merge remote-tracking branch 'upstream/2.3.x' into backport-59433
2 parents 712d19f + 3362822 commit 60c6f84

File tree

145 files changed

+1452
-846
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

145 files changed

+1452
-846
lines changed

.circleci/config.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ jobs:
1515
- checkout
1616
- run: .circleci/setup_env.sh
1717
- run: |
18-
sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
1918
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH \
2019
LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD \
2120
ci/run_tests.sh

doc/source/whatsnew/v2.3.0.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ enhancement1
3232
Other enhancements
3333
^^^^^^^^^^^^^^^^^^
3434

35+
- The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called
36+
when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
37+
updated to work correctly with NumPy >= 2 (:issue:`57739`)
3538
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
3639
-
3740

@@ -104,10 +107,10 @@ Conversion
104107
Strings
105108
^^^^^^^
106109
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
110+
- Bug in :meth:`Series.replace` with :class:`StringDtype` where replacing with a non-string value did not upcast to ``object`` dtype (:issue:`60282`)
107111
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
108112
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
109113
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the Python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
110-
-
111114

112115
Interval
113116
^^^^^^^^
@@ -116,7 +119,7 @@ Interval
116119

117120
Indexing
118121
^^^^^^^^
119-
-
122+
- Fixed bug in :meth:`Index.get_indexer` round-tripping through string dtype when ``infer_string`` is enabled (:issue:`55834`)
120123
-
121124

122125
Missing
@@ -171,7 +174,8 @@ Styler
171174

172175
Other
173176
^^^^^
174-
-
177+
- Fixed usage of ``inspect`` when the optional dependencies ``pyarrow`` or ``jinja2``
178+
are not installed (:issue:`60196`)
175179
-
176180

177181
.. ---------------------------------------------------------------------------

pandas/_libs/index.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ class MaskedUInt16Engine(MaskedIndexEngine): ...
6868
class MaskedUInt8Engine(MaskedIndexEngine): ...
6969
class MaskedBoolEngine(MaskedUInt8Engine): ...
7070

71+
class StringObjectEngine(ObjectEngine):
72+
def __init__(self, values: object, na_value) -> None: ...
73+
7174
class BaseMultiIndexCodesEngine:
7275
levels: list[np.ndarray]
7376
offsets: np.ndarray # ndarray[uint64_t, ndim=1]

pandas/_libs/index.pyx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,32 @@ cdef class ObjectEngine(IndexEngine):
532532
return loc
533533

534534

535+
cdef class StringObjectEngine(ObjectEngine):
536+
537+
cdef:
538+
object na_value
539+
bint uses_na
540+
541+
def __init__(self, ndarray values, na_value):
542+
super().__init__(values)
543+
self.na_value = na_value
544+
self.uses_na = na_value is C_NA
545+
546+
cdef bint _checknull(self, object val):
547+
if self.uses_na:
548+
return val is C_NA
549+
else:
550+
return util.is_nan(val)
551+
552+
cdef _check_type(self, object val):
553+
if isinstance(val, str):
554+
return val
555+
elif self._checknull(val):
556+
return self.na_value
557+
else:
558+
raise KeyError(val)
559+
560+
535561
cdef class DatetimeEngine(Int64Engine):
536562

537563
cdef:

pandas/_libs/lib.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def maybe_convert_objects(
8686
safe: bool = ...,
8787
convert_numeric: bool = ...,
8888
convert_non_numeric: Literal[False] = ...,
89+
convert_string: Literal[False] = ...,
8990
convert_to_nullable_dtype: Literal[False] = ...,
9091
dtype_if_all_nat: DtypeObj | None = ...,
9192
) -> npt.NDArray[np.object_ | np.number]: ...
@@ -97,6 +98,7 @@ def maybe_convert_objects(
9798
safe: bool = ...,
9899
convert_numeric: bool = ...,
99100
convert_non_numeric: bool = ...,
101+
convert_string: bool = ...,
100102
convert_to_nullable_dtype: Literal[True] = ...,
101103
dtype_if_all_nat: DtypeObj | None = ...,
102104
) -> ArrayLike: ...
@@ -108,6 +110,7 @@ def maybe_convert_objects(
108110
safe: bool = ...,
109111
convert_numeric: bool = ...,
110112
convert_non_numeric: bool = ...,
113+
convert_string: bool = ...,
111114
convert_to_nullable_dtype: bool = ...,
112115
dtype_if_all_nat: DtypeObj | None = ...,
113116
) -> ArrayLike: ...

pandas/_libs/lib.pyx

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2498,6 +2498,7 @@ def maybe_convert_objects(ndarray[object] objects,
24982498
bint convert_numeric=True, # NB: different default!
24992499
bint convert_to_nullable_dtype=False,
25002500
bint convert_non_numeric=False,
2501+
bint convert_string=True,
25012502
object dtype_if_all_nat=None) -> "ArrayLike":
25022503
"""
25032504
Type inference function-- convert object array to proper dtype
@@ -2741,7 +2742,17 @@ def maybe_convert_objects(ndarray[object] objects,
27412742
seen.object_ = True
27422743

27432744
elif seen.str_:
2744-
if using_string_dtype() and is_string_array(objects, skipna=True):
2745+
if convert_to_nullable_dtype and is_string_array(objects, skipna=True):
2746+
from pandas.core.arrays.string_ import StringDtype
2747+
2748+
dtype = StringDtype()
2749+
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
2750+
2751+
elif (
2752+
convert_string
2753+
and using_string_dtype()
2754+
and is_string_array(objects, skipna=True)
2755+
):
27452756
from pandas.core.arrays.string_ import StringDtype
27462757

27472758
dtype = StringDtype(na_value=np.nan)

pandas/_testing/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,8 @@ def shares_memory(left, right) -> bool:
519519
if isinstance(left, MultiIndex):
520520
return shares_memory(left._codes, right)
521521
if isinstance(left, (Index, Series)):
522+
if isinstance(right, (Index, Series)):
523+
return shares_memory(left._values, right._values)
522524
return shares_memory(left._values, right)
523525

524526
if isinstance(left, NDArrayBackedExtensionArray):

pandas/compat/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
pa_version_under14p1,
3434
pa_version_under16p0,
3535
pa_version_under17p0,
36+
pa_version_under18p0,
3637
)
3738

3839
if TYPE_CHECKING:
@@ -191,6 +192,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
191192
"pa_version_under14p1",
192193
"pa_version_under16p0",
193194
"pa_version_under17p0",
195+
"pa_version_under18p0",
194196
"HAS_PYARROW",
195197
"IS64",
196198
"ISMUSL",

pandas/compat/pyarrow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
pa_version_under15p0 = _palv < Version("15.0.0")
1818
pa_version_under16p0 = _palv < Version("16.0.0")
1919
pa_version_under17p0 = _palv < Version("17.0.0")
20+
pa_version_under18p0 = _palv < Version("18.0.0")
2021
HAS_PYARROW = True
2122
except ImportError:
2223
pa_version_under10p1 = True
@@ -28,4 +29,5 @@
2829
pa_version_under15p0 = True
2930
pa_version_under16p0 = True
3031
pa_version_under17p0 = True
32+
pa_version_under18p0 = False
3133
HAS_PYARROW = False

pandas/conftest.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ def multiindex_year_month_day_dataframe_random_data():
548548
"""
549549
tdf = DataFrame(
550550
np.random.default_rng(2).standard_normal((100, 4)),
551-
columns=Index(list("ABCD"), dtype=object),
551+
columns=Index(list("ABCD")),
552552
index=date_range("2000-01-01", periods=100, freq="B"),
553553
)
554554
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
@@ -743,7 +743,7 @@ def string_series() -> Series:
743743
"""
744744
return Series(
745745
np.arange(30, dtype=np.float64) * 1.1,
746-
index=Index([f"i_{i}" for i in range(30)], dtype=object),
746+
index=Index([f"i_{i}" for i in range(30)]),
747747
name="series",
748748
)
749749

@@ -754,7 +754,7 @@ def object_series() -> Series:
754754
Fixture for Series of dtype object with Index of unique strings
755755
"""
756756
data = [f"foo_{i}" for i in range(30)]
757-
index = Index([f"bar_{i}" for i in range(30)], dtype=object)
757+
index = Index([f"bar_{i}" for i in range(30)])
758758
return Series(data, index=index, name="objects", dtype=object)
759759

760760

@@ -846,8 +846,8 @@ def int_frame() -> DataFrame:
846846
"""
847847
return DataFrame(
848848
np.ones((30, 4), dtype=np.int64),
849-
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
850-
columns=Index(list("ABCD"), dtype=object),
849+
index=Index([f"foo_{i}" for i in range(30)]),
850+
columns=Index(list("ABCD")),
851851
)
852852

853853

0 commit comments

Comments
 (0)