Skip to content

Commit 4e4079e

Browse files
Merge remote-tracking branch 'upstream/2.3.x' into backport-60245
2 parents 3aa4a70 + eb22bf8 commit 4e4079e

File tree

94 files changed

+964
-599
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+964
-599
lines changed

.circleci/config.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ jobs:
1515
- checkout
1616
- run: .circleci/setup_env.sh
1717
- run: |
18-
sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
1918
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH \
2019
LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD \
2120
ci/run_tests.sh

doc/source/whatsnew/v2.3.0.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,10 @@ Conversion
107107
Strings
108108
^^^^^^^
109109
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
110+
- Bug in :meth:`Series.replace` with :class:`StringDtype` when replacing with a non-string value was not upcasting to ``object`` dtype (:issue:`60282`)
110111
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
111112
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
112113
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
113-
-
114114

115115
Interval
116116
^^^^^^^^
@@ -119,7 +119,7 @@ Interval
119119

120120
Indexing
121121
^^^^^^^^
122-
-
122+
- Fixed bug in :meth:`Index.get_indexer` round-tripping through string dtype when ``infer_string`` is enabled (:issue:`55834`)
123123
-
124124

125125
Missing

pandas/_libs/index.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ class MaskedUInt16Engine(MaskedIndexEngine): ...
6868
class MaskedUInt8Engine(MaskedIndexEngine): ...
6969
class MaskedBoolEngine(MaskedUInt8Engine): ...
7070

71+
# Type stub for the string-specific object engine; the constructor takes the
# values to index plus the missing-value sentinel (`na_value`) used for them.
class StringObjectEngine(ObjectEngine):
72+
def __init__(self, values: object, na_value) -> None: ...
73+
7174
class BaseMultiIndexCodesEngine:
7275
levels: list[np.ndarray]
7376
offsets: np.ndarray # ndarray[uint64_t, ndim=1]

pandas/_libs/index.pyx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,32 @@ cdef class ObjectEngine(IndexEngine):
532532
return loc
533533

534534

535+
# ObjectEngine subclass for string data that carries its own missing-value
# sentinel. Lookup keys are normalised by `_check_type`: strings pass through,
# null-like keys are mapped to the stored sentinel, anything else is a KeyError.
cdef class StringObjectEngine(ObjectEngine):
536+
537+
cdef:
538+
# Missing-value sentinel supplied by the caller at construction time.
object na_value
539+
# True when that sentinel is C_NA; selects the null test in `_checknull`.
bint uses_na
540+
541+
# Store the sentinel and precompute whether it is C_NA so `_checknull`
# does not repeat the identity comparison against `na_value` on every call.
def __init__(self, ndarray values, na_value):
542+
super().__init__(values)
543+
self.na_value = na_value
544+
self.uses_na = na_value is C_NA
545+
546+
# Return whether `val` counts as missing for this engine: only C_NA itself
# when the sentinel is C_NA, otherwise whatever `util.is_nan` accepts.
cdef bint _checknull(self, object val):
547+
if self.uses_na:
548+
return val is C_NA
549+
else:
550+
return util.is_nan(val)
551+
552+
# Validate/normalise a key: return `val` unchanged if it is a str, return
# the engine's `na_value` if `val` is null per `_checknull`, else raise
# KeyError(val).
# NOTE(review): presumably invoked by the inherited lookup methods before
# hashing the key -- the call sites are not visible in this hunk; confirm.
cdef _check_type(self, object val):
553+
if isinstance(val, str):
554+
return val
555+
elif self._checknull(val):
556+
return self.na_value
557+
else:
558+
raise KeyError(val)
559+
560+
535561
cdef class DatetimeEngine(Int64Engine):
536562

537563
cdef:

pandas/_libs/lib.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def maybe_convert_objects(
8686
safe: bool = ...,
8787
convert_numeric: bool = ...,
8888
convert_non_numeric: Literal[False] = ...,
89+
convert_string: Literal[False] = ...,
8990
convert_to_nullable_dtype: Literal[False] = ...,
9091
dtype_if_all_nat: DtypeObj | None = ...,
9192
) -> npt.NDArray[np.object_ | np.number]: ...
@@ -97,6 +98,7 @@ def maybe_convert_objects(
9798
safe: bool = ...,
9899
convert_numeric: bool = ...,
99100
convert_non_numeric: bool = ...,
101+
convert_string: bool = ...,
100102
convert_to_nullable_dtype: Literal[True] = ...,
101103
dtype_if_all_nat: DtypeObj | None = ...,
102104
) -> ArrayLike: ...
@@ -108,6 +110,7 @@ def maybe_convert_objects(
108110
safe: bool = ...,
109111
convert_numeric: bool = ...,
110112
convert_non_numeric: bool = ...,
113+
convert_string: bool = ...,
111114
convert_to_nullable_dtype: bool = ...,
112115
dtype_if_all_nat: DtypeObj | None = ...,
113116
) -> ArrayLike: ...

pandas/_libs/lib.pyx

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2498,6 +2498,7 @@ def maybe_convert_objects(ndarray[object] objects,
24982498
bint convert_numeric=True, # NB: different default!
24992499
bint convert_to_nullable_dtype=False,
25002500
bint convert_non_numeric=False,
2501+
bint convert_string=True,
25012502
object dtype_if_all_nat=None) -> "ArrayLike":
25022503
"""
25032504
Type inference function-- convert object array to proper dtype
@@ -2741,7 +2742,17 @@ def maybe_convert_objects(ndarray[object] objects,
27412742
seen.object_ = True
27422743

27432744
elif seen.str_:
2744-
if using_string_dtype() and is_string_array(objects, skipna=True):
2745+
if convert_to_nullable_dtype and is_string_array(objects, skipna=True):
2746+
from pandas.core.arrays.string_ import StringDtype
2747+
2748+
dtype = StringDtype()
2749+
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
2750+
2751+
elif (
2752+
convert_string
2753+
and using_string_dtype()
2754+
and is_string_array(objects, skipna=True)
2755+
):
27452756
from pandas.core.arrays.string_ import StringDtype
27462757

27472758
dtype = StringDtype(na_value=np.nan)

pandas/compat/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
pa_version_under14p1,
3434
pa_version_under16p0,
3535
pa_version_under17p0,
36+
pa_version_under18p0,
3637
)
3738

3839
if TYPE_CHECKING:
@@ -191,6 +192,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
191192
"pa_version_under14p1",
192193
"pa_version_under16p0",
193194
"pa_version_under17p0",
195+
"pa_version_under18p0",
194196
"HAS_PYARROW",
195197
"IS64",
196198
"ISMUSL",

pandas/compat/pyarrow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
pa_version_under15p0 = _palv < Version("15.0.0")
1818
pa_version_under16p0 = _palv < Version("16.0.0")
1919
pa_version_under17p0 = _palv < Version("17.0.0")
20+
pa_version_under18p0 = _palv < Version("18.0.0")
2021
HAS_PYARROW = True
2122
except ImportError:
2223
pa_version_under10p1 = True
@@ -28,4 +29,5 @@
2829
pa_version_under15p0 = True
2930
pa_version_under16p0 = True
3031
pa_version_under17p0 = True
32+
# pyarrow is not importable in this branch, so -- like every sibling
# ``pa_version_underXXpY`` fallback here -- the flag must be True: a missing
# pyarrow has to be treated as older than any release, otherwise code gated on
# ``not pa_version_under18p0`` would act as if pyarrow >= 18 were installed.
pa_version_under18p0 = True
3133
HAS_PYARROW = False

pandas/core/arrays/arrow/array.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1633,7 +1633,11 @@ def _accumulate(
16331633
else:
16341634
data_to_accum = data_to_accum.cast(pa.int64())
16351635

1636-
result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
1636+
try:
1637+
result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
1638+
except pa.ArrowNotImplementedError as err:
1639+
msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
1640+
raise TypeError(msg) from err
16371641

16381642
if convert_to_int:
16391643
result = result.cast(pa_dtype)
@@ -2285,6 +2289,20 @@ def _groupby_op(
22852289
**kwargs,
22862290
):
22872291
if isinstance(self.dtype, StringDtype):
2292+
if how in [
2293+
"prod",
2294+
"mean",
2295+
"median",
2296+
"cumsum",
2297+
"cumprod",
2298+
"std",
2299+
"sem",
2300+
"var",
2301+
"skew",
2302+
]:
2303+
raise TypeError(
2304+
f"dtype '{self.dtype}' does not support operation '{how}'"
2305+
)
22882306
return super()._groupby_op(
22892307
how=how,
22902308
has_dropped_na=has_dropped_na,

pandas/core/arrays/base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2369,6 +2369,20 @@ def _groupby_op(
23692369
# GH#43682
23702370
if isinstance(self.dtype, StringDtype):
23712371
# StringArray
2372+
if op.how in [
2373+
"prod",
2374+
"mean",
2375+
"median",
2376+
"cumsum",
2377+
"cumprod",
2378+
"std",
2379+
"sem",
2380+
"var",
2381+
"skew",
2382+
]:
2383+
raise TypeError(
2384+
f"dtype '{self.dtype}' does not support operation '{how}'"
2385+
)
23722386
if op.how not in ["any", "all"]:
23732387
# Fail early to avoid conversion to object
23742388
op._get_cython_function(op.kind, op.how, np.dtype(object), False)

0 commit comments

Comments
 (0)