Skip to content

Commit 61bef41

Browse files
committed
Merge remote-tracking branch 'upstream/main' into read-csv-from-directory
2 parents e7fee01 + 728be93 commit 61bef41

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

44 files changed

+444
-730
lines changed

doc/redirects.csv

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,6 @@ generated/pandas.Index.get_slice_bound,../reference/api/pandas.Index.get_slice_b
643643
generated/pandas.Index.groupby,../reference/api/pandas.Index.groupby
644644
generated/pandas.Index.has_duplicates,../reference/api/pandas.Index.has_duplicates
645645
generated/pandas.Index.hasnans,../reference/api/pandas.Index.hasnans
646-
generated/pandas.Index.holds_integer,../reference/api/pandas.Index.holds_integer
647646
generated/pandas.Index,../reference/api/pandas.Index
648647
generated/pandas.Index.identical,../reference/api/pandas.Index.identical
649648
generated/pandas.Index.inferred_type,../reference/api/pandas.Index.inferred_type

doc/source/index.rst.template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ programming language.
113113
:titlesonly:
114114

115115
{{ single_doc[:-4] }}
116-
{% elif single_doc and single_doc.count('.') <= 1 %}
116+
{% elif single_doc and ((single_doc.count('.') <= 1) or ('tseries' in single_doc)) -%}
117117
.. autosummary::
118118
:toctree: reference/api/
119119

doc/source/whatsnew/v2.3.1.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,4 @@ Bug fixes
7373
Contributors
7474
~~~~~~~~~~~~
7575

76-
.. contributors:: v2.3.0..v2.3.1|HEAD
76+
.. contributors:: v2.3.0..v2.3.1

doc/source/whatsnew/v2.3.2.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.. _whatsnew_232:
22

3-
What's new in 2.3.2 (August XX, 2025)
3+
What's new in 2.3.2 (August 21, 2025)
44
-------------------------------------
55

66
These are the changes in pandas 2.3.2. See :ref:`release` for a full changelog
@@ -28,9 +28,13 @@ Bug fixes
2828
- Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`)
2929
- Fixed ``~Series.str.match``, ``~Series.str.fullmatch`` and ``~Series.str.contains``
3030
with compiled regex for the Arrow-backed string dtype (:issue:`61964`, :issue:`61942`)
31+
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently
32+
replacing matching values when missing values are present for string dtypes (:issue:`56599`)
3133

3234
.. ---------------------------------------------------------------------------
3335
.. _whatsnew_232.contributors:
3436

3537
Contributors
3638
~~~~~~~~~~~~
39+
40+
.. contributors:: v2.3.1..v2.3.2|HEAD

doc/source/whatsnew/v3.0.0.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1094,7 +1094,6 @@ Other
10941094
- Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
10951095
- Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`)
10961096
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
1097-
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
10981097
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`)
10991098
- Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`)
11001099
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)

pandas/_libs/tslibs/offsets.pyi

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,12 @@ class YearOffset(SingleConstructorOffset):
156156

157157
class BYearEnd(YearOffset): ...
158158
class BYearBegin(YearOffset): ...
159-
class YearEnd(YearOffset): ...
159+
160+
class YearEnd(YearOffset):
161+
def __new__(
162+
cls, n: int = ..., normalize: bool = ..., month: int | None = ...
163+
) -> Self: ...
164+
160165
class YearBegin(YearOffset): ...
161166

162167
class QuarterOffset(SingleConstructorOffset):

pandas/_libs/tslibs/offsets.pyx

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2737,14 +2737,31 @@ cdef class BYearBegin(YearOffset):
27372737
_prefix = "BYS"
27382738
_day_opt = "business_start"
27392739

2740+
# The pair of classes `_YearEnd` and `YearEnd` exist because of
2741+
# https://github.com/cython/cython/issues/3873
27402742

2741-
cdef class YearEnd(YearOffset):
2743+
cdef class _YearEnd(YearOffset):
2744+
_default_month = 12
2745+
_prefix = "YE"
2746+
_day_opt = "end"
2747+
2748+
cdef readonly:
2749+
int _period_dtype_code
2750+
2751+
def __init__(self, n=1, normalize=False, month=None):
2752+
# Because YearEnd can be the freq for a Period, define its
2753+
# _period_dtype_code at construction for performance
2754+
YearOffset.__init__(self, n, normalize, month)
2755+
self._period_dtype_code = PeriodDtypeCode.A + self.month % 12
2756+
2757+
2758+
class YearEnd(_YearEnd):
27422759
"""
27432760
DateOffset increments between calendar year end dates.
27442761
27452762
YearEnd goes to the next date which is the end of the year.
27462763
2747-
Attributes
2764+
Parameters
27482765
----------
27492766
n : int, default 1
27502767
The number of years represented.
@@ -2778,18 +2795,8 @@ cdef class YearEnd(YearOffset):
27782795
Timestamp('2022-12-31 00:00:00')
27792796
"""
27802797

2781-
_default_month = 12
2782-
_prefix = "YE"
2783-
_day_opt = "end"
2784-
2785-
cdef readonly:
2786-
int _period_dtype_code
2787-
2788-
def __init__(self, n=1, normalize=False, month=None):
2789-
# Because YearEnd can be the freq for a Period, define its
2790-
# _period_dtype_code at construction for performance
2791-
YearOffset.__init__(self, n, normalize, month)
2792-
self._period_dtype_code = PeriodDtypeCode.A + self.month % 12
2798+
def __new__(cls, n=1, normalize=False, month=None):
2799+
return _YearEnd.__new__(cls, n, normalize, month)
27932800

27942801

27952802
cdef class YearBegin(YearOffset):
@@ -5188,8 +5195,8 @@ def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
51885195
warnings.warn(
51895196
f"\'{name}\' is deprecated and will be removed "
51905197
f"in a future version, please use "
5191-
f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\'"
5192-
f" instead.",
5198+
f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\' "
5199+
f"instead.",
51935200
FutureWarning,
51945201
stacklevel=find_stack_level(),
51955202
)
@@ -5202,8 +5209,8 @@ def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
52025209
warnings.warn(
52035210
f"\'{name}\' is deprecated and will be removed "
52045211
f"in a future version, please use "
5205-
f"\'{_name}\'"
5206-
f" instead.",
5212+
f"\'{_name}\' "
5213+
f"instead.",
52075214
FutureWarning,
52085215
stacklevel=find_stack_level(),
52095216
)

pandas/core/arrays/arrow/array.py

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@
7373
validate_indices,
7474
)
7575
from pandas.core.nanops import check_below_min_count
76-
from pandas.core.strings.base import BaseStringArrayMethods
7776

7877
from pandas.io._util import _arrow_dtype_mapping
7978
from pandas.tseries.frequencies import to_offset
@@ -237,7 +236,6 @@ class ArrowExtensionArray(
237236
OpsMixin,
238237
ExtensionArraySupportsAnyAll,
239238
ArrowStringArrayMixin,
240-
BaseStringArrayMethods,
241239
):
242240
"""
243241
Pandas ExtensionArray backed by a PyArrow ChunkedArray.
@@ -392,6 +390,73 @@ def _from_sequence_of_strings(
392390
)
393391
return cls._from_sequence(scalars, dtype=pa_type, copy=copy)
394392

393+
def _cast_pointwise_result(self, values) -> ArrayLike:
394+
if len(values) == 0:
395+
# Retain our dtype
396+
return self[:0].copy()
397+
398+
try:
399+
arr = pa.array(values, from_pandas=True)
400+
except (ValueError, TypeError):
401+
# e.g. test_by_column_values_with_same_starting_value with nested
402+
# values, one entry of which is an ArrowStringArray
403+
# or test_agg_lambda_complex128_dtype_conversion for complex values
404+
return super()._cast_pointwise_result(values)
405+
406+
if pa.types.is_duration(arr.type):
407+
# workaround for https://github.com/apache/arrow/issues/40620
408+
result = ArrowExtensionArray._from_sequence(values)
409+
if pa.types.is_duration(self._pa_array.type):
410+
result = result.astype(self.dtype) # type: ignore[assignment]
411+
elif pa.types.is_timestamp(self._pa_array.type):
412+
# Try to retain original unit
413+
new_dtype = ArrowDtype(pa.duration(self._pa_array.type.unit))
414+
try:
415+
result = result.astype(new_dtype) # type: ignore[assignment]
416+
except ValueError:
417+
pass
418+
elif pa.types.is_date64(self._pa_array.type):
419+
# Try to match unit we get on non-pointwise op
420+
dtype = ArrowDtype(pa.duration("ms"))
421+
result = result.astype(dtype) # type: ignore[assignment]
422+
elif pa.types.is_date(self._pa_array.type):
423+
# Try to match unit we get on non-pointwise op
424+
dtype = ArrowDtype(pa.duration("s"))
425+
result = result.astype(dtype) # type: ignore[assignment]
426+
return result
427+
428+
elif pa.types.is_date(arr.type) and pa.types.is_date(self._pa_array.type):
429+
arr = arr.cast(self._pa_array.type)
430+
elif pa.types.is_time(arr.type) and pa.types.is_time(self._pa_array.type):
431+
arr = arr.cast(self._pa_array.type)
432+
elif pa.types.is_decimal(arr.type) and pa.types.is_decimal(self._pa_array.type):
433+
arr = arr.cast(self._pa_array.type)
434+
elif pa.types.is_integer(arr.type) and pa.types.is_integer(self._pa_array.type):
435+
try:
436+
arr = arr.cast(self._pa_array.type)
437+
except pa.lib.ArrowInvalid:
438+
# e.g. test_combine_add if we can't cast
439+
pass
440+
elif pa.types.is_floating(arr.type) and pa.types.is_floating(
441+
self._pa_array.type
442+
):
443+
try:
444+
arr = arr.cast(self._pa_array.type)
445+
except pa.lib.ArrowInvalid:
446+
# e.g. test_combine_add if we can't cast
447+
pass
448+
449+
if isinstance(self.dtype, StringDtype):
450+
if pa.types.is_string(arr.type) or pa.types.is_large_string(arr.type):
451+
# ArrowStringArrayNumpySemantics
452+
return type(self)(arr).astype(self.dtype)
453+
if self.dtype.na_value is np.nan:
454+
# ArrowEA has different semantics, so we return numpy-based
455+
# result instead
456+
return super()._cast_pointwise_result(values)
457+
return ArrowExtensionArray(arr)
458+
return type(self)(arr)
459+
395460
@classmethod
396461
def _box_pa(
397462
cls, value, pa_type: pa.DataType | None = None

pandas/core/arrays/base.py

Lines changed: 9 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
cast,
2020
overload,
2121
)
22-
import warnings
2322

2423
import numpy as np
2524

@@ -35,13 +34,11 @@
3534
Substitution,
3635
cache_readonly,
3736
)
38-
from pandas.util._exceptions import find_stack_level
3937
from pandas.util._validators import (
4038
validate_bool_kwarg,
4139
validate_insert_loc,
4240
)
4341

44-
from pandas.core.dtypes.cast import maybe_cast_pointwise_result
4542
from pandas.core.dtypes.common import (
4643
is_list_like,
4744
is_scalar,
@@ -89,7 +86,6 @@
8986
AstypeArg,
9087
AxisInt,
9188
Dtype,
92-
DtypeObj,
9389
FillnaOptions,
9490
InterpolateOptions,
9591
NumpySorter,
@@ -311,38 +307,6 @@ def _from_sequence(
311307
"""
312308
raise AbstractMethodError(cls)
313309

314-
@classmethod
315-
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
316-
"""
317-
Strict analogue to _from_sequence, allowing only sequences of scalars
318-
that should be specifically inferred to the given dtype.
319-
320-
Parameters
321-
----------
322-
scalars : sequence
323-
dtype : ExtensionDtype
324-
325-
Raises
326-
------
327-
TypeError or ValueError
328-
329-
Notes
330-
-----
331-
This is called in a try/except block when casting the result of a
332-
pointwise operation.
333-
"""
334-
try:
335-
return cls._from_sequence(scalars, dtype=dtype, copy=False)
336-
except (ValueError, TypeError):
337-
raise
338-
except Exception:
339-
warnings.warn(
340-
"_from_scalars should only raise ValueError or TypeError. "
341-
"Consider overriding _from_scalars where appropriate.",
342-
stacklevel=find_stack_level(),
343-
)
344-
raise
345-
346310
@classmethod
347311
def _from_sequence_of_strings(
348312
cls, strings, *, dtype: ExtensionDtype, copy: bool = False
@@ -371,9 +335,6 @@ def _from_sequence_of_strings(
371335
from a sequence of scalars.
372336
api.extensions.ExtensionArray._from_factorized : Reconstruct an ExtensionArray
373337
after factorization.
374-
api.extensions.ExtensionArray._from_scalars : Strict analogue to _from_sequence,
375-
allowing only sequences of scalars that should be specifically inferred to
376-
the given dtype.
377338
378339
Examples
379340
--------
@@ -416,6 +377,14 @@ def _from_factorized(cls, values, original):
416377
"""
417378
raise AbstractMethodError(cls)
418379

380+
def _cast_pointwise_result(self, values) -> ArrayLike:
381+
"""
382+
Cast the result of a pointwise operation (e.g. Series.map) to an
383+
array, preserving dtype_backend if possible.
384+
"""
385+
values = np.asarray(values, dtype=object)
386+
return lib.maybe_convert_objects(values, convert_non_numeric=True)
387+
419388
# ------------------------------------------------------------------------
420389
# Must be a Sequence
421390
# ------------------------------------------------------------------------
@@ -2842,7 +2811,7 @@ def _maybe_convert(arr):
28422811
# https://github.com/pandas-dev/pandas/issues/22850
28432812
# We catch all regular exceptions here, and fall back
28442813
# to an ndarray.
2845-
res = maybe_cast_pointwise_result(arr, self.dtype, same_dtype=False)
2814+
res = self._cast_pointwise_result(arr)
28462815
if not isinstance(res, type(self)):
28472816
# the pointwise cast did not return our own array type; ensure we have ndarray
28482817
res = np.asarray(arr)

pandas/core/arrays/categorical.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@
103103
AstypeArg,
104104
AxisInt,
105105
Dtype,
106-
DtypeObj,
107106
NpDtype,
108107
Ordered,
109108
Shape,
@@ -529,20 +528,12 @@ def _from_sequence(
529528
) -> Self:
530529
return cls(scalars, dtype=dtype, copy=copy)
531530

532-
@classmethod
533-
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
534-
if dtype is None:
535-
# The _from_scalars strictness doesn't make much sense in this case.
536-
raise NotImplementedError
537-
538-
res = cls._from_sequence(scalars, dtype=dtype)
539-
540-
# if there are any non-category elements in scalars, these will be
541-
# converted to NAs in res.
542-
mask = isna(scalars)
543-
if not (mask == res.isna()).all():
544-
# Some non-category element in scalars got converted to NA in res.
545-
raise ValueError
531+
def _cast_pointwise_result(self, values) -> ArrayLike:
532+
res = super()._cast_pointwise_result(values)
533+
cat = type(self)._from_sequence(res, dtype=self.dtype)
534+
if (cat.isna() == isna(res)).all():
535+
# i.e. the conversion was non-lossy
536+
return cat
546537
return res
547538

548539
@overload

0 commit comments

Comments
 (0)