Skip to content

Commit e09ab5f

Browse files
authored
Merge branch 'main' into Test_issue_57930
2 parents ae61f89 + 73da90c commit e09ab5f

File tree

19 files changed

+113
-80
lines changed

19 files changed

+113
-80
lines changed

.gitattributes

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,5 @@ pandas/tests/io/parser/data export-ignore
8585

8686
# Include cibw script in sdist since it's needed for building wheels
8787
scripts/cibw_before_build.sh -export-ignore
88-
scripts/cibw_before_test.sh -export-ignore
88+
scripts/cibw_before_build_windows.sh -export-ignore
89+
scripts/cibw_before_test_windows.sh -export-ignore

.github/workflows/unit-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,8 @@ jobs:
387387
- name: Build Environment
388388
run: |
389389
python --version
390-
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
391-
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy cython
390+
python -m pip install --upgrade pip setuptools wheel numpy meson[ninja]==1.2.1 meson-python==0.13.1
391+
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
392392
python -m pip install versioneer[toml]
393393
python -m pip install python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
394394
python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"

.github/workflows/wheels.yml

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,6 @@ jobs:
111111
- buildplat: [ubuntu-22.04, pyodide_wasm32]
112112
python: ["cp312", "3.12"]
113113
cibw_build_frontend: 'build'
114-
# TODO: Build free-threaded wheels for Windows
115-
exclude:
116-
- buildplat: [windows-2022, win_amd64]
117-
python: ["cp313t", "3.13"]
118114

119115
env:
120116
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
@@ -181,20 +177,6 @@ jobs:
181177
shell: bash -el {0}
182178
run: for whl in $(ls wheelhouse); do wheel unpack wheelhouse/$whl -d /tmp; done
183179

184-
# Testing on windowsservercore instead of GHA runner to fail on missing DLLs
185-
- name: Test Windows Wheels
186-
if: ${{ matrix.buildplat[1] == 'win_amd64' }}
187-
shell: pwsh
188-
run: |
189-
$TST_CMD = @"
190-
python -m pip install hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0;
191-
python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
192-
python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`';
193-
"@
194-
# add rc to the end of the image name if the Python version is unreleased
195-
docker pull python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
196-
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
197-
198180
- uses: actions/upload-artifact@v4
199181
with:
200182
name: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

MANIFEST.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,5 @@ graft pandas/_libs/include
6565

6666
# Include cibw script in sdist since it's needed for building wheels
6767
include scripts/cibw_before_build.sh
68+
include scripts/cibw_before_build_windows.sh
69+
include scripts/cibw_before_test_windows.sh

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,10 @@ Conversion
106106
Strings
107107
^^^^^^^
108108
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
109+
- Bug in :meth:`Series.replace` with :class:`StringDtype` not upcasting to ``object`` dtype when replacing with a non-string value (:issue:`60282`)
109110
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
110111
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
111112
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
112-
-
113113

114114
Interval
115115
^^^^^^^^

pandas/core/arrays/string_.py

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -730,20 +730,9 @@ def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]:
730730

731731
return arr, self.dtype.na_value
732732

733-
def __setitem__(self, key, value) -> None:
734-
value = extract_array(value, extract_numpy=True)
735-
if isinstance(value, type(self)):
736-
# extract_array doesn't extract NumpyExtensionArray subclasses
737-
value = value._ndarray
738-
739-
key = check_array_indexer(self, key)
740-
scalar_key = lib.is_scalar(key)
741-
scalar_value = lib.is_scalar(value)
742-
if scalar_key and not scalar_value:
743-
raise ValueError("setting an array element with a sequence.")
744-
745-
# validate new items
746-
if scalar_value:
733+
def _maybe_convert_setitem_value(self, value):
734+
"""Maybe convert value to be pyarrow compatible."""
735+
if lib.is_scalar(value):
747736
if isna(value):
748737
value = self.dtype.na_value
749738
elif not isinstance(value, str):
@@ -753,8 +742,11 @@ def __setitem__(self, key, value) -> None:
753742
"instead."
754743
)
755744
else:
745+
value = extract_array(value, extract_numpy=True)
756746
if not is_array_like(value):
757747
value = np.asarray(value, dtype=object)
748+
elif isinstance(value.dtype, type(self.dtype)):
749+
return value
758750
else:
759751
# cast categories and friends to arrays to see if values are
760752
# compatible, compatibility with arrow backed strings
@@ -764,11 +756,26 @@ def __setitem__(self, key, value) -> None:
764756
"Invalid value for dtype 'str'. Value should be a "
765757
"string or missing value (or array of those)."
766758
)
759+
return value
767760

768-
mask = isna(value)
769-
if mask.any():
770-
value = value.copy()
771-
value[isna(value)] = self.dtype.na_value
761+
def __setitem__(self, key, value) -> None:
762+
value = self._maybe_convert_setitem_value(value)
763+
764+
key = check_array_indexer(self, key)
765+
scalar_key = lib.is_scalar(key)
766+
scalar_value = lib.is_scalar(value)
767+
if scalar_key and not scalar_value:
768+
raise ValueError("setting an array element with a sequence.")
769+
770+
if not scalar_value:
771+
if value.dtype == self.dtype:
772+
value = value._ndarray
773+
else:
774+
value = np.asarray(value)
775+
mask = isna(value)
776+
if mask.any():
777+
value = value.copy()
778+
value[isna(value)] = self.dtype.na_value
772779

773780
super().__setitem__(key, value)
774781

pandas/core/dtypes/cast.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,6 +1749,13 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
17491749
except (ValueError, TypeError):
17501750
return False
17511751

1752+
if dtype == "string":
1753+
try:
1754+
arr._maybe_convert_setitem_value(element) # type: ignore[union-attr]
1755+
return True
1756+
except (ValueError, TypeError):
1757+
return False
1758+
17521759
# This is technically incorrect, but maintains the behavior of
17531760
# ExtensionBlock._can_hold_element
17541761
return True

pandas/core/dtypes/missing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
NaT,
2020
iNaT,
2121
)
22+
from pandas.util._decorators import set_module
2223

2324
from pandas.core.dtypes.common import (
2425
DT64NS_DTYPE,
@@ -93,6 +94,7 @@ def isna(
9394
def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ...
9495

9596

97+
@set_module("pandas")
9698
def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
9799
"""
98100
Detect missing values for an array-like object.
@@ -307,6 +309,7 @@ def notna(
307309
def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: ...
308310

309311

312+
@set_module("pandas")
310313
def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
311314
"""
312315
Detect non-missing values for an array-like object.

pandas/core/generic.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7668,8 +7668,12 @@ def interpolate(
76687668
* 'linear': Ignore the index and treat the values as equally
76697669
spaced. This is the only method supported on MultiIndexes.
76707670
* 'time': Works on daily and higher resolution data to interpolate
7671-
given length of interval.
7672-
* 'index', 'values': use the actual numerical values of the index.
7671+
given length of interval. This interpolates values based on
7672+
the time interval between observations.
7673+
* 'index': The interpolation uses the numerical values
7674+
of the DataFrame's index to linearly calculate missing values.
7675+
* 'values': Equivalent to 'index'; the interpolation uses the actual
7676+
numerical values of the index rather than treating them as equally spaced.
76737677
* 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
76747678
'barycentric', 'polynomial': Passed to
76757679
`scipy.interpolate.interp1d`, whereas 'spline' is passed to

pandas/core/internals/blocks.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
ABCNumpyExtensionArray,
7878
ABCSeries,
7979
)
80+
from pandas.core.dtypes.inference import is_re
8081
from pandas.core.dtypes.missing import (
8182
is_valid_na_for_dtype,
8283
isna,
@@ -706,7 +707,7 @@ def replace(
706707
# bc _can_hold_element is incorrect.
707708
return [self.copy(deep=False)]
708709

709-
elif self._can_hold_element(value):
710+
elif self._can_hold_element(value) or (self.dtype == "string" and is_re(value)):
710711
# TODO(CoW): Maybe split here as well into columns where mask has True
711712
# and rest?
712713
blk = self._maybe_copy(inplace)
@@ -766,14 +767,24 @@ def _replace_regex(
766767
-------
767768
List[Block]
768769
"""
769-
if not self._can_hold_element(to_replace):
770+
if not is_re(to_replace) and not self._can_hold_element(to_replace):
770771
# i.e. only if self.is_object is True, but could in principle include a
771772
# String ExtensionBlock
772773
return [self.copy(deep=False)]
773774

774-
rx = re.compile(to_replace)
775+
if is_re(to_replace) and self.dtype not in [object, "string"]:
776+
# only object or string dtype can hold strings, and a regex object
777+
# will only match strings
778+
return [self.copy(deep=False)]
775779

776-
block = self._maybe_copy(inplace)
780+
if not (
781+
self._can_hold_element(value) or (self.dtype == "string" and is_re(value))
782+
):
783+
block = self.astype(np.dtype(object))
784+
else:
785+
block = self._maybe_copy(inplace)
786+
787+
rx = re.compile(to_replace)
777788

778789
replace_regex(block.values, rx, value, mask)
779790
return [block]
@@ -793,7 +804,9 @@ def replace_list(
793804

794805
# Exclude anything that we know we won't contain
795806
pairs = [
796-
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
807+
(x, y)
808+
for x, y in zip(src_list, dest_list)
809+
if (self._can_hold_element(x) or (self.dtype == "string" and is_re(x)))
797810
]
798811
if not len(pairs):
799812
return [self.copy(deep=False)]

0 commit comments

Comments
 (0)