Skip to content

Commit bca9135

Browse files
authored
Merge branch 'main' into tempfile
2 parents 81396fa + 035d5b4 commit bca9135

File tree

21 files changed

+160
-60
lines changed

21 files changed

+160
-60
lines changed

doc/source/whatsnew/v2.3.3.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ Bug fixes
4747
- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
4848
with a compiled regex and custom flags (:issue:`62240`)
4949
- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
50-
50+
- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
51+
- Fix error being raised when using a numpy ufunc with a Python-backed string array (:issue:`40800`)
5152

5253
Improvements and fixes for Copy-on-Write
5354
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

pandas/_typing.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,7 @@
8383

8484
# numpy compatible types
8585
NumpyValueArrayLike: TypeAlias = ScalarLike_co | npt.ArrayLike
86-
# Name "npt._ArrayLikeInt_co" is not defined [name-defined]
87-
NumpySorter: TypeAlias = npt._ArrayLikeInt_co | None # type: ignore[name-defined]
86+
NumpySorter: TypeAlias = npt._ArrayLikeInt_co | None
8887

8988

9089
P = ParamSpec("P")

pandas/core/algorithms.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -215,14 +215,15 @@ def _reconstruct_data(
215215
# that values.dtype == dtype
216216
cls = dtype.construct_array_type()
217217

218-
# error: Incompatible types in assignment (expression has type
219-
# "ExtensionArray", variable has type "ndarray[Any, Any]")
220-
values = cls._from_sequence(values, dtype=dtype) # type: ignore[assignment]
221-
222-
else:
223-
values = values.astype(dtype, copy=False)
224-
225-
return values
218+
# error: Incompatible return value type
219+
# (got "ExtensionArray",
220+
# expected "ndarray[tuple[Any, ...], dtype[Any]]")
221+
return cls._from_sequence(values, dtype=dtype) # type: ignore[return-value]
222+
223+
# error: Incompatible return value type
224+
# (got "ndarray[tuple[Any, ...], dtype[Any]]",
225+
# expected "ExtensionArray")
226+
return values.astype(dtype, copy=False) # type: ignore[return-value]
226227

227228

228229
def _ensure_arraylike(values, func_name: str) -> ArrayLike:

pandas/core/array_algos/quantile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def quantile_with_mask(
102102
interpolation=interpolation,
103103
)
104104

105-
result = np.asarray(result) # type: ignore[assignment]
105+
result = np.asarray(result)
106106
result = result.T
107107

108108
return result

pandas/core/arrays/_mixins.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,9 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
151151

152152
td64_values = arr.view(dtype)
153153
return TimedeltaArray._simple_new(td64_values, dtype=dtype)
154-
return arr.view(dtype=dtype)
154+
# error: Argument "dtype" to "view" of "ndarray" has incompatible type
155+
# "ExtensionDtype | dtype[Any]"; expected "dtype[Any] | _HasDType[dtype[Any]]"
156+
return arr.view(dtype=dtype) # type: ignore[arg-type]
155157

156158
def take(
157159
self,

pandas/core/arrays/arrow/_arrow_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def pyarrow_array_to_numpy_and_mask(
4444
mask = pyarrow.BooleanArray.from_buffers(
4545
pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset
4646
)
47-
mask = np.asarray(mask) # type: ignore[assignment]
47+
mask = np.asarray(mask)
4848
else:
4949
mask = np.ones(len(arr), dtype=bool)
5050
return data, mask

pandas/core/arrays/arrow/array.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@ def _box_pa_array(
657657
):
658658
arr_value = np.asarray(value, dtype=object)
659659
# similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
660-
mask = is_pdna_or_none(arr_value) # type: ignore[assignment]
660+
mask = is_pdna_or_none(arr_value)
661661

662662
try:
663663
pa_array = pa.array(value, type=pa_type, mask=mask)
@@ -883,22 +883,27 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
883883
ltype = self._pa_array.type
884884

885885
if isinstance(other, (ExtensionArray, np.ndarray, list)):
886-
boxed = self._box_pa(other)
887-
rtype = boxed.type
888-
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
889-
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
890-
):
891-
# GH#62157 match non-pyarrow behavior
892-
result = ops.invalid_comparison(self, other, op)
893-
result = pa.array(result, type=pa.bool_())
886+
try:
887+
boxed = self._box_pa(other)
888+
except pa.lib.ArrowInvalid:
889+
# e.g. GH#60228 [1, "b"] we have to operate pointwise
890+
res_values = [op(x, y) for x, y in zip(self, other)]
891+
result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
894892
else:
895-
try:
896-
result = pc_func(self._pa_array, boxed)
897-
except pa.ArrowNotImplementedError:
898-
# TODO: could this be wrong if other is object dtype?
899-
# in which case we need to operate pointwise?
893+
rtype = boxed.type
894+
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
895+
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
896+
):
897+
# GH#62157 match non-pyarrow behavior
900898
result = ops.invalid_comparison(self, other, op)
901899
result = pa.array(result, type=pa.bool_())
900+
else:
901+
try:
902+
result = pc_func(self._pa_array, boxed)
903+
except pa.ArrowNotImplementedError:
904+
result = ops.invalid_comparison(self, other, op)
905+
result = pa.array(result, type=pa.bool_())
906+
902907
elif is_scalar(other):
903908
if (isinstance(other, datetime) and pa.types.is_date(ltype)) or (
904909
type(other) is date and pa.types.is_timestamp(ltype)
@@ -2738,7 +2743,7 @@ def _str_get_dummies(self, sep: str = "|", dtype: NpDtype | None = None):
27382743
dummies_dtype = np.bool_
27392744
dummies = np.zeros(n_rows * n_cols, dtype=dummies_dtype)
27402745
dummies[indices] = True
2741-
dummies = dummies.reshape((n_rows, n_cols)) # type: ignore[assignment]
2746+
dummies = dummies.reshape((n_rows, n_cols))
27422747
result = self._from_pyarrow_array(pa.array(list(dummies)))
27432748
return result, uniques_sorted.to_pylist()
27442749

pandas/core/arrays/base.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@
3030
from pandas.compat.numpy import function as nv
3131
from pandas.errors import AbstractMethodError
3232
from pandas.util._decorators import (
33-
Appender,
34-
Substitution,
3533
cache_readonly,
3634
)
3735
from pandas.util._validators import (
@@ -1669,9 +1667,48 @@ def factorize(
16691667
Categories (3, str): ['a', 'b', 'c']
16701668
"""
16711669

1672-
@Substitution(klass="ExtensionArray")
1673-
@Appender(_extension_array_shared_docs["repeat"])
16741670
def repeat(self, repeats: int | Sequence[int], axis: AxisInt | None = None) -> Self:
1671+
"""
1672+
Repeat elements of an ExtensionArray.
1673+
1674+
Returns a new ExtensionArray where each element of the current ExtensionArray
1675+
is repeated consecutively a given number of times.
1676+
1677+
Parameters
1678+
----------
1679+
repeats : int or array of ints
1680+
The number of repetitions for each element. This should be a
1681+
non-negative integer. Repeating 0 times will return an empty
1682+
ExtensionArray.
1683+
axis : None
1684+
Must be ``None``. Has no effect but is accepted for compatibility
1685+
with numpy.
1686+
1687+
Returns
1688+
-------
1689+
ExtensionArray
1690+
Newly created ExtensionArray with repeated elements.
1691+
1692+
See Also
1693+
--------
1694+
Series.repeat : Equivalent function for Series.
1695+
Index.repeat : Equivalent function for Index.
1696+
numpy.repeat : Similar method for :class:`numpy.ndarray`.
1697+
ExtensionArray.take : Take arbitrary positions.
1698+
1699+
Examples
1700+
--------
1701+
>>> cat = pd.Categorical(["a", "b", "c"])
1702+
>>> cat
1703+
['a', 'b', 'c']
1704+
Categories (3, str): ['a', 'b', 'c']
1705+
>>> cat.repeat(2)
1706+
['a', 'a', 'b', 'b', 'c', 'c']
1707+
Categories (3, str): ['a', 'b', 'c']
1708+
>>> cat.repeat([1, 2, 3])
1709+
['a', 'b', 'b', 'c', 'c', 'c']
1710+
Categories (3, str): ['a', 'b', 'c']
1711+
"""
16751712
nv.validate_repeat((), {"axis": axis})
16761713
ind = np.arange(len(self)).repeat(repeats)
16771714
return self.take(ind)

pandas/core/arrays/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1869,7 +1869,7 @@ def value_counts(self, dropna: bool = True) -> Series:
18691869
count = np.bincount(obs, minlength=ncat or 0)
18701870
else:
18711871
count = np.bincount(np.where(mask, code, ncat))
1872-
ix = np.append(ix, -1) # type: ignore[assignment]
1872+
ix = np.append(ix, -1)
18731873

18741874
ix = coerce_indexer_dtype(ix, self.dtype.categories)
18751875
ix_categorical = self._from_backing_data(ix)

pandas/core/arrays/datetimes.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,11 @@ def _add_offset(self, offset: BaseOffset) -> Self:
804804
try:
805805
res_values = offset._apply_array(values._ndarray)
806806
if res_values.dtype.kind == "i":
807-
res_values = res_values.view(values.dtype)
807+
# error: Argument 1 to "view" of "ndarray" has
808+
# incompatible type
809+
# "dtype[datetime64[date | int | None]] | DatetimeTZDtype";
810+
# expected "dtype[Any] | _HasDType[dtype[Any]]" [arg-type]
811+
res_values = res_values.view(values.dtype) # type: ignore[arg-type]
808812
except NotImplementedError:
809813
if get_option("performance_warnings"):
810814
warnings.warn(

0 commit comments

Comments
 (0)