Skip to content

Commit cb96c2e

Browse files
Merge remote-tracking branch 'upstream/main' into rls-133
2 parents b02ff27 + 5614c7c commit cb96c2e

File tree

13 files changed

+142
-40
lines changed

13 files changed

+142
-40
lines changed

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ jobs:
162162
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
163163

164164
- name: Build wheels
165-
uses: pypa/cibuildwheel@v3.1.4
165+
uses: pypa/cibuildwheel@v3.2.0
166166
with:
167167
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
168168
env:

doc/source/whatsnew/v2.3.3.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Bug fixes
5151
- Fix bug in :meth:`~DataFrame.groupby` with ``sum()`` and unobserved categories resulting in ``0`` instead of the empty string ``""`` (:issue:`61909`)
5252
- Fix :meth:`Series.str.isdigit` to correctly recognize unicode superscript
5353
characters as digits for :class:`StringDtype` backed by PyArrow (:issue:`61466`)
54+
- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
55+
- Fix error being raised when using a numpy ufunc with a Python-backed string array (:issue:`40800`)
5456

5557
Other changes
5658
~~~~~~~~~~~~~

pandas/core/arrays/arrow/array.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -883,22 +883,27 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
883883
ltype = self._pa_array.type
884884

885885
if isinstance(other, (ExtensionArray, np.ndarray, list)):
886-
boxed = self._box_pa(other)
887-
rtype = boxed.type
888-
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
889-
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
890-
):
891-
# GH#62157 match non-pyarrow behavior
892-
result = ops.invalid_comparison(self, other, op)
893-
result = pa.array(result, type=pa.bool_())
886+
try:
887+
boxed = self._box_pa(other)
888+
except pa.lib.ArrowInvalid:
889+
# e.g. GH#60228 [1, "b"] we have to operate pointwise
890+
res_values = [op(x, y) for x, y in zip(self, other)]
891+
result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
894892
else:
895-
try:
896-
result = pc_func(self._pa_array, boxed)
897-
except pa.ArrowNotImplementedError:
898-
# TODO: could this be wrong if other is object dtype?
899-
# in which case we need to operate pointwise?
893+
rtype = boxed.type
894+
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
895+
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
896+
):
897+
# GH#62157 match non-pyarrow behavior
900898
result = ops.invalid_comparison(self, other, op)
901899
result = pa.array(result, type=pa.bool_())
900+
else:
901+
try:
902+
result = pc_func(self._pa_array, boxed)
903+
except pa.ArrowNotImplementedError:
904+
result = ops.invalid_comparison(self, other, op)
905+
result = pa.array(result, type=pa.bool_())
906+
902907
elif is_scalar(other):
903908
if (isinstance(other, datetime) and pa.types.is_date(ltype)) or (
904909
type(other) is date and pa.types.is_timestamp(ltype)

pandas/core/arrays/base.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@
3030
from pandas.compat.numpy import function as nv
3131
from pandas.errors import AbstractMethodError
3232
from pandas.util._decorators import (
33-
Appender,
34-
Substitution,
3533
cache_readonly,
3634
)
3735
from pandas.util._validators import (
@@ -1669,9 +1667,48 @@ def factorize(
16691667
Categories (3, str): ['a', 'b', 'c']
16701668
"""
16711669

1672-
@Substitution(klass="ExtensionArray")
1673-
@Appender(_extension_array_shared_docs["repeat"])
16741670
def repeat(self, repeats: int | Sequence[int], axis: AxisInt | None = None) -> Self:
1671+
"""
1672+
Repeat elements of an ExtensionArray.
1673+
1674+
Returns a new ExtensionArray where each element of the current ExtensionArray
1675+
is repeated consecutively a given number of times.
1676+
1677+
Parameters
1678+
----------
1679+
repeats : int or array of ints
1680+
The number of repetitions for each element. This should be a
1681+
non-negative integer. Repeating 0 times will return an empty
1682+
ExtensionArray.
1683+
axis : None
1684+
Must be ``None``. Has no effect but is accepted for compatibility
1685+
with numpy.
1686+
1687+
Returns
1688+
-------
1689+
ExtensionArray
1690+
Newly created ExtensionArray with repeated elements.
1691+
1692+
See Also
1693+
--------
1694+
Series.repeat : Equivalent function for Series.
1695+
Index.repeat : Equivalent function for Index.
1696+
numpy.repeat : Similar method for :class:`numpy.ndarray`.
1697+
ExtensionArray.take : Take arbitrary positions.
1698+
1699+
Examples
1700+
--------
1701+
>>> cat = pd.Categorical(["a", "b", "c"])
1702+
>>> cat
1703+
['a', 'b', 'c']
1704+
Categories (3, str): ['a', 'b', 'c']
1705+
>>> cat.repeat(2)
1706+
['a', 'a', 'b', 'b', 'c', 'c']
1707+
Categories (3, str): ['a', 'b', 'c']
1708+
>>> cat.repeat([1, 2, 3])
1709+
['a', 'b', 'b', 'c', 'c', 'c']
1710+
Categories (3, str): ['a', 'b', 'c']
1711+
"""
16751712
nv.validate_repeat((), {"axis": axis})
16761713
ind = np.arange(len(self)).repeat(repeats)
16771714
return self.take(ind)

pandas/core/arrays/numpy_.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
Any,
66
Literal,
77
Self,
8+
cast,
89
)
910

1011
import numpy as np
@@ -48,6 +49,7 @@
4849
)
4950

5051
from pandas import Index
52+
from pandas.arrays import StringArray
5153

5254

5355
class NumpyExtensionArray(
@@ -234,6 +236,16 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
234236
# e.g. test_np_max_nested_tuples
235237
return result
236238
else:
239+
if self.dtype.type is str: # type: ignore[comparison-overlap]
240+
# StringDtype
241+
self = cast("StringArray", self)
242+
try:
243+
# specify dtype to preserve storage/na_value
244+
return type(self)(result, dtype=self.dtype)
245+
except ValueError:
246+
# if validation of input fails (no strings)
247+
# -> fallback to returning raw numpy array
248+
return result
237249
# one return value; re-box array-like results
238250
return type(self)(result)
239251

pandas/plotting/_core.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,10 @@ def _get_call_args(backend_name: str, data: Series | DataFrame, args, kwargs):
962962
if args and isinstance(data, ABCSeries):
963963
positional_args = str(args)[1:-1]
964964
keyword_args = ", ".join(
965-
[f"{name}={value!r}" for (name, _), value in zip(arg_def, args)]
965+
[
966+
f"{name}={value!r}"
967+
for (name, _), value in zip(arg_def, args, strict=False)
968+
]
966969
)
967970
msg = (
968971
"`Series.plot()` should not be called with positional "
@@ -973,7 +976,9 @@ def _get_call_args(backend_name: str, data: Series | DataFrame, args, kwargs):
973976
)
974977
raise TypeError(msg)
975978

976-
pos_args = {name: value for (name, _), value in zip(arg_def, args)}
979+
pos_args = {
980+
name: value for (name, _), value in zip(arg_def, args, strict=False)
981+
}
977982
if backend_name == "pandas.plotting._matplotlib":
978983
kwargs = dict(arg_def, **pos_args, **kwargs)
979984
else:

pandas/plotting/_matplotlib/boxplot.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -321,9 +321,9 @@ def _grouped_plot_by_column(
321321

322322
ax_values = []
323323

324-
for ax, col in zip(flatten_axes(axes), columns):
324+
for ax, col in zip(flatten_axes(axes), columns, strict=False):
325325
gp_col = grouped[col]
326-
keys, values = zip(*gp_col)
326+
keys, values = zip(*gp_col, strict=True)
327327
re_plotf = plotf(keys, values, ax, xlabel=xlabel, ylabel=ylabel, **kwargs)
328328
ax.set_title(col)
329329
ax_values.append(re_plotf)
@@ -380,7 +380,7 @@ def _get_colors():
380380
# taken from the colors dict parameter
381381
# "boxes" value placed in position 0, "whiskers" in 1, etc.
382382
valid_keys = ["boxes", "whiskers", "medians", "caps"]
383-
key_to_index = dict(zip(valid_keys, range(4)))
383+
key_to_index = dict(zip(valid_keys, range(4), strict=True))
384384
for key, value in colors.items():
385385
if key in valid_keys:
386386
result[key_to_index[key]] = value
@@ -530,7 +530,7 @@ def boxplot_frame_groupby(
530530
layout=layout,
531531
)
532532
data = {}
533-
for (key, group), ax in zip(grouped, flatten_axes(axes)):
533+
for (key, group), ax in zip(grouped, flatten_axes(axes), strict=False):
534534
d = group.boxplot(
535535
ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds
536536
)
@@ -539,7 +539,7 @@ def boxplot_frame_groupby(
539539
ret = pd.Series(data)
540540
maybe_adjust_figure(fig, bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
541541
else:
542-
keys, frames = zip(*grouped)
542+
keys, frames = zip(*grouped, strict=True)
543543
df = pd.concat(frames, keys=keys, axis=1)
544544

545545
# GH 16748, DataFrameGroupby fails when subplots=False and `column` argument

pandas/plotting/_matplotlib/core.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -817,7 +817,7 @@ def _adorn_subplots(self, fig: Figure) -> None:
817817
f"number of columns = {self.nseries}"
818818
)
819819

820-
for ax, title in zip(self.axes, self.title):
820+
for ax, title in zip(self.axes, self.title, strict=False):
821821
ax.set_title(title)
822822
else:
823823
fig.suptitle(self.title)
@@ -1216,7 +1216,7 @@ def _get_errorbars(
12161216
) -> dict[str, Any]:
12171217
errors = {}
12181218

1219-
for kw, flag in zip(["xerr", "yerr"], [xerr, yerr]):
1219+
for kw, flag in zip(["xerr", "yerr"], [xerr, yerr], strict=True):
12201220
if flag:
12211221
err = self.errors[kw]
12221222
# user provided label-matched dataframe of errors
@@ -1457,7 +1457,7 @@ def _get_color_mapping(self, c_values: Series) -> dict[str, np.ndarray]:
14571457
cmap = mpl.colormaps.get_cmap(self.colormap)
14581458
colors = cmap(np.linspace(0, 1, n_colors)) # RGB tuples
14591459

1460-
return dict(zip(unique, colors))
1460+
return dict(zip(unique, colors, strict=True))
14611461

14621462
def _get_norm_and_cmap(self, c_values, color_by_categorical: bool):
14631463
c = self.c
@@ -2178,7 +2178,10 @@ def blank_labeler(label, value):
21782178
# Blank out labels for values of 0 so they don't overlap
21792179
# with nonzero wedges
21802180
if labels is not None:
2181-
blabels = [blank_labeler(left, value) for left, value in zip(labels, y)]
2181+
blabels = [
2182+
blank_labeler(left, value)
2183+
for left, value in zip(labels, y, strict=True)
2184+
]
21822185
else:
21832186
blabels = None
21842187
results = ax.pie(y, labels=blabels, **kwds)
@@ -2197,7 +2200,7 @@ def blank_labeler(label, value):
21972200

21982201
# leglabels is used for legend labels
21992202
leglabels = labels if labels is not None else idx
2200-
for _patch, _leglabel in zip(patches, leglabels):
2203+
for _patch, _leglabel in zip(patches, leglabels, strict=True):
22012204
self._append_legend_handles_labels(_patch, _leglabel)
22022205

22032206
def _post_plot_logic(self, ax: Axes, data) -> None:

pandas/plotting/_matplotlib/hist.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ def _grouped_plot(
323323
naxes=naxes, figsize=figsize, sharex=sharex, sharey=sharey, ax=ax, layout=layout
324324
)
325325

326-
for ax, (key, group) in zip(flatten_axes(axes), grouped):
326+
for ax, (key, group) in zip(flatten_axes(axes), grouped, strict=False):
327327
if numeric_only and isinstance(group, ABCDataFrame):
328328
group = group._get_numeric_data()
329329
plotf(group, ax, **kwargs)
@@ -557,7 +557,7 @@ def hist_frame(
557557
)
558558
can_set_label = "label" not in kwds
559559

560-
for ax, col in zip(flatten_axes(axes), data.columns):
560+
for ax, col in zip(flatten_axes(axes), data.columns, strict=False):
561561
if legend and can_set_label:
562562
kwds["label"] = col
563563
ax.hist(data[col].dropna().values, bins=bins, **kwds)

pandas/plotting/_matplotlib/misc.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def normalize(series):
191191

192192
ax.add_patch(mpl.patches.Circle((0.0, 0.0), radius=1.0, facecolor="none"))
193193

194-
for xy, name in zip(s, df.columns):
194+
for xy, name in zip(s, df.columns, strict=True):
195195
ax.add_patch(mpl.patches.Circle(xy, radius=0.025, facecolor="gray"))
196196

197197
if xy[0] < 0.0 and xy[1] < 0.0:
@@ -266,7 +266,7 @@ def f(t):
266266
color_values = get_standard_colors(
267267
num_colors=len(classes), colormap=colormap, color_type="random", color=color
268268
)
269-
colors = dict(zip(classes, color_values))
269+
colors = dict(zip(classes, color_values, strict=False))
270270
if ax is None:
271271
ax = plt.gca()
272272
ax.set_xlim(-np.pi, np.pi)
@@ -399,7 +399,7 @@ def parallel_coordinates(
399399
if sort_labels:
400400
classes = sorted(classes)
401401
color_values = sorted(color_values)
402-
colors = dict(zip(classes, color_values))
402+
colors = dict(zip(classes, color_values, strict=True))
403403

404404
for i in range(n):
405405
y = df.iloc[i].values

0 commit comments

Comments
 (0)