Skip to content

Commit 5e69871

Browse files
committed
Added test cases for StringArray addition and related fixes
1 parent 23767fe commit 5e69871

File tree

5 files changed

+123
-28
lines changed

5 files changed

+123
-28
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ Other enhancements
200200
- :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`)
201201
- :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`)
202202
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
203+
- :class:`StringDtype` now supports addition with :class:`Series` and :class:`DataFrame` objects containing floats, ints, and strings (:issue:`61581`)
203204
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
204205
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
205206
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
@@ -227,7 +228,6 @@ Other enhancements
227228
- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
228229
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
229230
- Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`)
230-
-
231231

232232
.. ---------------------------------------------------------------------------
233233
.. _whatsnew_300.notable_bug_fixes:

pandas/core/arrays/arrow/array.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,17 @@ def _op_method_error_message(self, other, op) -> str:
890890
def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
891891
pa_type = self._pa_array.type
892892
other_original = other
893-
other = self._box_pa(other)
893+
try:
894+
other = self._box_pa(other)
895+
except pa.lib.ArrowTypeError:
896+
# Arrow expected a temporal dtype but received a non-temporal value (e.g. a time offset); retry after converting with to_timedelta
897+
from pandas.core.tools.timedeltas import to_timedelta
898+
899+
other = self._box_pa(to_timedelta(other))
900+
except ValueError as err:
901+
raise TypeError(
902+
"Incompatible type when converting to PyArrow dtype for operation."
903+
) from err
894904

895905
if (
896906
pa.types.is_string(pa_type)
@@ -903,19 +913,31 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
903913
pa.types.is_integer(other.type)
904914
or pa.types.is_floating(other.type)
905915
or pa.types.is_null(other.type)
916+
or pa.types.is_string(other.type)
917+
or pa.types.is_large_string(other.type)
918+
or pa.types.is_binary(other.type)
906919
):
907920
other = other.cast(pa_type)
908-
sep = pa.scalar("", type=pa_type)
909-
try:
910-
if op is operator.add:
911-
result = pc.binary_join_element_wise(self._pa_array, other, sep)
912-
elif op is roperator.radd:
913-
result = pc.binary_join_element_wise(other, self._pa_array, sep)
914-
except pa.ArrowNotImplementedError as err:
921+
sep = pa.scalar("", type=pa_type)
922+
try:
923+
if op is operator.add:
924+
result = pc.binary_join_element_wise(
925+
self._pa_array, other, sep
926+
)
927+
elif op is roperator.radd:
928+
result = pc.binary_join_element_wise(
929+
other, self._pa_array, sep
930+
)
931+
except pa.ArrowNotImplementedError as err:
932+
raise TypeError(
933+
self._op_method_error_message(other_original, op)
934+
) from err
935+
return self._from_pyarrow_array(result)
936+
else:
915937
raise TypeError(
916-
self._op_method_error_message(other_original, op)
917-
) from err
918-
return self._from_pyarrow_array(result)
938+
"Can only add string arrays to dtypes "
939+
"null, int, float, str, and binary."
940+
)
919941
elif op in [operator.mul, roperator.rmul]:
920942
binary = self._pa_array
921943
integral = other

pandas/core/arrays/string_.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,23 +1111,27 @@ def _cmp_method(self, other, op):
11111111
if op.__name__ in ops.ARITHMETIC_BINOPS:
11121112
result = np.empty_like(self._ndarray, dtype="object")
11131113
result[mask] = self.dtype.na_value
1114-
try:
1115-
result[valid] = op(self._ndarray[valid], other)
1116-
if isinstance(other, Path):
1117-
# GH#61940
1118-
return result
1119-
except TypeError:
1120-
if is_array_like(other):
1121-
if is_float_dtype(other.dtype):
1122-
# Shorten whole numbers to be ints to match pyarrow behavior
1114+
if op.__name__ in ["add", "radd"]:
1115+
if isinstance(other, str) or is_string_dtype(other):
1116+
pass
1117+
elif is_float_dtype(other) or is_integer_dtype(other):
1118+
if is_float_dtype(other):
1119+
# Render whole-number floats without the trailing ".0" (e.g. 2.0 -> "2") to match pyarrow behavior
11231120
other = [
11241121
str(int(x)) if x.is_integer() else str(x) for x in other
11251122
]
11261123
else:
11271124
other = other.astype(str)
1128-
result[valid] = op(self._ndarray[valid], other)
11291125
else:
1130-
raise
1126+
raise TypeError(
1127+
f"Only supports op({op.__name__}) between StringArray and "
1128+
"dtypes int, float, and str."
1129+
)
1130+
1131+
result[valid] = op(self._ndarray[valid], other)
1132+
if isinstance(other, Path):
1133+
# GH#61940
1134+
return result
11311135

11321136
return self._from_backing_data(result)
11331137
else:

pandas/tests/arrays/string_/test_string.py

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55

66
import operator
7+
from re import escape
78

89
import numpy as np
910
import pytest
@@ -249,6 +250,32 @@ def test_mul(dtype):
249250
tm.assert_extension_array_equal(result, expected)
250251

251252

253+
def test_add_series(dtype):
254+
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
255+
df = pd.Series(["t", "y", "v", "w"], dtype=object)
256+
257+
result = arr + df
258+
expected = pd.Series(["at", "by", "cv", "dw"]).astype(dtype)
259+
tm.assert_series_equal(result, expected)
260+
261+
result = df + arr
262+
expected = pd.Series(["ta", "yb", "vc", "wd"]).astype(dtype)
263+
tm.assert_series_equal(result, expected)
264+
265+
266+
def test_add_series_float(dtype):
267+
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
268+
df = pd.Series([1, 2.0, 3.5, 4])
269+
270+
result = arr + df
271+
expected = pd.Series(["a1", "b2", "c3.5", "d4"]).astype(dtype)
272+
tm.assert_series_equal(result, expected)
273+
274+
result = df + arr
275+
expected = pd.Series(["1a", "2b", "3.5c", "4d"]).astype(dtype)
276+
tm.assert_series_equal(result, expected)
277+
278+
252279
def test_add_strings(dtype):
253280
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
254281
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
@@ -278,19 +305,58 @@ def test_add_frame(dtype):
278305
tm.assert_frame_equal(result, expected, check_dtype=False)
279306

280307

281-
def test_add_frame_mixed_type(dtype):
282-
arr = pd.array(["a", "bc", 3, np.nan], dtype=dtype)
283-
df = pd.DataFrame([[1, 2, 3.3, 4]])
308+
def test_add_frame_int(dtype):
309+
arr = pd.array(["a", "b", "c", np.nan], dtype=dtype)
310+
df = pd.DataFrame([[1, np.nan, 3, np.nan]])
284311

285312
result = arr + df
286-
expected = pd.DataFrame([["a1", "bc2", "33.3", np.nan]]).astype(dtype)
313+
expected = pd.DataFrame([["a1", np.nan, "c3", np.nan]]).astype(dtype)
287314
tm.assert_frame_equal(result, expected, check_dtype=False)
288315

289316
result = df + arr
290-
expected = pd.DataFrame([["1a", "2bc", "3.33", np.nan]]).astype(dtype)
317+
expected = pd.DataFrame([["1a", np.nan, "3c", np.nan]]).astype(dtype)
291318
tm.assert_frame_equal(result, expected, check_dtype=False)
292319

293320

321+
@pytest.mark.parametrize(
322+
"invalid",
323+
[
324+
pd.Timedelta(hours=31),
325+
pd.Timestamp("2021-01-01"),
326+
np.datetime64("NaT", "ns"),
327+
pd.NaT,
328+
True,
329+
pd.Period("2025-09"),
330+
pd.Categorical(["test"]),
331+
pd.offsets.Minute(3),
332+
pd.Interval(1, 2, closed="right"),
333+
],
334+
)
335+
def test_add_frame_invalid(dtype, invalid):
336+
arr = pd.array(["a", np.nan], dtype=dtype)
337+
df = pd.DataFrame([[invalid, invalid]])
338+
339+
if dtype.storage == "pyarrow":
340+
if invalid == pd.Categorical(["test"]):
341+
msg = (
342+
"Incompatible type found when converting "
343+
"to PyArrow dtype for operation."
344+
)
345+
else:
346+
msg = (
347+
"Can only add string arrays to dtypes "
348+
"null, int, float, str, and binary."
349+
)
350+
with pytest.raises(TypeError, match=msg):
351+
arr + df
352+
else:
353+
msg = escape(
354+
"Only supports op(add) between StringArray and dtypes int, float, and str."
355+
)
356+
with pytest.raises(TypeError, match=msg):
357+
arr + df
358+
359+
294360
def test_comparison_methods_scalar(comparison_op, dtype):
295361
op_name = f"__{comparison_op.__name__}__"
296362
a = pd.array(["a", None, "c"], dtype=dtype)

pandas/tests/extension/base/ops.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
152152
# ndarray & other series
153153
op_name = all_arithmetic_operators
154154
ser = pd.Series(data)
155+
if op_name in ["__add__", "__radd__"]:
156+
pytest.mark.xfail(reason="Failed: DID NOT RAISE <class 'TypeError'>")
157+
155158
self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)))
156159

157160
def test_divmod(self, data):

0 commit comments

Comments
 (0)