Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Bug fixes
with a compiled regex and custom flags (:issue:`62240`)
- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
- Fix error being raised when using a numpy ufunc with a Python-backed string array (:issue:`40800`)

Improvements and fixes for Copy-on-Write
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,14 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# e.g. test_np_max_nested_tuples
return result
else:
if self.dtype.type is str: # type: ignore[comparison-overlap]
# StringDtype
try:
return type(self)(result)
except ValueError:
# if validation of input fails (no strings)
# -> fallback to returning raw numpy array
return result
# one return value; re-box array-like results
return type(self)(result)

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,3 +864,30 @@ def test_tolist(dtype):
result = arr.tolist()
expected = vals
tm.assert_equal(result, expected)


@pytest.mark.parametrize("box", [pd.Series, pd.array])
def test_numpy_array_ufunc(dtype, box):
    """Apply object-based numpy ufuncs to a boxed string array.

    Two ufuncs built with ``np.frompyfunc`` are exercised: one mapping each
    string to its length and one doubling each string. The result is
    compared against the container the test currently expects for the
    given ``box`` and ``dtype.storage``.
    """
    doubled = ["aa", "bbbb", "cccccc"]
    lengths = [1, 2, 3]
    is_series = box is pd.Series

    arr = box(["a", "bb", "ccc"], dtype=dtype)

    # custom ufunc that works with string (object) input -> returning numeric
    str_len_ufunc = np.frompyfunc(lambda x: len(x), 1, 1)
    result = str_len_ufunc(arr)
    # TODO we should infer int64 dtype here?
    if is_series:
        expected = pd.Series(lengths, dtype=object)
    else:
        expected = np.array(lengths, dtype=object)
    tm.assert_equal(result, expected)

    # custom ufunc returning strings
    str_multiply_ufunc = np.frompyfunc(lambda x: x * 2, 1, 1)
    result = str_multiply_ufunc(arr)
    if dtype.storage != "pyarrow":
        # the original class / dtype is expected to round-trip
        expected = box(doubled, dtype=dtype)
    elif is_series:
        # TODO ArrowStringArray should also preserve the class / dtype
        # not specifying the dtype because the exact dtype is not yet preserved
        expected = pd.Series(doubled)
    else:
        # TODO ArrowStringArray should also preserve the class / dtype
        expected = np.array(doubled, dtype=object)

    tm.assert_equal(result, expected)
Loading