diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst
index fbeea4ea24394..2d2553e7b32be 100644
--- a/doc/source/whatsnew/v2.3.2.rst
+++ b/doc/source/whatsnew/v2.3.2.rst
@@ -25,6 +25,7 @@ Bug fixes
 - Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
   "string" type in the JSON Table Schema for :class:`StringDtype` columns
   (:issue:`61889`)
+- Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`)
 - Fixed ``~Series.str.match``, ``~Series.str.fullmatch`` and ``~Series.str.contains``
   with compiled regex for the Arrow-backed string dtype (:issue:`61964`, :issue:`61942`)
 
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 5b2d3b7c065a8..cee31d799a7ac 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -829,6 +829,25 @@ def _logical_method(self, other, op):
         # integer types. Otherwise these are boolean ops.
         if pa.types.is_integer(self._pa_array.type):
             return self._evaluate_op_method(other, op, ARROW_BIT_WISE_FUNCS)
+        elif (
+            (
+                pa.types.is_string(self._pa_array.type)
+                or pa.types.is_large_string(self._pa_array.type)
+            )
+            and op in (roperator.ror_, roperator.rand_, roperator.rxor)
+            and isinstance(other, np.ndarray)
+            and other.dtype == bool
+        ):
+            # GH#60234 backward compatibility for the move to StringDtype in 3.0
+            op_name = op.__name__[1:].strip("_")
+            warnings.warn(
+                f"'{op_name}' operations between boolean dtype and {self.dtype} are "
+                "deprecated and will raise in a future version. Explicitly "
+                "cast the strings to a boolean dtype before operating instead.",
+                DeprecationWarning,
+                stacklevel=find_stack_level(),
+            )
+            return op(other, self.astype(bool))
         else:
             return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS)
 
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 9c8dc2054106a..d497c18cb27d6 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -49,6 +49,7 @@
     missing,
     nanops,
     ops,
+    roperator,
 )
 from pandas.core.algorithms import isin
 from pandas.core.array_algos import masked_reductions
@@ -385,6 +386,26 @@ class BaseStringArray(ExtensionArray):
 
     dtype: StringDtype
 
+    # TODO(4.0): Once the deprecation here is enforced, this method can be
+    # removed and we use the parent class method instead.
+    def _logical_method(self, other, op):
+        if (
+            op in (roperator.ror_, roperator.rand_, roperator.rxor)
+            and isinstance(other, np.ndarray)
+            and other.dtype == bool
+        ):
+            # GH#60234 backward compatibility for the move to StringDtype in 3.0
+            op_name = op.__name__[1:].strip("_")
+            warnings.warn(
+                f"'{op_name}' operations between boolean dtype and {self.dtype} are "
+                "deprecated and will raise in a future version. Explicitly "
+                "cast the strings to a boolean dtype before operating instead.",
+                DeprecationWarning,
+                stacklevel=find_stack_level(),
+            )
+            return op(other, self.astype(bool))
+        return NotImplemented
+
     @doc(ExtensionArray.tolist)
     def tolist(self):
         if self.ndim > 1:
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index c729b910d05a7..dc60f7daf6dd2 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -776,3 +776,27 @@ def test_series_str_decode():
     result = Series([b"x", b"y"]).str.decode(encoding="UTF-8", errors="strict")
     expected = Series(["x", "y"], dtype="str")
     tm.assert_series_equal(result, expected)
+
+
+def test_reversed_logical_ops(any_string_dtype):
+    # GH#60234
+    dtype = any_string_dtype
+    warn = None if dtype == object else DeprecationWarning
+    left = Series([True, False, False, True])
+    right = Series(["", "", "b", "c"], dtype=dtype)
+
+    msg = "operations between boolean dtype and"
+    with tm.assert_produces_warning(warn, match=msg):
+        result = left | right
+    expected = left | right.astype(bool)
+    tm.assert_series_equal(result, expected)
+
+    with tm.assert_produces_warning(warn, match=msg):
+        result = left & right
+    expected = left & right.astype(bool)
+    tm.assert_series_equal(result, expected)
+
+    with tm.assert_produces_warning(warn, match=msg):
+        result = left ^ right
+    expected = left ^ right.astype(bool)
+    tm.assert_series_equal(result, expected)