diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7ec50137c3039..7f0a5f77d0905 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1064,6 +1064,8 @@ Reshaping - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`) - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`) - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`) +- Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`) +- Sparse ^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3bd35a6dfed29..a3634e370cfc3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1727,6 +1727,17 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if tipo is not None: # TODO: itemsize check? 
+ + if isinstance(tipo, CategoricalDtype): + # GH#56376 + if tipo.categories.dtype.kind not in "iuf": + # Anything other than float/integer we cannot hold + raise LossySetitemError + casted = np.asarray(element, dtype=dtype) + if np.array_equal(casted, element, equal_nan=True): + return casted + raise LossySetitemError + + if tipo.kind not in "iuf": # Anything other than float/integer we cannot hold raise LossySetitemError diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 55dfb41b8c894..26d297b86954d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5386,6 +5386,12 @@ def putmask(self, mask, value) -> Index: # See also: Block.coerce_to_target_dtype dtype = self._find_common_type_compat(value) + if dtype == self.dtype: + # GH#56376 avoid RecursionError + raise AssertionError( + "Something has gone wrong. Please report a bug at " + "github.com/pandas-dev/pandas" + ) from err return self.astype(dtype).putmask(mask, value) values = self._values.copy() diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py index 5ef021753b843..b833c601ec2c8 100644 --- a/pandas/tests/dtypes/cast/test_can_hold_element.py +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -2,6 +2,8 @@ from pandas.core.dtypes.cast import can_hold_element +from pandas import Categorical + def test_can_hold_element_range(any_int_numpy_dtype): # GH#44261 @@ -96,3 +98,11 @@ def test_can_hold_element_bool(): assert not can_hold_element(arr, element) assert not can_hold_element(arr, np.array([element])) assert not can_hold_element(arr, np.array([element], dtype=object)) + + +def test_can_hold_element_categorical(): + # GH#56376 float64 ndarray should be able to hold Categorical([1, 2, None]) + arr = np.array([], dtype=np.float64) + cat = Categorical([1, 2, None]) + + assert can_hold_element(arr, cat) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a8e29ef03acc2..1ddf00527196a 100644 --- 
a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -3070,3 +3070,17 @@ def test_merge_for_suffix_collisions(suffixes): df2 = DataFrame({"col1": [1], "col2": [2], "col2_dup": [3]}) with pytest.raises(MergeError, match="duplicate columns"): merge(df1, df2, on="col1", suffixes=suffixes) + + +def test_merge_categorical_key_recursion(): + # GH#56376 keys are categoricals over int64 vs. float64 categories + lt = CategoricalDtype(categories=np.asarray([1, 2, 3], dtype="int64")) + rt = CategoricalDtype(categories=np.asarray([1, 2, 3], dtype="float64")) + left = DataFrame({"key": Series([1, 2], dtype=lt)}) + right = DataFrame({"key": Series([1, 3], dtype=rt)}) + + result = left.merge(right, on="key", how="outer") # used to raise RecursionError + expected = left.astype("int64").merge( + right.astype("float64"), on="key", how="outer" + ) + tm.assert_frame_equal(result, expected)