diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 6ebb51cd3ef89..092a0ec7cfcb5 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -544,8 +544,7 @@ Bug fixes
 
 Categorical
 ^^^^^^^^^^^
--
--
+- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` with ``dtype_backend="pyarrow"`` where an all-missing :class:`Categorical` :class:`Series` raised an error or was converted to ``null[pyarrow]`` (:issue:`59934`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 6ba07b1761557..1255c5a557d27 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1143,6 +1143,7 @@ def convert_dtypes(
                     base_dtype.kind == "O"  # type: ignore[union-attr]
                     and input_array.size > 0
                     and isna(input_array).all()
+                    and not isinstance(input_array.dtype, CategoricalDtype)
                 ):
                     import pyarrow as pa
 
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index e7f6e5d625d3e..66f8a8085b748 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -34,6 +34,26 @@ def test_convert_empty(self):
         # Empty DataFrame can pass convert_dtypes, see GH#40393
         empty_df = pd.DataFrame()
         tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())
+
+    def test_convert_empty_categorical_to_pyarrow(self):
+        # GH#59934
+        df = pd.DataFrame(
+            {
+                "A": pd.Categorical([None] * 5),
+                "B": pd.Categorical([None] * 5, categories=["B1", "B2"]),
+            }
+        )
+        converted = df.convert_dtypes(dtype_backend="pyarrow")
+        expected = df
+        tm.assert_frame_equal(converted, expected)
+
+        assert converted.A.dtype == "category", "Dtype in column A is not 'category'"
+        assert converted.B.dtype == "category", "Dtype in column B is not 'category'"
+        assert converted.A.cat.categories.empty, "Categories in column A are not empty"
+        assert (
+            "B1" in converted.B.cat.categories
+            and "B2" in converted.B.cat.categories
+        ), "Categories in column B don't contain the expected categories"
 
     def test_convert_dtypes_retain_column_names(self):
         # GH#41435
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index 90c4056a39e84..eed6990a84d76 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -297,3 +297,20 @@ def test_convert_dtypes_pyarrow_null(self):
         result = ser.convert_dtypes(dtype_backend="pyarrow")
         expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
         tm.assert_series_equal(result, expected)
+
+    def test_convert_empty_categorical_to_pyarrow(self):
+        # GH#59934
+        ser1 = pd.Series(pd.Categorical([None] * 5))
+        converted1 = ser1.convert_dtypes(dtype_backend="pyarrow")
+        expected = ser1
+
+        tm.assert_series_equal(converted1, expected)
+        assert converted1.dtype == "category", "Series dtype is not 'category'"
+        assert converted1.cat.categories.empty, "Series categories are not empty"
+
+        ser2 = pd.Series(pd.Categorical([None] * 5, categories=["S1", "S2"]))
+        converted2 = ser2.convert_dtypes(dtype_backend="pyarrow")
+        assert (
+            "S1" in converted2.cat.categories
+            and "S2" in converted2.cat.categories
+        ), "Categories in ser2 don't contain the expected categories"