Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,7 @@ Bug fixes
Categorical
^^^^^^^^^^^
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where an empty categorical :class:`Series` raised an error or was converted to ``null[pyarrow]`` (:issue:`59934`)
-

Datetimelike
Expand Down
1 change: 1 addition & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,7 @@ def convert_dtypes(
base_dtype.kind == "O" # type: ignore[union-attr]
and input_array.size > 0
and isna(input_array).all()
and not isinstance(input_array.dtype, CategoricalDtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Could you put this check above the isna(...).all() check as it's less expensive?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, could you properly add an elif isinstance(inferred_dtype, CategoricalDtype) clause for this type in the above if/else branch?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this to the outer if branch, to skip the arrow dtype conversion for CategoricalDtype.

As mentioned in #59934, the expected behavior of convert_dtypes for CategoricalDtype series essentially ignores the requested pyarrow backend conversion.

):
import pyarrow as pa

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -35,6 +37,19 @@ def test_convert_empty(self):
empty_df = pd.DataFrame()
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())

@td.skip_if_no("pyarrow")
def test_convert_empty_categorical_to_pyarrow(self):
    """Check all-None categorical columns survive pyarrow conversion unchanged."""
    # GH#59934
    missing = [None] * 5
    # One column with no declared categories, one with explicit categories.
    without_categories = pd.Categorical(missing)
    with_categories = pd.Categorical(missing, categories=["B1", "B2"])
    frame = pd.DataFrame({"A": without_categories, "B": with_categories})

    result = frame.convert_dtypes(dtype_backend="pyarrow")

    # The input frame doubles as the expected output: the conversion must
    # hand back an equal frame rather than raising or changing the dtypes.
    tm.assert_frame_equal(result, frame)

def test_convert_dtypes_retain_column_names(self):
# GH#41435
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from pandas._libs import lib
import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
Expand Down Expand Up @@ -298,6 +299,19 @@ def test_convert_dtypes_pyarrow_null(self):
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
tm.assert_series_equal(result, expected)

@td.skip_if_no("pyarrow")
def test_convert_empty_categorical_to_pyarrow(self):
    """Check all-None categorical Series survive pyarrow conversion unchanged."""
    # GH#59934
    categoricals = (
        # No declared categories.
        pd.Categorical([None] * 5),
        # Explicit categories, none of which appear in the values.
        pd.Categorical([None] * 5, categories=["S1", "S2"]),
    )
    for cat in categoricals:
        ser = pd.Series(cat)
        result = ser.convert_dtypes(dtype_backend="pyarrow")
        # The input Series is itself the expected result of the conversion.
        tm.assert_series_equal(result, ser)

def test_convert_dtype_pyarrow_timezone_preserve(self):
# GH 60237
pytest.importorskip("pyarrow")
Expand Down
Loading