Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ Bug fixes

Categorical
^^^^^^^^^^^
-
- Bug in :func:`convert_dtypes` with ``dtype_backend='pyarrow'`` parameter where empty categorical series raise error or get converted to null[pyarrow] (:issue:`59934`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- Bug in :func:`convert_dtypes` with ``dtype_backend='pyarrow'`` parameter where empty categorical series raise error or get converted to null[pyarrow] (:issue:`59934`)
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend='pyarrow'`` parameter where empty categorical series raise error or get converted to null[pyarrow] (:issue:`59934`)

-

Datetimelike
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,7 @@ def convert_dtypes(
base_dtype.kind == "O" # type: ignore[union-attr]
and input_array.size > 0
and isna(input_array).all()
and not isinstance(input_array.dtype, CategoricalDtype)
):
import pyarrow as pa

Expand All @@ -1151,6 +1152,7 @@ def convert_dtypes(
pa_type = to_pyarrow_type(base_dtype)
if pa_type is not None:
inferred_dtype = ArrowDtype(pa_type)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you revert the addition of this newline (the blank line added after ``inferred_dtype = ArrowDtype(pa_type)``)?

elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype):
# GH 53648
inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype]
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,27 @@ def test_convert_empty(self):
empty_df = pd.DataFrame()
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())

def test_convert_empty_categorical_to_pyarrow(self):
    """Check that all-missing categorical columns survive a pyarrow conversion.

    Builds a frame with two categorical columns that contain only missing
    values (one without declared categories, one with ``["B1", "B2"]``),
    runs ``convert_dtypes(dtype_backend="pyarrow")`` and asserts that the
    result equals the input, including dtype and categories.
    """
    # GH#59934
    # NOTE(review): this test needs pyarrow installed; consider guarding it
    # with pytest.importorskip("pyarrow") if the module does not already.
    df = pd.DataFrame(
        {
            "A": pd.Categorical([None] * 5),
            "B": pd.Categorical([None] * 5, categories=["B1", "B2"]),
        }
    )
    # Snapshot the input before converting so the comparison cannot pass
    # vacuously if the conversion returned or mutated the original object.
    expected = df.copy()
    converted = df.convert_dtypes(dtype_backend="pyarrow")
    tm.assert_frame_equal(converted, expected)

    assert converted.A.dtype == "category", "Dtype in column A is not 'category'"
    assert converted.B.dtype == "category", "Dtype in column B is not 'category'"
    assert converted.A.cat.categories.empty, "Categories in column A are not empty"
    # Use the ``in`` operator rather than calling ``__contains__`` directly.
    assert (
        "B1" in converted.B.cat.categories and "B2" in converted.B.cat.categories
    ), "Categories in column B doesn't contain adequate categories"

def test_convert_dtypes_retain_column_names(self):
# GH#41435
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,21 @@ def test_convert_dtypes_pyarrow_null(self):
result = ser.convert_dtypes(dtype_backend="pyarrow")
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
tm.assert_series_equal(result, expected)

def test_convert_empty_categorical_to_pyarrow(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def test_convert_empty_categorical_to_pyarrow(self):
def test_convert_empty_categorical_to_pyarrow(self):
pytest.importorskip("pyarrow")

# GH#59934
ser1 = pd.Series(pd.Categorical([None] * 5))
converted1 = ser1.convert_dtypes(dtype_backend="pyarrow")
expected = ser1

tm.assert_series_equal(converted1, expected)
assert converted1.dtype == "category", "Series dtype is not 'category'"
assert converted1.cat.categories.empty, "Series categories are not empty"
Comment on lines +310 to +311
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, these two assertions can be removed.


ser2 = pd.Series(pd.Categorical([None] * 5, categories=["S1", "S2"]))
converted2 = ser2.convert_dtypes(dtype_backend="pyarrow")
assert converted2.cat.categories.__contains__(
"S1"
) and converted2.cat.categories.__contains__(
"S2"
), "Categories in ser2 doesn't contain adequate categories"
Loading