Skip to content

Commit 9d415ea

Browse files
veljaninyuanx749
authored and committed
Handle the case where converting an empty categorical to the 'pyarrow' dtype_backend raises an error. Since converting a non-empty categorical returns a categorical of the 'numpy_nullable' dtype_backend, we now return the empty categorical as well instead of raising an error.
1 parent 8943c97 commit 9d415ea

File tree

2 files changed

+22
-2
lines changed

2 files changed

+22
-2
lines changed

pandas/core/dtypes/cast.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,12 +1145,16 @@ def convert_dtypes(
11451145
and isna(input_array).all()
11461146
):
11471147
import pyarrow as pa
1148-
1148+
11491149
pa_type = pa.null()
11501150
else:
11511151
pa_type = to_pyarrow_type(base_dtype)
11521152
if pa_type is not None:
1153-
inferred_dtype = ArrowDtype(pa_type)
1153+
if isna(input_array).all() and hasattr(input_array, 'categories'):
1154+
inferred_dtype = input_array.dtype
1155+
else:
1156+
inferred_dtype = ArrowDtype(pa_type)
1157+
11541158
elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype):
11551159
# GH 53648
11561160
inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype]

pandas/tests/frame/methods/test_convert_dtypes.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,22 @@ def test_convert_empty(self):
3434
# Empty DataFrame can pass convert_dtypes, see GH#40393
3535
empty_df = pd.DataFrame()
3636
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())
37+
38+
def test_convert_empty_categorical_to_pyarrow(self):
    """Check that all-missing categoricals pass a pyarrow conversion unchanged.

    Column ``A`` has no categories at all; column ``B`` declares the
    categories ``["B1", "B2"]`` but holds only missing values. Calling
    ``convert_dtypes(dtype_backend="pyarrow")`` is expected to return a
    frame equal to the input.
    """
    df = pd.DataFrame(
        {
            "A": pd.Series(pd.Categorical([None] * 5)),
            "B": pd.Series(pd.Categorical([None] * 5, categories=["B1", "B2"])),
        }
    )
    converted = df.convert_dtypes(dtype_backend="pyarrow")
    expected = df
    tm.assert_frame_equal(converted, expected)

    # Inspect the converted frame, not the input: asserting on ``df`` would
    # pass regardless of what convert_dtypes returned.
    assert converted.A.dtype == "category", "Dtype in column A is not 'category'"
    assert converted.B.dtype == "category", "Dtype in column B is not 'category'"
    assert converted.A.cat.categories.empty, "Categories in column A are not empty"
    assert (
        converted.B.cat.categories == ["B1", "B2"]
    ).all(), "Categories in column B are not ['B1', 'B2']"
3753

3854
def test_convert_dtypes_retain_column_names(self):
3955
# GH#41435

0 commit comments

Comments
 (0)