Skip to content

Commit e6ef750

Browse files
committed
Handling the case where converting empty categorical to 'pyarrow' dtype_backend results in error. Since conversion of non-empty categorical returns categorical of 'numpy_nullable' dtype_backend, now, instead of raising an error, we ensure empty categorical is returned as well.
1 parent e78ebd3 commit e6ef750

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

pandas/core/dtypes/cast.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,7 +1112,7 @@ def convert_dtypes(
11121112

11131113
else:
11141114
inferred_dtype = input_array.dtype
1115-
1115+
11161116
if dtype_backend == "pyarrow":
11171117
from pandas.core.arrays.arrow.array import to_pyarrow_type
11181118
from pandas.core.arrays.string_ import StringDtype
@@ -1145,12 +1145,16 @@ def convert_dtypes(
11451145
and isna(input_array).all()
11461146
):
11471147
import pyarrow as pa
1148-
1148+
11491149
pa_type = pa.null()
11501150
else:
11511151
pa_type = to_pyarrow_type(base_dtype)
11521152
if pa_type is not None:
1153-
inferred_dtype = ArrowDtype(pa_type)
1153+
if isna(input_array).all() and hasattr(input_array, 'categories'):
1154+
inferred_dtype = input_array.dtype
1155+
else:
1156+
inferred_dtype = ArrowDtype(pa_type)
1157+
11541158
elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype):
11551159
# GH 53648
11561160
inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype]

pandas/tests/frame/methods/test_convert_dtypes.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,22 @@ def test_convert_empty(self):
3434
# Empty DataFrame can pass convert_dtypes, see GH#40393
3535
empty_df = pd.DataFrame()
3636
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())
37+
38+
def test_convert_empty_categorical_to_pyarrow(self):
    """All-NA categorical columns keep their dtype with the pyarrow backend.

    Column ``A`` has no categories at all; column ``B`` declares categories
    but holds only missing values. ``convert_dtypes(dtype_backend="pyarrow")``
    is expected to hand both back unchanged.
    """
    df = pd.DataFrame(
        {
            "A": pd.Series(pd.Categorical([None] * 5)),
            "B": pd.Series(pd.Categorical([None] * 5, categories=["B1", "B2"])),
        }
    )
    # Snapshot the input before converting: comparing against an alias of
    # ``df`` would stay green even if the conversion mutated it in place.
    expected = df.copy()

    converted = df.convert_dtypes(dtype_backend="pyarrow")
    tm.assert_frame_equal(converted, expected)

    # Inspect the *result* of the conversion, not the input frame.
    assert converted.A.dtype == "category", "Dtype in column A is not 'category'"
    assert converted.B.dtype == "category", "Dtype in column B is not 'category'"
    assert converted.A.cat.categories.empty, "Categories in column A are not empty"
    assert list(converted.B.cat.categories) == [
        "B1",
        "B2",
    ], "Categories in column B were not preserved"
3753

3854
def test_convert_dtypes_retain_column_names(self):
3955
# GH#41435

pandas/tests/series/methods/test_convert_dtypes.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,16 @@ def test_convert_dtypes_pyarrow_null(self):
297297
result = ser.convert_dtypes(dtype_backend="pyarrow")
298298
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
299299
tm.assert_series_equal(result, expected)
300+
301+
def test_convert_empty_categorical_to_pyarrow(self):
    """All-NA categorical Series keep their dtype with the pyarrow backend.

    Two inputs are covered: a categorical with no categories, and one that
    declares categories but holds only missing values. In both cases
    ``convert_dtypes(dtype_backend="pyarrow")`` is expected to return the
    Series unchanged.
    """
    # Case 1: no categories at all.
    ser = pd.Series(pd.Categorical([None] * 5))
    # Snapshot the input so the comparison is not made against an alias.
    expected = ser.copy()

    converted = ser.convert_dtypes(dtype_backend="pyarrow")
    tm.assert_series_equal(converted, expected)

    # Inspect the *result* of the conversion, not the input Series.
    assert converted.dtype == "category", "Series dtype is not 'category'"
    assert converted.cat.categories.empty, "Series categories are not empty"

    # Case 2: categories declared, values all missing. The conversion has to
    # be run here as well; checking only the constructed Series proves nothing
    # about convert_dtypes.
    ser2 = pd.Series(pd.Categorical([None] * 5, categories=["S1", "S2"]))
    expected2 = ser2.copy()

    converted2 = ser2.convert_dtypes(dtype_backend="pyarrow")
    tm.assert_series_equal(converted2, expected2)

    assert converted2.dtype == "category", "Series dtype is not 'category'"
    assert list(converted2.cat.categories) == [
        "S1",
        "S2",
    ], "Series categories were not preserved"

0 commit comments

Comments
 (0)