Skip to content

Commit ef256bf

Browse files
committed
following up on comments from rhshadrach
1 parent ba7e83d commit ef256bf

File tree

4 files changed

+39
-2
lines changed

4 files changed

+39
-2
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -544,8 +544,7 @@ Bug fixes
544544

545545
Categorical
546546
^^^^^^^^^^^
547-
-
548-
-
547+
- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` with ``dtype_backend="pyarrow"`` where an empty (all-missing) categorical :class:`Series` raised an error or was converted to ``null[pyarrow]`` (:issue:`59934`)
549548

550549
Datetimelike
551550
^^^^^^^^^^^^

pandas/core/dtypes/cast.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,7 @@ def convert_dtypes(
11431143
base_dtype.kind == "O" # type: ignore[union-attr]
11441144
and input_array.size > 0
11451145
and isna(input_array).all()
1146+
and not isinstance(input_array.dtype, CategoricalDtype)
11461147
):
11471148
import pyarrow as pa
11481149

pandas/tests/frame/methods/test_convert_dtypes.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,26 @@ def test_convert_empty(self):
3434
# Empty DataFrame can pass convert_dtypes, see GH#40393
3535
empty_df = pd.DataFrame()
3636
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())
37+
38+
def test_convert_empty_categorical_to_pyarrow(self):
    # GH#59934
    # Categorical columns that hold only missing values must pass through
    # ``convert_dtypes(dtype_backend="pyarrow")`` unchanged: same dtype and
    # same categories, with no error raised.
    df = pd.DataFrame(
        {
            "A": pd.Categorical([None] * 5),
            "B": pd.Categorical([None] * 5, categories=["B1", "B2"]),
        }
    )
    # Snapshot the input before converting; comparing against ``df`` itself
    # could not detect an accidental in-place modification.
    expected = df.copy()

    converted = df.convert_dtypes(dtype_backend="pyarrow")
    tm.assert_frame_equal(converted, expected)

    assert converted.A.dtype == "category", "Dtype in column A is not 'category'"
    assert converted.B.dtype == "category", "Dtype in column B is not 'category'"
    assert converted.A.cat.categories.empty, "Categories in column A are not empty"

    # Use the ``in`` operator rather than calling ``__contains__`` directly.
    categories_b = converted.B.cat.categories
    assert (
        "B1" in categories_b and "B2" in categories_b
    ), "Categories in column B doesn't contain adequate categories"
3757

3858
def test_convert_dtypes_retain_column_names(self):
3959
# GH#41435

pandas/tests/series/methods/test_convert_dtypes.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,20 @@ def test_convert_dtypes_pyarrow_null(self):
297297
result = ser.convert_dtypes(dtype_backend="pyarrow")
298298
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
299299
tm.assert_series_equal(result, expected)
300+
301+
def test_convert_empty_categorical_to_pyarrow(self):
    # GH#59934
    # A categorical Series that holds only missing values must pass through
    # ``convert_dtypes(dtype_backend="pyarrow")`` unchanged: same dtype and
    # same categories, with no error raised.

    # Case 1: no categories declared.
    ser1 = pd.Series(pd.Categorical([None] * 5))
    # Snapshot the input before converting; comparing against ``ser1``
    # itself could not detect an accidental in-place modification.
    expected1 = ser1.copy()
    converted1 = ser1.convert_dtypes(dtype_backend="pyarrow")

    tm.assert_series_equal(converted1, expected1)
    assert converted1.dtype == "category", "Series dtype is not 'category'"
    assert converted1.cat.categories.empty, "Series categories are not empty"

    # Case 2: categories declared but unused; they must be preserved.
    ser2 = pd.Series(pd.Categorical([None] * 5, categories=["S1", "S2"]))
    expected2 = ser2.copy()
    converted2 = ser2.convert_dtypes(dtype_backend="pyarrow")

    tm.assert_series_equal(converted2, expected2)
    # Use the ``in`` operator rather than calling ``__contains__`` directly.
    categories2 = converted2.cat.categories
    assert (
        "S1" in categories2 and "S2" in categories2
    ), "Categories in ser2 doesn't contain adequate categories"

0 commit comments

Comments
 (0)