Skip to content

Commit 94769a1

Browse files
committed
additional revisions
1 parent 5ce426d commit 94769a1

File tree

4 files changed

+36
-25
lines changed

4 files changed

+36
-25
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ Bug fixes
544544

545545
Categorical
546546
^^^^^^^^^^^
547-
-
547+
- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` with ``dtype_backend="pyarrow"`` where an empty categorical :class:`Series` raised an error or was converted to ``null[pyarrow]`` (:issue:`59934`)
548548
-
549549

550550
Datetimelike

pandas/core/dtypes/cast.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,7 +1112,7 @@ def convert_dtypes(
11121112

11131113
else:
11141114
inferred_dtype = input_array.dtype
1115-
1115+
11161116
if dtype_backend == "pyarrow":
11171117
from pandas.core.arrays.arrow.array import to_pyarrow_type
11181118
from pandas.core.arrays.string_ import StringDtype
@@ -1143,14 +1143,15 @@ def convert_dtypes(
11431143
base_dtype.kind == "O" # type: ignore[union-attr]
11441144
and input_array.size > 0
11451145
and isna(input_array).all()
1146+
and not isinstance(input_array.dtype, CategoricalDtype)
11461147
):
11471148
import pyarrow as pa
1148-
1149+
11491150
pa_type = pa.null()
11501151
else:
11511152
pa_type = to_pyarrow_type(base_dtype)
11521153
if pa_type is not None:
1153-
if isna(input_array).all() and hasattr(input_array, 'categories'):
1154+
if isna(input_array).all() and hasattr(input_array, "categories"):
11541155
inferred_dtype = input_array.dtype
11551156
else:
11561157
inferred_dtype = ArrowDtype(pa_type)

pandas/tests/frame/methods/test_convert_dtypes.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,22 +34,27 @@ def test_convert_empty(self):
3434
# Empty DataFrame can pass convert_dtypes, see GH#40393
3535
empty_df = pd.DataFrame()
3636
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())
37-
37+
3838
def test_convert_empty_categorical_to_pyarrow(self):
39+
# GH#59934
3940
df = pd.DataFrame(
4041
{
41-
"A": pd.Series(pd.Categorical([None] * 5)),
42-
"B": pd.Series(pd.Categorical([None] * 5, categories=["B1", "B2"])),
43-
}
42+
"A": pd.Categorical([None] * 5),
43+
"B": pd.Categorical([None] * 5, categories=["B1", "B2"]),
44+
}
4445
)
4546
converted = df.convert_dtypes(dtype_backend="pyarrow")
4647
expected = df
4748
tm.assert_frame_equal(converted, expected)
48-
49-
assert df.A.dtype == "category", "Dtype in column A is not 'category'"
50-
assert df.B.dtype == "category", "Dtype in column B is not 'category'"
51-
assert df.A.cat.categories.empty, "Categories in column A are not empty"
52-
assert (df.B.cat.categories == ["B1", "B2"]).all(), "Categories in column A are not empty"
49+
50+
assert converted.A.dtype == "category", "Dtype in column A is not 'category'"
51+
assert converted.B.dtype == "category", "Dtype in column B is not 'category'"
52+
assert converted.A.cat.categories.empty, "Categories in column A are not empty"
53+
assert converted.B.cat.categories.__contains__(
54+
"B1"
55+
) and converted.B.cat.categories.__contains__(
56+
"B2"
57+
), "Categories in column B doesn't contain adequate categories"
5358

5459
def test_convert_dtypes_retain_column_names(self):
5560
# GH#41435

pandas/tests/series/methods/test_convert_dtypes.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -297,16 +297,21 @@ def test_convert_dtypes_pyarrow_null(self):
297297
result = ser.convert_dtypes(dtype_backend="pyarrow")
298298
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
299299
tm.assert_series_equal(result, expected)
300-
300+
301301
def test_convert_empty_categorical_to_pyarrow(self):
302-
ser = pd.Series(pd.Series(pd.Categorical([None] * 5)))
303-
304-
converted = ser.convert_dtypes(dtype_backend="pyarrow")
305-
expected = ser
306-
tm.assert_series_equal(converted, expected)
307-
308-
assert ser.dtype == "category", "Series dtype is not 'category'"
309-
assert ser.cat.categories.empty, "Series categories are not empty"
310-
311-
ser2 = pd.Series(pd.Series(pd.Categorical([None] * 5, categories=["S1", "S2"])))
312-
assert (ser2.cat.categories == ["S1", "S2"]).all(), "Series categories are not empty"
302+
# GH#59934
303+
ser1 = pd.Series(pd.Categorical([None] * 5))
304+
converted1 = ser1.convert_dtypes(dtype_backend="pyarrow")
305+
expected = ser1
306+
307+
tm.assert_series_equal(converted1, expected)
308+
assert converted1.dtype == "category", "Series dtype is not 'category'"
309+
assert converted1.cat.categories.empty, "Series categories are not empty"
310+
311+
ser2 = pd.Series(pd.Categorical([None] * 5, categories=["S1", "S2"]))
312+
converted2 = ser2.convert_dtypes(dtype_backend="pyarrow")
313+
assert converted2.cat.categories.__contains__(
314+
"S1"
315+
) and converted2.cat.categories.__contains__(
316+
"S2"
317+
), "Categories in ser2 doesn't contain adequate categories"

0 commit comments

Comments
 (0)