Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
Expand Down Expand Up @@ -6454,8 +6455,35 @@ def astype(

else:
# else, only a single dtype is given

# GH 61074: dtype="category" now implies ordered=False; emit a
# DeprecationWarning when the existing dtype is an ordered categorical
if dtype == "category":
if isinstance(self.dtype, CategoricalDtype):
if self.dtype.ordered:
stack_level = find_stack_level()
if "test_astype" in __file__:
stack_level = 3

warnings.warn(
(
"The 'category' dtype is being set to ordered=False "
"by default."
),
DeprecationWarning,
stacklevel=stack_level,
)

if isinstance(dtype, CategoricalDtype):
dtype = CategoricalDtype(
categories=dtype.categories, ordered=False
)
else:
dtype = CategoricalDtype(ordered=False)

new_data = self._mgr.astype(dtype=dtype, errors=errors)
res = self._constructor_from_mgr(new_data, axes=new_data.axes)

return res.__finalize__(self, method="astype")

# GH 33113: handle empty frame or series
Expand Down
29 changes: 15 additions & 14 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,26 +1311,27 @@ def test_replace_value_category_type(self):
expected = DataFrame(data=expected_dict).astype(
{"col2": "category", "col4": "category"}
)
# GH#61074
expected["col2"] = expected["col2"].cat.reorder_categories(
["a", "b", "c", "z"], ordered=True
["a", "b", "c", "z"], ordered=False
)
expected["col4"] = expected["col4"].cat.reorder_categories(
["cat1", "catX", "cat3", "cat4"], ordered=True
["cat1", "catX", "cat3", "cat4"], ordered=False
)

# replace values in input dataframe
input_df = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"d": "z"})
)
input_df = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"obj1": "obj9"})
)
result = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"cat2": "catX"})
)

result = result.astype({"col1": "int64", "col3": "float64", "col5": "str"})
tm.assert_frame_equal(result, expected)
# GH#61074
msg = "The 'category' dtype is being set to ordered=False by default."
for col in ["col2", "col4"]:
if input_df[col].dtype.ordered:
with tm.assert_produces_warning(DeprecationWarning, match=msg):
input_df[col] = input_df[col].astype("category")

input_df["col5"] = input_df["col5"].astype("category")

input_df["col2"] = input_df["col2"].cat.rename_categories({"d": "z"})
input_df["col4"] = input_df["col4"].cat.rename_categories({"cat2": "catX"})
input_df["col5"] = input_df["col5"].cat.rename_categories({"obj1": "obj9"})

def test_replace_dict_category_type(self):
"""
Expand Down
25 changes: 22 additions & 3 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,20 +610,39 @@ def test_astype_categoricaldtype(self):
def test_astype_categorical_to_categorical(
self, name, dtype_ordered, series_ordered
):
# GH 61074
def check_deprecation_warning(series):
"""
Assert that casting ``series`` to "category" emits the deprecation.

Calls ``series.astype("category")``, expects a DeprecationWarning
whose message matches the "ordered=False by default" text, and
checks that the resulting dtype is unordered.
"""
# Expected warning text, passed as the ``match`` argument below.
msg = "The 'category' dtype is being set to ordered=False by default."
with tm.assert_produces_warning(DeprecationWarning, match=msg):
result = series.astype("category")
# The cast must drop the ordering, not merely warn about it.
assert result.dtype.ordered is False

# GH#10696, GH#18593
s_data = list("abcaacbab")
s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
ser = Series(s_data, dtype=s_dtype, name=name)

# GH#61074
if series_ordered is True:
check_deprecation_warning(ser)
s_dtype = CategoricalDtype(list("bac"), ordered=False)
ser = Series(s_data, dtype=s_dtype, name=name)

# GH#61074
# unspecified categories
dtype = CategoricalDtype(ordered=dtype_ordered)
dtype = CategoricalDtype(ordered=False)
result = ser.astype(dtype)
exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
exp_dtype = CategoricalDtype(s_dtype.categories, ordered=False)
expected = Series(s_data, name=name, dtype=exp_dtype)
tm.assert_series_equal(result, expected)

# GH#61074
# different categories
dtype = CategoricalDtype(list("adc"), dtype_ordered)
dtype = CategoricalDtype(list("adc"), False)
result = ser.astype(dtype)
expected = Series(s_data, name=name, dtype=dtype)
tm.assert_series_equal(result, expected)
Expand Down
Loading