Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ Bug fixes
Categorical
^^^^^^^^^^^
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
- Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
-
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
# GH 10696/18593/18630
dtype = self.dtype.update_dtype(dtype)
self = self.copy() if copy else self
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this line if we pass self._set_dtype(dtype, copy=copy)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't entirely sure. `self.copy()` also creates a new Categorical object, which I assume is desired.

result = self._set_dtype(dtype)
result = self._set_dtype(dtype, copy=False)

elif isinstance(dtype, ExtensionDtype):
return super().astype(dtype, copy=copy)
Expand Down Expand Up @@ -945,7 +945,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:

super().__init__(self._ndarray, new_dtype)

def _set_dtype(self, dtype: CategoricalDtype) -> Self:
def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
"""
Internal method for directly updating the CategoricalDtype
Expand All @@ -958,7 +958,9 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
We don't do any validation here. It's assumed that the dtype is
a (valid) instance of `CategoricalDtype`.
"""
codes = recode_for_categories(self.codes, self.categories, dtype.categories)
codes = recode_for_categories(
self.codes, self.categories, dtype.categories, copy
)
return type(self)._simple_new(codes, dtype=dtype)

def set_ordered(self, value: bool) -> Self:
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/arrays/categorical/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ def test_astype_category(self, dtype_ordered, ordered):
expected = cat
tm.assert_categorical_equal(result, expected)

def test_astype_category_copy_false_nocopy_codes(self):
# GH#62000
cat = Categorical([3, 2, 4, 1])
new = cat.astype("category", copy=False)
assert new.codes.base is cat.codes.base or new.codes is cat.codes
new = cat.astype("category", copy=True)
assert not (new.codes.base is cat.codes.base or new.codes is cat.codes)

def test_astype_object_datetime_categories(self):
# GH#40754
cat = Categorical(to_datetime(["2021-03-27", NaT]))
Expand Down
Loading