Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Fixed regressions
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are given as a NumPy float array (:issue:`40371`)
- Regression in :func:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
- Regression in :meth:`DataFrame.astype` with ``dtype=str`` failing to convert ``NaN`` in categorical columns (:issue:`41797`)

.. ---------------------------------------------------------------------------
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,14 +523,17 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
try:
new_cats = np.asarray(self.categories)
new_cats = new_cats.astype(dtype=dtype, copy=copy)
fill_value = np.array(np.nan).astype(dtype).item()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are cases where ``.item()`` behaves unexpectedly, and in those places we use ``lib.item_from_zerodim`` instead. I don't know whether any of those cases are reachable here.

except (
TypeError, # downstream error msg for CategoricalIndex is misleading
ValueError,
):
msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}"
raise ValueError(msg)

result = take_nd(new_cats, ensure_platform_int(self._codes))
result = take_nd(
new_cats, ensure_platform_int(self._codes), fill_value=fill_value
)

return result

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,3 +698,11 @@ def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype):
{"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)}
)
tm.assert_frame_equal(df, expected)

def test_astype_categorical_to_string_missing(self):
# https://github.com/pandas-dev/pandas/issues/41797
# Regression test: casting a categorical column that contains a missing
# value to ``str`` must give the same frame as casting the original
# (non-categorical) column directly.
df = DataFrame(["a", "b", np.nan])
# Reference result: cast the original frame straight to str.
expected = df.astype(str)
# Path under test: convert to "category" first, then cast to str.
cat = df.astype("category")
result = cat.astype(str)
tm.assert_frame_equal(result, expected)