# Patch summary (GH#58954). These lines sit ahead of the first "diff --git"
# header and are leading commentary only; they belong to no hunk.
#
# * doc/source/whatsnew/v3.0.0.rst
#     Adds one bug-fix bullet (hunk context: "Indexing") describing row
#     indexing via DataFrame.loc / DataFrame.iloc on a frame that has a
#     CategoricalDtype column with integer categories and a NaN entry.
#
# * pandas/core/internals/managers.py
#     - Imports CategoricalDtype from pandas.core.dtypes.dtypes.
#     - In fast_xs, the value from blk.iget((i, loc)) is now held in `item`
#       before being stored. When result.dtype.kind is one of "i", "u", "b",
#       the item is a float NaN, and blk.dtype is a CategoricalDtype, `result`
#       is first recast with astype: to object when the kind is "b", to
#       np.float64 otherwise. Only then is result[rl] assigned.
#     - The in-hunk comment explains why a try/except is not used: for bool
#       the NaN would be cast to True instead of raising.
#
# * pandas/tests/indexing/test_categorical.py
#     Adds two tests that compare df.iloc[1] and df.loc[1] against an expected
#     Series: one with an integer column (expects [2, NaN]) and one with a
#     bool column (expects [False, NaN] with dtype=object).
#
# NOTE(review): the patch text below was re-laid out to one diff record per
# line so that each hunk matches the line counts in its "@@" header. The
# leading indentation of context/added lines is inferred from Python block
# structure -- confirm against the upstream tree before applying.
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index d6a547b0cd98a..7ec50137c3039 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -947,6 +947,7 @@ Indexing
 - Bug in :meth:`Series.__setitem__` when assigning boolean series with boolean indexer will raise ``LossySetitemError`` (:issue:`57338`)
 - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
 - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
+- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
 - Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
 
 Missing
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 94437ac93570c..688567a8d0ad7 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -50,6 +50,7 @@
     is_list_like,
 )
 from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
     SparseDtype,
@@ -1138,7 +1139,24 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
             # Such assignment may incorrectly coerce NaT to None
             # result[blk.mgr_locs] = blk._slice((slice(None), loc))
             for i, rl in enumerate(blk.mgr_locs):
-                result[rl] = blk.iget((i, loc))
+                item = blk.iget((i, loc))
+                if (
+                    result.dtype.kind in "iub"
+                    and lib.is_float(item)
+                    and isna(item)
+                    and isinstance(blk.dtype, CategoricalDtype)
+                ):
+                    # GH#58954 caused bc interleaved_dtype is wrong for Categorical
+                    # TODO(GH#38240) this will be unnecessary
+                    # Note that doing this in a try/except would work for the
+                    # integer case, but not for bool, which will cast the NaN
+                    # entry to True.
+                    if result.dtype.kind == "b":
+                        new_dtype = object
+                    else:
+                        new_dtype = np.float64
+                    result = result.astype(new_dtype)
+                result[rl] = item
 
         if isinstance(dtype, ExtensionDtype):
             cls = dtype.construct_array_type()
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index c9f29b2cb55fe..a31f463d0b17e 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -571,3 +571,25 @@ def test_getitem_categorical_with_nan(self):
         df = DataFrame(ser)
         assert df.loc[np.nan, 0] == 2
         assert df.loc[np.nan][0] == 2
+
+    def test_getitem_row_categorical_with_nan(self):
+        # GH#58954
+        df = DataFrame({"a": [1, 2], "b": CategoricalIndex([1, None])})
+
+        res = df.iloc[1]
+        expected = Series([2, np.nan], index=df.columns, name=1)
+        tm.assert_series_equal(res, expected)
+
+        res = df.loc[1]
+        tm.assert_series_equal(res, expected)
+
+    def test_getitem_row_categorical_with_nan_bool(self):
+        # GH#58954
+        df = DataFrame({"a": [True, False], "b": CategoricalIndex([False, None])})
+
+        res = df.iloc[1]
+        expected = Series([False, np.nan], index=df.columns, dtype=object, name=1)
+        tm.assert_series_equal(res, expected)
+
+        res = df.loc[1]
+        tm.assert_series_equal(res, expected)