diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 98b91bf4a152c..9686421045949 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -215,6 +215,7 @@ Other enhancements - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`) +- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments in :meth:`DataFrame.fillna` (:issue:`4514`) - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`) - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`) - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 169f4726146be..16ebd03a5f28d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -109,6 +109,7 @@ ) from pandas.core.dtypes.astype import astype_is_view +from pandas.core.dtypes.cast import can_hold_element from pandas.core.dtypes.common import ( ensure_object, ensure_platform_int, @@ -7117,53 +7118,69 @@ def fillna( new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace) elif isinstance(value, (dict, ABCSeries)): - if axis == 1: - raise NotImplementedError( - "Currently only can fill with dict/Series column by column" - ) result = self if inplace else self.copy(deep=False) - for k, v in value.items(): - if k not in result: - continue + if axis == 1: + # Check that all columns in result have the same dtype + # 
otherwise don't bother with fillna and losing accurate dtypes + unique_dtypes = algos.unique(self._mgr.get_dtypes()) + if len(unique_dtypes) > 1: + raise ValueError( + "All columns must have the same dtype, but got dtypes: " + f"{list(unique_dtypes)}" + ) + # Use the first column, which we have already validated has the + # same dtypes as the other columns. + if not can_hold_element(result.iloc[:, 0], value): + frame_dtype = unique_dtypes.item() + raise ValueError( + f"{value} not a suitable type to fill into {frame_dtype}" + ) + result = result.T.fillna(value=value).T + else: + for k, v in value.items(): + if k not in result: + continue - res_k = result[k].fillna(v, limit=limit) + res_k = result[k].fillna(v, limit=limit) - if not inplace: - result[k] = res_k - else: - # We can write into our existing column(s) iff dtype - # was preserved. - if isinstance(res_k, ABCSeries): - # i.e. 'k' only shows up once in self.columns - if res_k.dtype == result[k].dtype: - result.loc[:, k] = res_k - else: - # Different dtype -> no way to do inplace. - result[k] = res_k + if not inplace: + result[k] = res_k else: - # see test_fillna_dict_inplace_nonunique_columns - locs = result.columns.get_loc(k) - if isinstance(locs, slice): - locs = range(self.shape[1])[locs] - elif isinstance(locs, np.ndarray) and locs.dtype.kind == "b": - locs = locs.nonzero()[0] - elif not ( - isinstance(locs, np.ndarray) and locs.dtype.kind == "i" - ): - # Should never be reached, but let's cover our bases - raise NotImplementedError( - "Unexpected get_loc result, please report a bug at " - "https://github.com/pandas-dev/pandas" - ) - - for i, loc in enumerate(locs): - res_loc = res_k.iloc[:, i] - target = self.iloc[:, loc] - - if res_loc.dtype == target.dtype: - result.iloc[:, loc] = res_loc + # We can write into our existing column(s) iff dtype + # was preserved. + if isinstance(res_k, ABCSeries): + # i.e. 
'k' only shows up once in self.columns + if res_k.dtype == result[k].dtype: + result.loc[:, k] = res_k else: - result.isetitem(loc, res_loc) + # Different dtype -> no way to do inplace. + result[k] = res_k + else: + # see test_fillna_dict_inplace_nonunique_columns + locs = result.columns.get_loc(k) + if isinstance(locs, slice): + locs = range(self.shape[1])[locs] + elif ( + isinstance(locs, np.ndarray) and locs.dtype.kind == "b" + ): + locs = locs.nonzero()[0] + elif not ( + isinstance(locs, np.ndarray) and locs.dtype.kind == "i" + ): + # Should never be reached, but let's cover our bases + raise NotImplementedError( + "Unexpected get_loc result, please report a bug at " + "https://github.com/pandas-dev/pandas" + ) + + for i, loc in enumerate(locs): + res_loc = res_k.iloc[:, i] + target = self.iloc[:, loc] + + if res_loc.dtype == target.dtype: + result.iloc[:, loc] = res_loc + else: + result.isetitem(loc, res_loc) if inplace: return self._update_inplace(result) else: diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 8915d6f205d65..e4e6975ecd9af 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -461,9 +461,47 @@ def test_fillna_dict_series(self): expected = df.fillna(df.max().to_dict()) tm.assert_frame_equal(result, expected) - # disable this for now - with pytest.raises(NotImplementedError, match="column by column"): - df.fillna(df.max(axis=1), axis=1) + def test_fillna_dict_series_axis_1(self): + df = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + } + ) + result = df.fillna(df.max(axis=1), axis=1) + df.fillna(df.max(axis=1), axis=1, inplace=True) + expected = DataFrame( + { + "a": [1.0, 1.0, 2.0, 3.0, 4.0], + "b": [1.0, 2.0, 3.0, 3.0, 4.0], + "c": [1.0, 1.0, 2.0, 3.0, 4.0], + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, expected) + + def 
test_fillna_dict_series_axis_1_mismatch_cols(self): + df = DataFrame( + { + "a": ["abc", "def", np.nan, "ghi", "jkl"], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + } + ) + with pytest.raises(ValueError, match="All columns must have the same dtype"): + df.fillna(Series({"a": "abc", "b": "def", "c": "hij"}), axis=1) + + def test_fillna_dict_series_axis_1_value_mismatch_with_cols(self): + df = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + } + ) + with pytest.raises(ValueError, match=".* not a suitable type to fill into .*"): + df.fillna(Series({"a": "abc", "b": "def", "c": "hij"}), axis=1) def test_fillna_dataframe(self): # GH#8377