ENH: adds columnwise fillna support (#62393)

a18rhodes · web-flow · commit f4403451f8d7 · 2025-10-04T09:14:06.000-07:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -215,6 +215,7 @@ Other enhancements
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
 - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
+- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`)
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -109,6 +109,7 @@
 )
 
 from pandas.core.dtypes.astype import astype_is_view
+from pandas.core.dtypes.cast import can_hold_element
 from pandas.core.dtypes.common import (
     ensure_object,
     ensure_platform_int,
@@ -7117,53 +7118,69 @@ def fillna(
             new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
 
         elif isinstance(value, (dict, ABCSeries)):
-            if axis == 1:
-                raise NotImplementedError(
-                    "Currently only can fill with dict/Series column by column"
-                )
             result = self if inplace else self.copy(deep=False)
-            for k, v in value.items():
-                if k not in result:
-                    continue
+            if axis == 1:
+                # Check that all columns in result have the same dtype
+                # otherwise don't bother with fillna and losing accurate dtypes
+                unique_dtypes = algos.unique(self._mgr.get_dtypes())
+                if len(unique_dtypes) > 1:
+                    raise ValueError(
+                        "All columns must have the same dtype, but got dtypes: "
+                        f"{list(unique_dtypes)}"
+                    )
+                # Use the first column, which we have already validated has the
+                # same dtypes as the other columns.
+                if not can_hold_element(result.iloc[:, 0], value):
+                    frame_dtype = unique_dtypes.item()
+                    raise ValueError(
+                        f"{value} not a suitable type to fill into {frame_dtype}"
+                    )
+                result = result.T.fillna(value=value).T
+            else:
+                for k, v in value.items():
+                    if k not in result:
+                        continue
 
-                res_k = result[k].fillna(v, limit=limit)
+                    res_k = result[k].fillna(v, limit=limit)
 
-                if not inplace:
-                    result[k] = res_k
-                else:
-                    # We can write into our existing column(s) iff dtype
-                    #  was preserved.
-                    if isinstance(res_k, ABCSeries):
-                        # i.e. 'k' only shows up once in self.columns
-                        if res_k.dtype == result[k].dtype:
-                            result.loc[:, k] = res_k
-                        else:
-                            # Different dtype -> no way to do inplace.
-                            result[k] = res_k
+                    if not inplace:
+                        result[k] = res_k
                     else:
-                        # see test_fillna_dict_inplace_nonunique_columns
-                        locs = result.columns.get_loc(k)
-                        if isinstance(locs, slice):
-                            locs = range(self.shape[1])[locs]
-                        elif isinstance(locs, np.ndarray) and locs.dtype.kind == "b":
-                            locs = locs.nonzero()[0]
-                        elif not (
-                            isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
-                        ):
-                            # Should never be reached, but let's cover our bases
-                            raise NotImplementedError(
-                                "Unexpected get_loc result, please report a bug at "
-                                "https://github.com/pandas-dev/pandas"
-                            )
-
-                        for i, loc in enumerate(locs):
-                            res_loc = res_k.iloc[:, i]
-                            target = self.iloc[:, loc]
-
-                            if res_loc.dtype == target.dtype:
-                                result.iloc[:, loc] = res_loc
+                        # We can write into our existing column(s) iff dtype
+                        #  was preserved.
+                        if isinstance(res_k, ABCSeries):
+                            # i.e. 'k' only shows up once in self.columns
+                            if res_k.dtype == result[k].dtype:
+                                result.loc[:, k] = res_k
                             else:
-                                result.isetitem(loc, res_loc)
+                                # Different dtype -> no way to do inplace.
+                                result[k] = res_k
+                        else:
+                            # see test_fillna_dict_inplace_nonunique_columns
+                            locs = result.columns.get_loc(k)
+                            if isinstance(locs, slice):
+                                locs = range(self.shape[1])[locs]
+                            elif (
+                                isinstance(locs, np.ndarray) and locs.dtype.kind == "b"
+                            ):
+                                locs = locs.nonzero()[0]
+                            elif not (
+                                isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
+                            ):
+                                # Should never be reached, but let's cover our bases
+                                raise NotImplementedError(
+                                    "Unexpected get_loc result, please report a bug at "
+                                    "https://github.com/pandas-dev/pandas"
+                                )
+
+                            for i, loc in enumerate(locs):
+                                res_loc = res_k.iloc[:, i]
+                                target = self.iloc[:, loc]
+
+                                if res_loc.dtype == target.dtype:
+                                    result.iloc[:, loc] = res_loc
+                                else:
+                                    result.isetitem(loc, res_loc)
             if inplace:
                 return self._update_inplace(result)
             else:
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
@@ -461,9 +461,47 @@ def test_fillna_dict_series(self):
         expected = df.fillna(df.max().to_dict())
         tm.assert_frame_equal(result, expected)
 
-        # disable this for now
-        with pytest.raises(NotImplementedError, match="column by column"):
-            df.fillna(df.max(axis=1), axis=1)
+    def test_fillna_dict_series_axis_1(self):
+        df = DataFrame(
+            {
+                "a": [np.nan, 1, 2, np.nan, np.nan],
+                "b": [1, 2, 3, np.nan, np.nan],
+                "c": [np.nan, 1, 2, 3, 4],
+            }
+        )
+        result = df.fillna(df.max(axis=1), axis=1)
+        df.fillna(df.max(axis=1), axis=1, inplace=True)
+        expected = DataFrame(
+            {
+                "a": [1.0, 1.0, 2.0, 3.0, 4.0],
+                "b": [1.0, 2.0, 3.0, 3.0, 4.0],
+                "c": [1.0, 1.0, 2.0, 3.0, 4.0],
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(df, expected)
+
+    def test_fillna_dict_series_axis_1_mismatch_cols(self):
+        df = DataFrame(
+            {
+                "a": ["abc", "def", np.nan, "ghi", "jkl"],
+                "b": [1, 2, 3, np.nan, np.nan],
+                "c": [np.nan, 1, 2, 3, 4],
+            }
+        )
+        with pytest.raises(ValueError, match="All columns must have the same dtype"):
+            df.fillna(Series({"a": "abc", "b": "def", "c": "hij"}), axis=1)
+
+    def test_fillna_dict_series_axis_1_value_mismatch_with_cols(self):
+        df = DataFrame(
+            {
+                "a": [np.nan, 1, 2, np.nan, np.nan],
+                "b": [1, 2, 3, np.nan, np.nan],
+                "c": [np.nan, 1, 2, 3, 4],
+            }
+        )
+        with pytest.raises(ValueError, match=".* not a suitable type to fill into .*"):
+            df.fillna(Series({"a": "abc", "b": "def", "c": "hij"}), axis=1)
 
     def test_fillna_dataframe(self):
         # GH#8377