Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ Other enhancements
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
- Added a ``"delete_rows"`` option to the ``if_exists`` argument of :meth:`DataFrame.to_sql`, which deletes all records of the table before inserting data (:issue:`37210`).
- Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments in :meth:`DataFrame.fillna` (:issue:`4514`)
- Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
- Implemented :meth:`Series.str.isascii` and :meth:`Index.str.isascii` (:issue:`59091`)
Expand Down
101 changes: 59 additions & 42 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
)

from pandas.core.dtypes.astype import astype_is_view
from pandas.core.dtypes.cast import can_hold_element
from pandas.core.dtypes.common import (
ensure_object,
ensure_platform_int,
Expand Down Expand Up @@ -7117,53 +7118,69 @@ def fillna(
new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)

elif isinstance(value, (dict, ABCSeries)):
if axis == 1:
raise NotImplementedError(
"Currently only can fill with dict/Series column by column"
)
result = self if inplace else self.copy(deep=False)
for k, v in value.items():
if k not in result:
continue
if axis == 1:
# Check that all columns in result have the same dtype
# otherwise don't bother with fillna and losing accurate dtypes
unique_dtypes = algos.unique(self._mgr.get_dtypes())
if len(unique_dtypes) > 1:
raise ValueError(
"All columns must have the same dtype, but got dtypes: "
f"{list(unique_dtypes)}"
)
# Use the first column, which we have already validated has the
# same dtypes as the other columns.
if not can_hold_element(result.iloc[:, 0], value):
frame_dtype = unique_dtypes.item()
raise ValueError(
f"{value} not a suitable type to fill into {frame_dtype}"
)
result = result.T.fillna(value=value).T
else:
for k, v in value.items():
if k not in result:
continue

res_k = result[k].fillna(v, limit=limit)
res_k = result[k].fillna(v, limit=limit)

if not inplace:
result[k] = res_k
else:
# We can write into our existing column(s) iff dtype
# was preserved.
if isinstance(res_k, ABCSeries):
# i.e. 'k' only shows up once in self.columns
if res_k.dtype == result[k].dtype:
result.loc[:, k] = res_k
else:
# Different dtype -> no way to do inplace.
result[k] = res_k
if not inplace:
result[k] = res_k
else:
# see test_fillna_dict_inplace_nonunique_columns
locs = result.columns.get_loc(k)
if isinstance(locs, slice):
locs = range(self.shape[1])[locs]
elif isinstance(locs, np.ndarray) and locs.dtype.kind == "b":
locs = locs.nonzero()[0]
elif not (
isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
):
# Should never be reached, but let's cover our bases
raise NotImplementedError(
"Unexpected get_loc result, please report a bug at "
"https://github.com/pandas-dev/pandas"
)

for i, loc in enumerate(locs):
res_loc = res_k.iloc[:, i]
target = self.iloc[:, loc]

if res_loc.dtype == target.dtype:
result.iloc[:, loc] = res_loc
# We can write into our existing column(s) iff dtype
# was preserved.
if isinstance(res_k, ABCSeries):
# i.e. 'k' only shows up once in self.columns
if res_k.dtype == result[k].dtype:
result.loc[:, k] = res_k
else:
result.isetitem(loc, res_loc)
# Different dtype -> no way to do inplace.
result[k] = res_k
else:
# see test_fillna_dict_inplace_nonunique_columns
locs = result.columns.get_loc(k)
if isinstance(locs, slice):
locs = range(self.shape[1])[locs]
elif (
isinstance(locs, np.ndarray) and locs.dtype.kind == "b"
):
locs = locs.nonzero()[0]
elif not (
isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
):
# Should never be reached, but let's cover our bases
raise NotImplementedError(
"Unexpected get_loc result, please report a bug at "
"https://github.com/pandas-dev/pandas"
)

for i, loc in enumerate(locs):
res_loc = res_k.iloc[:, i]
target = self.iloc[:, loc]

if res_loc.dtype == target.dtype:
result.iloc[:, loc] = res_loc
else:
result.isetitem(loc, res_loc)
if inplace:
return self._update_inplace(result)
else:
Expand Down
44 changes: 41 additions & 3 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,9 +461,47 @@ def test_fillna_dict_series(self):
expected = df.fillna(df.max().to_dict())
tm.assert_frame_equal(result, expected)

# disable this for now
with pytest.raises(NotImplementedError, match="column by column"):
df.fillna(df.max(axis=1), axis=1)
def test_fillna_dict_series_axis_1(self):
df = DataFrame(
{
"a": [np.nan, 1, 2, np.nan, np.nan],
"b": [1, 2, 3, np.nan, np.nan],
"c": [np.nan, 1, 2, 3, 4],
}
)
result = df.fillna(df.max(axis=1), axis=1)
df.fillna(df.max(axis=1), axis=1, inplace=True)
expected = DataFrame(
{
"a": [1.0, 1.0, 2.0, 3.0, 4.0],
"b": [1.0, 2.0, 3.0, 3.0, 4.0],
"c": [1.0, 1.0, 2.0, 3.0, 4.0],
}
)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(df, expected)

def test_fillna_dict_series_axis_1_mismatch_cols(self):
    # A frame mixing a string column with numeric columns must be rejected
    # when filling along axis=1 with a Series.
    mixed_frame = DataFrame(
        {
            "a": ["abc", "def", np.nan, "ghi", "jkl"],
            "b": [1, 2, 3, np.nan, np.nan],
            "c": [np.nan, 1, 2, 3, 4],
        }
    )
    fill_values = Series({"a": "abc", "b": "def", "c": "hij"})
    msg = "All columns must have the same dtype"
    with pytest.raises(ValueError, match=msg):
        mixed_frame.fillna(fill_values, axis=1)

def test_fillna_dict_series_axis_1_value_mismatch_with_cols(self):
    # All columns are numeric, but the fill values are strings: filling
    # along axis=1 is expected to raise instead of changing the dtype.
    numeric_frame = DataFrame(
        {
            "a": [np.nan, 1, 2, np.nan, np.nan],
            "b": [1, 2, 3, np.nan, np.nan],
            "c": [np.nan, 1, 2, 3, 4],
        }
    )
    string_fill = Series({"a": "abc", "b": "def", "c": "hij"})
    msg = ".* not a suitable type to fill into .*"
    with pytest.raises(ValueError, match=msg):
        numeric_frame.fillna(string_fill, axis=1)

def test_fillna_dataframe(self):
# GH#8377
Expand Down
Loading