|
109 | 109 | )
|
110 | 110 |
|
111 | 111 | from pandas.core.dtypes.astype import astype_is_view
|
| 112 | +from pandas.core.dtypes.cast import can_hold_element |
112 | 113 | from pandas.core.dtypes.common import (
|
113 | 114 | ensure_object,
|
114 | 115 | ensure_platform_int,
|
@@ -7117,53 +7118,69 @@ def fillna(
|
7117 | 7118 | new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
|
7118 | 7119 |
|
7119 | 7120 | elif isinstance(value, (dict, ABCSeries)):
|
7120 |
| - if axis == 1: |
7121 |
| - raise NotImplementedError( |
7122 |
| - "Currently only can fill with dict/Series column by column" |
7123 |
| - ) |
7124 | 7121 | result = self if inplace else self.copy(deep=False)
|
7125 |
| - for k, v in value.items(): |
7126 |
| - if k not in result: |
7127 |
| - continue |
| 7122 | + if axis == 1: |
| 7123 | + # Check that all columns in result have the same dtype; |
| 7124 | + # otherwise refuse to fill rather than lose accurate dtypes |
| 7125 | + unique_dtypes = algos.unique(self._mgr.get_dtypes()) |
| 7126 | + if len(unique_dtypes) > 1: |
| 7127 | + raise ValueError( |
| 7128 | + "All columns must have the same dtype, but got dtypes: " |
| 7129 | + f"{list(unique_dtypes)}" |
| 7130 | + ) |
| 7131 | + # Use the first column, which we have already validated has the |
| 7132 | + # same dtype as the other columns. |
| 7133 | + if not can_hold_element(result.iloc[:, 0], value): |
| 7134 | + frame_dtype = unique_dtypes.item() |
| 7135 | + raise ValueError( |
| 7136 | + f"{value} not a suitable type to fill into {frame_dtype}" |
| 7137 | + ) |
| 7138 | + result = result.T.fillna(value=value).T |
| 7139 | + else: |
| 7140 | + for k, v in value.items(): |
| 7141 | + if k not in result: |
| 7142 | + continue |
7128 | 7143 |
|
7129 |
| - res_k = result[k].fillna(v, limit=limit) |
| 7144 | + res_k = result[k].fillna(v, limit=limit) |
7130 | 7145 |
|
7131 |
| - if not inplace: |
7132 |
| - result[k] = res_k |
7133 |
| - else: |
7134 |
| - # We can write into our existing column(s) iff dtype |
7135 |
| - # was preserved. |
7136 |
| - if isinstance(res_k, ABCSeries): |
7137 |
| - # i.e. 'k' only shows up once in self.columns |
7138 |
| - if res_k.dtype == result[k].dtype: |
7139 |
| - result.loc[:, k] = res_k |
7140 |
| - else: |
7141 |
| - # Different dtype -> no way to do inplace. |
7142 |
| - result[k] = res_k |
| 7146 | + if not inplace: |
| 7147 | + result[k] = res_k |
7143 | 7148 | else:
|
7144 |
| - # see test_fillna_dict_inplace_nonunique_columns |
7145 |
| - locs = result.columns.get_loc(k) |
7146 |
| - if isinstance(locs, slice): |
7147 |
| - locs = range(self.shape[1])[locs] |
7148 |
| - elif isinstance(locs, np.ndarray) and locs.dtype.kind == "b": |
7149 |
| - locs = locs.nonzero()[0] |
7150 |
| - elif not ( |
7151 |
| - isinstance(locs, np.ndarray) and locs.dtype.kind == "i" |
7152 |
| - ): |
7153 |
| - # Should never be reached, but let's cover our bases |
7154 |
| - raise NotImplementedError( |
7155 |
| - "Unexpected get_loc result, please report a bug at " |
7156 |
| - "https://github.com/pandas-dev/pandas" |
7157 |
| - ) |
7158 |
| - |
7159 |
| - for i, loc in enumerate(locs): |
7160 |
| - res_loc = res_k.iloc[:, i] |
7161 |
| - target = self.iloc[:, loc] |
7162 |
| - |
7163 |
| - if res_loc.dtype == target.dtype: |
7164 |
| - result.iloc[:, loc] = res_loc |
| 7149 | + # We can write into our existing column(s) iff dtype |
| 7150 | + # was preserved. |
| 7151 | + if isinstance(res_k, ABCSeries): |
| 7152 | + # i.e. 'k' only shows up once in self.columns |
| 7153 | + if res_k.dtype == result[k].dtype: |
| 7154 | + result.loc[:, k] = res_k |
7165 | 7155 | else:
|
7166 |
| - result.isetitem(loc, res_loc) |
| 7156 | + # Different dtype -> no way to do inplace. |
| 7157 | + result[k] = res_k |
| 7158 | + else: |
| 7159 | + # see test_fillna_dict_inplace_nonunique_columns |
| 7160 | + locs = result.columns.get_loc(k) |
| 7161 | + if isinstance(locs, slice): |
| 7162 | + locs = range(self.shape[1])[locs] |
| 7163 | + elif ( |
| 7164 | + isinstance(locs, np.ndarray) and locs.dtype.kind == "b" |
| 7165 | + ): |
| 7166 | + locs = locs.nonzero()[0] |
| 7167 | + elif not ( |
| 7168 | + isinstance(locs, np.ndarray) and locs.dtype.kind == "i" |
| 7169 | + ): |
| 7170 | + # Should never be reached, but let's cover our bases |
| 7171 | + raise NotImplementedError( |
| 7172 | + "Unexpected get_loc result, please report a bug at " |
| 7173 | + "https://github.com/pandas-dev/pandas" |
| 7174 | + ) |
| 7175 | + |
| 7176 | + for i, loc in enumerate(locs): |
| 7177 | + res_loc = res_k.iloc[:, i] |
| 7178 | + target = self.iloc[:, loc] |
| 7179 | + |
| 7180 | + if res_loc.dtype == target.dtype: |
| 7181 | + result.iloc[:, loc] = res_loc |
| 7182 | + else: |
| 7183 | + result.isetitem(loc, res_loc) |
7167 | 7184 | if inplace:
|
7168 | 7185 | return self._update_inplace(result)
|
7169 | 7186 | else:
|
|
0 commit comments