Skip to content

Commit c6cb66a

Browse files
authored
Merge branch 'main' into doc-check-array-indexer-int-slice
2 parents a906325 + b20d6ab commit c6cb66a

32 files changed

+330
-188
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ Other enhancements
215215
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
216216
- Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
217217
- Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
218+
- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`)
218219
- Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
219220
- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
220221
- Implemented :meth:`Series.str.isascii` and :meth:`Index.str.isascii` (:issue:`59091`)
@@ -1134,6 +1135,7 @@ Groupby/resample/rolling
11341135
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
11351136
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
11361137
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
1138+
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` where the end of window was not indexed correctly. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
11371139

11381140
Reshaping
11391141
^^^^^^^^^

pandas/_libs/hashing.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ def hash_object_array(
9191
hash(val)
9292
data = <bytes>str(val).encode(encoding)
9393
else:
94+
free(vecs)
95+
free(lens)
9496
raise TypeError(
9597
f"{val} of type {type(val)} is not a valid type for hashing, "
9698
"must be string or null"

pandas/_libs/window/aggregations.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
442442

443443
# Over the first window, observations can only be added
444444
# never removed
445-
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
445+
if i == 0 or not is_monotonic_increasing_bounds or s < end[i]:
446446

447447
prev_value = values[s]
448448
num_consecutive_same_value = 0

pandas/core/apply.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ def compute_dict_like(
564564
indices = selected_obj.columns.get_indexer_for([key])
565565
labels = selected_obj.columns.take(indices)
566566
label_to_indices = defaultdict(list)
567-
for index, label in zip(indices, labels):
567+
for index, label in zip(indices, labels, strict=True):
568568
label_to_indices[label].append(index)
569569

570570
key_data = [
@@ -618,7 +618,9 @@ def wrap_results_dict_like(
618618
if all(is_ndframe):
619619
results = [result for result in result_data if not result.empty]
620620
keys_to_use: Iterable[Hashable]
621-
keys_to_use = [k for k, v in zip(result_index, result_data) if not v.empty]
621+
keys_to_use = [
622+
k for k, v in zip(result_index, result_data, strict=True) if not v.empty
623+
]
622624
# Have to check, if at least one DataFrame is not empty.
623625
if keys_to_use == []:
624626
keys_to_use = result_index
@@ -1359,7 +1361,7 @@ def series_generator(self) -> Generator[Series]:
13591361
yield obj._ixs(i, axis=0)
13601362

13611363
else:
1362-
for arr, name in zip(values, self.index):
1364+
for arr, name in zip(values, self.index, strict=True):
13631365
# GH#35462 re-pin mgr in case setitem changed it
13641366
ser._mgr = mgr
13651367
mgr.set_values(arr)
@@ -1913,7 +1915,7 @@ def relabel_result(
19131915
from pandas.core.indexes.base import Index
19141916

19151917
reordered_indexes = [
1916-
pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
1918+
pair[0] for pair in sorted(zip(columns, order, strict=True), key=lambda t: t[1])
19171919
]
19181920
reordered_result_in_dict: dict[Hashable, Series] = {}
19191921
idx = 0

pandas/core/array_algos/quantile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def _nanquantile(
197197
assert mask.shape == values.shape
198198
result = [
199199
_nanquantile_1d(val, m, qs, na_value, interpolation=interpolation)
200-
for (val, m) in zip(list(values), list(mask))
200+
for (val, m) in zip(list(values), list(mask), strict=True)
201201
]
202202
if values.dtype.kind == "f":
203203
# preserve itemsize

pandas/core/arraylike.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,9 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
298298

299299
# align all the inputs.
300300
types = tuple(type(x) for x in inputs)
301-
alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
301+
alignable = [
302+
x for x, t in zip(inputs, types, strict=True) if issubclass(t, NDFrame)
303+
]
302304

303305
if len(alignable) > 1:
304306
# This triggers alignment.
@@ -317,16 +319,16 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
317319
for obj in alignable[1:]:
318320
# this relies on the fact that we aren't handling mixed
319321
# series / frame ufuncs.
320-
for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
322+
for i, (ax1, ax2) in enumerate(zip(axes, obj.axes, strict=True)):
321323
axes[i] = ax1.union(ax2)
322324

323-
reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
325+
reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes, strict=True))
324326
inputs = tuple(
325327
x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
326-
for x, t in zip(inputs, types)
328+
for x, t in zip(inputs, types, strict=True)
327329
)
328330
else:
329-
reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
331+
reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes, strict=True))
330332

331333
if self.ndim == 1:
332334
names = {x.name for x in inputs if hasattr(x, "name")}
@@ -450,7 +452,7 @@ def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwarg
450452
if not isinstance(out, tuple) or len(out) != len(result):
451453
raise NotImplementedError
452454

453-
for arr, res in zip(out, result):
455+
for arr, res in zip(out, result, strict=True):
454456
_assign_where(arr, res, where)
455457

456458
return out

pandas/core/frame.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,7 +1524,7 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]:
15241524
"""
15251525
columns = self.columns
15261526
klass = self._constructor_sliced
1527-
for k, v in zip(self.index, self.values):
1527+
for k, v in zip(self.index, self.values, strict=True):
15281528
s = klass(v, index=columns, name=k).__finalize__(self)
15291529
if self._mgr.is_single_block:
15301530
s._mgr.add_references(self._mgr)
@@ -1607,10 +1607,10 @@ def itertuples(
16071607
itertuple = collections.namedtuple( # type: ignore[misc]
16081608
name, fields, rename=True
16091609
)
1610-
return map(itertuple._make, zip(*arrays))
1610+
return map(itertuple._make, zip(*arrays, strict=True))
16111611

16121612
# fallback to regular tuples
1613-
return zip(*arrays)
1613+
return zip(*arrays, strict=True)
16141614

16151615
def __len__(self) -> int:
16161616
"""
@@ -4359,7 +4359,7 @@ def _setitem_array(self, key, value) -> None:
43594359

43604360
if isinstance(value, DataFrame):
43614361
check_key_length(self.columns, key, value)
4362-
for k1, k2 in zip(key, value.columns):
4362+
for k1, k2 in zip(key, value.columns, strict=False):
43634363
self[k1] = value[k2]
43644364

43654365
elif not is_list_like(value):
@@ -4465,7 +4465,7 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
44654465
if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
44664466
value = value.reindex(cols_droplevel, axis=1)
44674467

4468-
for col, col_droplevel in zip(cols, cols_droplevel):
4468+
for col, col_droplevel in zip(cols, cols_droplevel, strict=True):
44694469
self[col] = value[col_droplevel]
44704470
return
44714471

@@ -6567,7 +6567,11 @@ class max type
65676567
names = self.index._get_default_index_names(names, default)
65686568

65696569
if isinstance(self.index, MultiIndex):
6570-
to_insert = zip(reversed(self.index.levels), reversed(self.index.codes))
6570+
to_insert = zip(
6571+
reversed(self.index.levels),
6572+
reversed(self.index.codes),
6573+
strict=True,
6574+
)
65716575
else:
65726576
to_insert = ((self.index, None),)
65736577

@@ -7093,7 +7097,7 @@ def f(vals) -> tuple[np.ndarray, int]:
70937097
result.name = None
70947098
else:
70957099
vals = (col.values for name, col in self.items() if name in subset)
7096-
labels, shape = map(list, zip(*map(f, vals)))
7100+
labels, shape = map(list, zip(*map(f, vals), strict=True))
70977101

70987102
ids = get_group_index(labels, tuple(shape), sort=False, xnull=False)
70997103
result = self._constructor_sliced(duplicated(ids, keep), index=self.index)
@@ -7346,7 +7350,9 @@ def sort_values(
73467350

73477351
# need to rewrap columns in Series to apply key function
73487352
if key is not None:
7349-
keys_data = [Series(k, name=name) for (k, name) in zip(keys, by)]
7353+
keys_data = [
7354+
Series(k, name=name) for (k, name) in zip(keys, by, strict=True)
7355+
]
73507356
else:
73517357
# error: Argument 1 to "list" has incompatible type
73527358
# "Generator[ExtensionArray | ndarray[Any, Any], None, None]";
@@ -8208,7 +8214,7 @@ def _dispatch_frame_op(
82088214

82098215
arrays = [
82108216
array_op(_left, _right)
8211-
for _left, _right in zip(self._iter_column_arrays(), right)
8217+
for _left, _right in zip(self._iter_column_arrays(), right, strict=True)
82128218
]
82138219

82148220
elif isinstance(right, Series):
@@ -11745,7 +11751,7 @@ def c(x):
1174511751
return nanops.nancorr(x[0], x[1], method=method)
1174611752

1174711753
correl = self._constructor_sliced(
11748-
map(c, zip(left.values.T, right.values.T)),
11754+
map(c, zip(left.values.T, right.values.T, strict=True)),
1174911755
index=left.columns,
1175011756
copy=False,
1175111757
)

pandas/core/generic.py

Lines changed: 67 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
)
110110

111111
from pandas.core.dtypes.astype import astype_is_view
112+
from pandas.core.dtypes.cast import can_hold_element
112113
from pandas.core.dtypes.common import (
113114
ensure_object,
114115
ensure_platform_int,
@@ -614,7 +615,12 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
614615
clean_column_name(k): Series(
615616
v, copy=False, index=self.index, name=k, dtype=dtype
616617
).__finalize__(self)
617-
for k, v, dtype in zip(self.columns, self._iter_column_arrays(), dtypes)
618+
for k, v, dtype in zip(
619+
self.columns,
620+
self._iter_column_arrays(),
621+
dtypes,
622+
strict=True,
623+
)
618624
}
619625

620626
@final
@@ -7117,53 +7123,69 @@ def fillna(
71177123
new_data = self._mgr.fillna(value=value, limit=limit, inplace=inplace)
71187124

71197125
elif isinstance(value, (dict, ABCSeries)):
7120-
if axis == 1:
7121-
raise NotImplementedError(
7122-
"Currently only can fill with dict/Series column by column"
7123-
)
71247126
result = self if inplace else self.copy(deep=False)
7125-
for k, v in value.items():
7126-
if k not in result:
7127-
continue
7127+
if axis == 1:
7128+
# Check that all columns in result have the same dtype
7129+
# otherwise don't bother with fillna and losing accurate dtypes
7130+
unique_dtypes = algos.unique(self._mgr.get_dtypes())
7131+
if len(unique_dtypes) > 1:
7132+
raise ValueError(
7133+
"All columns must have the same dtype, but got dtypes: "
7134+
f"{list(unique_dtypes)}"
7135+
)
7136+
# Use the first column, which we have already validated has the
7137+
# same dtypes as the other columns.
7138+
if not can_hold_element(result.iloc[:, 0], value):
7139+
frame_dtype = unique_dtypes.item()
7140+
raise ValueError(
7141+
f"{value} not a suitable type to fill into {frame_dtype}"
7142+
)
7143+
result = result.T.fillna(value=value).T
7144+
else:
7145+
for k, v in value.items():
7146+
if k not in result:
7147+
continue
71287148

7129-
res_k = result[k].fillna(v, limit=limit)
7149+
res_k = result[k].fillna(v, limit=limit)
71307150

7131-
if not inplace:
7132-
result[k] = res_k
7133-
else:
7134-
# We can write into our existing column(s) iff dtype
7135-
# was preserved.
7136-
if isinstance(res_k, ABCSeries):
7137-
# i.e. 'k' only shows up once in self.columns
7138-
if res_k.dtype == result[k].dtype:
7139-
result.loc[:, k] = res_k
7140-
else:
7141-
# Different dtype -> no way to do inplace.
7142-
result[k] = res_k
7151+
if not inplace:
7152+
result[k] = res_k
71437153
else:
7144-
# see test_fillna_dict_inplace_nonunique_columns
7145-
locs = result.columns.get_loc(k)
7146-
if isinstance(locs, slice):
7147-
locs = range(self.shape[1])[locs]
7148-
elif isinstance(locs, np.ndarray) and locs.dtype.kind == "b":
7149-
locs = locs.nonzero()[0]
7150-
elif not (
7151-
isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
7152-
):
7153-
# Should never be reached, but let's cover our bases
7154-
raise NotImplementedError(
7155-
"Unexpected get_loc result, please report a bug at "
7156-
"https://github.com/pandas-dev/pandas"
7157-
)
7158-
7159-
for i, loc in enumerate(locs):
7160-
res_loc = res_k.iloc[:, i]
7161-
target = self.iloc[:, loc]
7162-
7163-
if res_loc.dtype == target.dtype:
7164-
result.iloc[:, loc] = res_loc
7154+
# We can write into our existing column(s) iff dtype
7155+
# was preserved.
7156+
if isinstance(res_k, ABCSeries):
7157+
# i.e. 'k' only shows up once in self.columns
7158+
if res_k.dtype == result[k].dtype:
7159+
result.loc[:, k] = res_k
71657160
else:
7166-
result.isetitem(loc, res_loc)
7161+
# Different dtype -> no way to do inplace.
7162+
result[k] = res_k
7163+
else:
7164+
# see test_fillna_dict_inplace_nonunique_columns
7165+
locs = result.columns.get_loc(k)
7166+
if isinstance(locs, slice):
7167+
locs = range(self.shape[1])[locs]
7168+
elif (
7169+
isinstance(locs, np.ndarray) and locs.dtype.kind == "b"
7170+
):
7171+
locs = locs.nonzero()[0]
7172+
elif not (
7173+
isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
7174+
):
7175+
# Should never be reached, but let's cover our bases
7176+
raise NotImplementedError(
7177+
"Unexpected get_loc result, please report a bug at "
7178+
"https://github.com/pandas-dev/pandas"
7179+
)
7180+
7181+
for i, loc in enumerate(locs):
7182+
res_loc = res_k.iloc[:, i]
7183+
target = self.iloc[:, loc]
7184+
7185+
if res_loc.dtype == target.dtype:
7186+
result.iloc[:, loc] = res_loc
7187+
else:
7188+
result.isetitem(loc, res_loc)
71677189
if inplace:
71687190
return self._update_inplace(result)
71697191
else:
@@ -7546,7 +7568,7 @@ def replace(
75467568

75477569
items = list(to_replace.items())
75487570
if items:
7549-
keys, values = zip(*items)
7571+
keys, values = zip(*items, strict=True)
75507572
else:
75517573
keys, values = ([], []) # type: ignore[assignment]
75527574

@@ -7565,7 +7587,7 @@ def replace(
75657587
for k, v in items:
75667588
# error: Incompatible types in assignment (expression has type
75677589
# "list[Never]", variable has type "tuple[Any, ...]")
7568-
keys, values = list(zip(*v.items())) or ( # type: ignore[assignment]
7590+
keys, values = list(zip(*v.items(), strict=True)) or ( # type: ignore[assignment]
75697591
[],
75707592
[],
75717593
)

0 commit comments

Comments
 (0)