Commit fb3e85f

check allow_slice
2 parents: 06af37c + b20d6ab

34 files changed: +377 additions, -215 deletions

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions

@@ -215,6 +215,7 @@ Other enhancements
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
 - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
+- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`)
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`)

@@ -1134,6 +1135,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
 - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
+- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` where the end of window was not indexed correctly. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)

 Reshaping
 ^^^^^^^^^
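
The :meth:`DataFrame.fillna` entry above adds per-row filling. A minimal sketch of the intended usage, assuming the dict keys map to row labels when ``axis=1`` (mirroring how the default ``axis=0`` maps column labels; the exact semantics come from the enhancement note and are not verified here):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"A": [np.nan, 2.0], "B": [3.0, np.nan]}, index=["r1", "r2"])

    # Assumed semantics per the whatsnew entry (GH 4514): with axis=1 the
    # dict maps row labels to fill values.
    filled = df.fillna({"r1": 0.0, "r2": 1.0}, axis=1)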

pandas/_libs/hashing.pyx

Lines changed: 2 additions & 0 deletions

@@ -91,6 +91,8 @@ def hash_object_array(
             hash(val)
             data = <bytes>str(val).encode(encoding)
         else:
+            free(vecs)
+            free(lens)
             raise TypeError(
                 f"{val} of type {type(val)} is not a valid type for hashing, "
                 "must be string or null"

pandas/_libs/window/aggregations.pyx

Lines changed: 1 addition & 1 deletion

@@ -442,7 +442,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,

             # Over the first window, observations can only be added
             # never removed
-            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
+            if i == 0 or not is_monotonic_increasing_bounds or s < end[i]:

                 prev_value = values[s]
                 num_consecutive_same_value = 0
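
The changed condition controls when ``roll_var`` reinitializes its running moments from ``values[s]`` instead of updating them incrementally; the linked issues report incorrect variances when custom window bounds made the old ``s >= end[i - 1]`` test misfire. A sketch of the kind of call that exercises non-default bounds (illustrative only, not a reproduction taken from the issues):

    import pandas as pd
    from pandas.api.indexers import FixedForwardWindowIndexer

    s = pd.Series([1.0, 4.0, 2.0, 8.0, 5.0])
    # Forward-looking windows yield start/end bounds unlike the default
    # trailing windows, stressing the reinitialization check in roll_var.
    indexer = FixedForwardWindowIndexer(window_size=3)
    print(s.rolling(indexer).var())
    print(s.rolling(indexer).std())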

pandas/core/apply.py

Lines changed: 6 additions & 4 deletions

@@ -564,7 +564,7 @@ def compute_dict_like(
             indices = selected_obj.columns.get_indexer_for([key])
             labels = selected_obj.columns.take(indices)
             label_to_indices = defaultdict(list)
-            for index, label in zip(indices, labels):
+            for index, label in zip(indices, labels, strict=True):
                 label_to_indices[label].append(index)

             key_data = [

@@ -618,7 +618,9 @@ def wrap_results_dict_like(
     if all(is_ndframe):
         results = [result for result in result_data if not result.empty]
         keys_to_use: Iterable[Hashable]
-        keys_to_use = [k for k, v in zip(result_index, result_data) if not v.empty]
+        keys_to_use = [
+            k for k, v in zip(result_index, result_data, strict=True) if not v.empty
+        ]
         # Have to check, if at least one DataFrame is not empty.
         if keys_to_use == []:
             keys_to_use = result_index

@@ -1359,7 +1361,7 @@ def series_generator(self) -> Generator[Series]:
                 yield obj._ixs(i, axis=0)

         else:
-            for arr, name in zip(values, self.index):
+            for arr, name in zip(values, self.index, strict=True):
                 # GH#35462 re-pin mgr in case setitem changed it
                 ser._mgr = mgr
                 mgr.set_values(arr)

@@ -1913,7 +1915,7 @@ def relabel_result(
     from pandas.core.indexes.base import Index

     reordered_indexes = [
-        pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
+        pair[0] for pair in sorted(zip(columns, order, strict=True), key=lambda t: t[1])
     ]
     reordered_result_in_dict: dict[Hashable, Series] = {}
     idx = 0
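
These hunks, like most of the ones below, add ``strict=True`` to ``zip`` calls whose iterables are known to be equally long. On Python 3.10+, ``zip(..., strict=True)`` raises ``ValueError`` on a length mismatch instead of silently truncating, turning a latent inconsistency into an immediate failure:

    list(zip([1, 2, 3], ["a", "b", "c"], strict=True))  # [(1, 'a'), (2, 'b'), (3, 'c')]

    try:
        list(zip([1, 2, 3], ["a", "b"], strict=True))
    except ValueError as err:
        print(err)  # zip() argument 2 is shorter than argument 1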

pandas/core/array_algos/quantile.py

Lines changed: 1 addition & 1 deletion

@@ -197,7 +197,7 @@ def _nanquantile(
         assert mask.shape == values.shape
         result = [
             _nanquantile_1d(val, m, qs, na_value, interpolation=interpolation)
-            for (val, m) in zip(list(values), list(mask))
+            for (val, m) in zip(list(values), list(mask), strict=True)
         ]
         if values.dtype.kind == "f":
             # preserve itemsize

pandas/core/arraylike.py

Lines changed: 8 additions & 6 deletions

@@ -298,7 +298,9 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)

     # align all the inputs.
     types = tuple(type(x) for x in inputs)
-    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
+    alignable = [
+        x for x, t in zip(inputs, types, strict=True) if issubclass(t, NDFrame)
+    ]

     if len(alignable) > 1:
         # This triggers alignment.

@@ -317,16 +319,16 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
         for obj in alignable[1:]:
             # this relies on the fact that we aren't handling mixed
             # series / frame ufuncs.
-            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
+            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes, strict=True)):
                 axes[i] = ax1.union(ax2)

-        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
+        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes, strict=True))
         inputs = tuple(
             x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
-            for x, t in zip(inputs, types)
+            for x, t in zip(inputs, types, strict=True)
         )
     else:
-        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
+        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes, strict=True))

     if self.ndim == 1:
         names = {x.name for x in inputs if hasattr(x, "name")}

@@ -450,7 +452,7 @@ def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwarg
         if not isinstance(out, tuple) or len(out) != len(result):
             raise NotImplementedError

-        for arr, res in zip(out, result):
+        for arr, res in zip(out, result, strict=True):
             _assign_where(arr, res, where)

         return out
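
For context, ``array_ufunc`` reindexes every NDFrame input to the union of their axes before the ufunc runs, which is why the zipped axes and inputs here are guaranteed to have matching lengths. The observable behavior:

    import numpy as np
    import pandas as pd

    s1 = pd.Series([1.0, 2.0], index=["a", "b"])
    s2 = pd.Series([10.0, 20.0], index=["b", "c"])
    # Both inputs are aligned to the union of labels before np.add runs;
    # labels present in only one input produce NaN.
    print(np.add(s1, s2))  # index ['a', 'b', 'c'], NaN at 'a' and 'c'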

pandas/core/dtypes/inference.py

Lines changed: 20 additions & 7 deletions

@@ -380,6 +380,11 @@ def is_hashable(obj: object, allow_slice: bool | None = None) -> TypeGuard[Hasha
     """
     Return True if hash(obj) will succeed, False otherwise.

+    If `allow_slice` is False, objects that are slices or tuples containing slices
+    will always return False, even if hash(obj) would succeed.
+    If `allow_slice` is True or None, slices and tuples containing slices are treated as
+    hashable if hash(obj) does not raise TypeError.
+
     Some types will pass a test against collections.abc.Hashable but fail when
     they are actually hashed with hash().

@@ -391,17 +396,16 @@ def is_hashable(obj: object, allow_slice: bool | None = None) -> TypeGuard[Hasha
     obj : object
         The object to check for hashability. Any Python object can be passed here.
     allow_slice : bool or None
-        If True, return True if the object is hashable (including slices).
+        If True or None, return True if the object is hashable (including slices).
         If False, return True if the object is hashable and not a slice.
-        If None, return True if the object is hashable without checking
-        for slice type.

     Returns
     -------
     bool
         True if object can be hashed (i.e., does not raise TypeError when
-        passed to hash()) and allow_slice is True or None, and False otherwise
-        (e.g., if object is mutable like a list or dictionary).
+        passed to hash()) and passes the slice check according to 'allow_slice'.
+        False otherwise (e.g., if object is mutable like a list or dictionary
+        or if allow_slice is False and object is a slice or contains a slice).

     See Also
     --------

@@ -419,6 +423,14 @@ def is_hashable(obj: object, allow_slice: bool | None = None) -> TypeGuard[Hasha
     True
     >>> is_hashable(a)
     False
+    >>> is_hashable(slice(1, 2, 3))
+    True
+    >>> is_hashable(slice(1, 2, 3), allow_slice=False)
+    False
+    >>> is_hashable((slice(1, 2, 3),), allow_slice=False)
+    False
+    >>> is_hashable((slice(1, 2, 3),), allow_slice=True)
+    True
     """
     # Unfortunately, we can't use isinstance(obj, collections.abc.Hashable),
     # which can be faster than calling hash. That is because numpy scalars

@@ -435,13 +447,14 @@ def _contains_slice(x: object) -> bool:
                     return True
         return False

+    if allow_slice is False and _contains_slice(obj):
+        return False
+
     try:
         hash(obj)
     except TypeError:
         return False
     else:
-        if allow_slice is False and _contains_slice(obj):
-            return False
         return True

pandas/core/frame.py

Lines changed: 16 additions & 10 deletions

@@ -1524,7 +1524,7 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]:
         """
         columns = self.columns
         klass = self._constructor_sliced
-        for k, v in zip(self.index, self.values):
+        for k, v in zip(self.index, self.values, strict=True):
             s = klass(v, index=columns, name=k).__finalize__(self)
             if self._mgr.is_single_block:
                 s._mgr.add_references(self._mgr)

@@ -1607,10 +1607,10 @@ def itertuples(
             itertuple = collections.namedtuple(  # type: ignore[misc]
                 name, fields, rename=True
             )
-            return map(itertuple._make, zip(*arrays))
+            return map(itertuple._make, zip(*arrays, strict=True))

         # fallback to regular tuples
-        return zip(*arrays)
+        return zip(*arrays, strict=True)

     def __len__(self) -> int:
         """

@@ -4359,7 +4359,7 @@ def _setitem_array(self, key, value) -> None:

             if isinstance(value, DataFrame):
                 check_key_length(self.columns, key, value)
-                for k1, k2 in zip(key, value.columns):
+                for k1, k2 in zip(key, value.columns, strict=False):
                     self[k1] = value[k2]

             elif not is_list_like(value):

@@ -4465,7 +4465,7 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
             if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
                 value = value.reindex(cols_droplevel, axis=1)

-            for col, col_droplevel in zip(cols, cols_droplevel):
+            for col, col_droplevel in zip(cols, cols_droplevel, strict=True):
                 self[col] = value[col_droplevel]
             return

@@ -6567,7 +6567,11 @@ class max type
         names = self.index._get_default_index_names(names, default)

         if isinstance(self.index, MultiIndex):
-            to_insert = zip(reversed(self.index.levels), reversed(self.index.codes))
+            to_insert = zip(
+                reversed(self.index.levels),
+                reversed(self.index.codes),
+                strict=True,
+            )
         else:
             to_insert = ((self.index, None),)

@@ -7093,7 +7097,7 @@ def f(vals) -> tuple[np.ndarray, int]:
             result.name = None
         else:
             vals = (col.values for name, col in self.items() if name in subset)
-            labels, shape = map(list, zip(*map(f, vals)))
+            labels, shape = map(list, zip(*map(f, vals), strict=True))

             ids = get_group_index(labels, tuple(shape), sort=False, xnull=False)
             result = self._constructor_sliced(duplicated(ids, keep), index=self.index)

@@ -7346,7 +7350,9 @@ def sort_values(

             # need to rewrap columns in Series to apply key function
             if key is not None:
-                keys_data = [Series(k, name=name) for (k, name) in zip(keys, by)]
+                keys_data = [
+                    Series(k, name=name) for (k, name) in zip(keys, by, strict=True)
+                ]
             else:
                 # error: Argument 1 to "list" has incompatible type
                 # "Generator[ExtensionArray | ndarray[Any, Any], None, None]";

@@ -8208,7 +8214,7 @@ def _dispatch_frame_op(

             arrays = [
                 array_op(_left, _right)
-                for _left, _right in zip(self._iter_column_arrays(), right)
+                for _left, _right in zip(self._iter_column_arrays(), right, strict=True)
             ]

         elif isinstance(right, Series):

@@ -11745,7 +11751,7 @@ def c(x):
                 return nanops.nancorr(x[0], x[1], method=method)

             correl = self._constructor_sliced(
-                map(c, zip(left.values.T, right.values.T)),
+                map(c, zip(left.values.T, right.values.T, strict=True)),
                 index=left.columns,
                 copy=False,
             )
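
The ``sort_values`` hunk rewraps each sort key in a ``Series`` named after its column before the user's ``key`` callable is applied, so the callable always receives a ``Series``. A call that goes through this path:

    import pandas as pd

    df = pd.DataFrame({"x": [-3, 1, -2], "y": [1, 2, 3]})
    # key receives each 'by' column wrapped in a Series; sort by absolute value
    print(df.sort_values(by="x", key=lambda s: s.abs()))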
