From 8ea853af7288c0e847569413da2fd8c5e0d606f6 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 2 Oct 2025 22:21:03 +0100 Subject: [PATCH 1/2] Remove Zip Strict entry from .toml for pandas/util --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f5a34d71c815f..39dcca9ca2160 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -632,8 +632,6 @@ exclude = [ "pandas/tests/window/test_cython_aggregations.py" = ["B905"] "pandas/tests/window/test_expanding.py" = ["B905"] "pandas/tests/window/test_rolling.py" = ["B905"] -"pandas/util/_doctools.py" = ["B905"] -"pandas/util/_validators.py" = ["B905"] "scripts/validate_unwanted_patterns.py" = ["B905"] [tool.ruff.lint.flake8-pytest-style] From d52e4aacab54caec888d4c38a46da1ed2103a174 Mon Sep 17 00:00:00 2001 From: vishal Date: Sat, 4 Oct 2025 13:06:57 +0100 Subject: [PATCH 2/2] Zip Strict for pandas/core level files #62469 --- pandas/core/apply.py | 10 ++++++---- pandas/core/arraylike.py | 14 ++++++++------ pandas/core/frame.py | 26 ++++++++++++++++---------- pandas/core/generic.py | 11 ++++++++--- pandas/core/indexing.py | 10 +++++----- pandas/core/series.py | 8 ++++---- pandas/core/sorting.py | 14 +++++++++----- pyproject.toml | 7 ------- 8 files changed, 56 insertions(+), 44 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9e37239c287b5..b305cbfaa3a1e 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -564,7 +564,7 @@ def compute_dict_like( indices = selected_obj.columns.get_indexer_for([key]) labels = selected_obj.columns.take(indices) label_to_indices = defaultdict(list) - for index, label in zip(indices, labels): + for index, label in zip(indices, labels, strict=True): label_to_indices[label].append(index) key_data = [ @@ -618,7 +618,9 @@ def wrap_results_dict_like( if all(is_ndframe): results = [result for result in result_data if not result.empty] keys_to_use: Iterable[Hashable] - keys_to_use = [k for k, v in zip(result_index, result_data) if not v.empty] + keys_to_use = [ + k for k, v in zip(result_index, result_data, strict=True) if not v.empty + ] # Have to check, if at least one DataFrame is not empty. if keys_to_use == []: keys_to_use = result_index @@ -1359,7 +1361,7 @@ def series_generator(self) -> Generator[Series]: yield obj._ixs(i, axis=0) else: - for arr, name in zip(values, self.index): + for arr, name in zip(values, self.index, strict=True): # GH#35462 re-pin mgr in case setitem changed it ser._mgr = mgr mgr.set_values(arr) @@ -1913,7 +1915,7 @@ def relabel_result( from pandas.core.indexes.base import Index reordered_indexes = [ - pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1]) + pair[0] for pair in sorted(zip(columns, order, strict=True), key=lambda t: t[1]) ] reordered_result_in_dict: dict[Hashable, Series] = {} idx = 0 diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 51ddd9e91b227..eeef8016db07f 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -298,7 +298,9 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any) # align all the inputs. types = tuple(type(x) for x in inputs) - alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)] + alignable = [ + x for x, t in zip(inputs, types, strict=True) if issubclass(t, NDFrame) + ] if len(alignable) > 1: # This triggers alignment. @@ -317,16 +319,16 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any) for obj in alignable[1:]: # this relies on the fact that we aren't handling mixed # series / frame ufuncs. - for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)): + for i, (ax1, ax2) in enumerate(zip(axes, obj.axes, strict=True)): axes[i] = ax1.union(ax2) - reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes)) + reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes, strict=True)) inputs = tuple( x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x - for x, t in zip(inputs, types) + for x, t in zip(inputs, types, strict=True) ) else: - reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes)) + reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes, strict=True)) if self.ndim == 1: names = {x.name for x in inputs if hasattr(x, "name")} @@ -450,7 +452,7 @@ def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwarg if not isinstance(out, tuple) or len(out) != len(result): raise NotImplementedError - for arr, res in zip(out, result): + for arr, res in zip(out, result, strict=True): _assign_where(arr, res, where) return out diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 694ff60166d43..72c9fe51be7f3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1524,7 +1524,7 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]: """ columns = self.columns klass = self._constructor_sliced - for k, v in zip(self.index, self.values): + for k, v in zip(self.index, self.values, strict=True): s = klass(v, index=columns, name=k).__finalize__(self) if self._mgr.is_single_block: s._mgr.add_references(self._mgr) @@ -1607,10 +1607,10 @@ def itertuples( itertuple = collections.namedtuple( # type: ignore[misc] name, fields, rename=True ) - return map(itertuple._make, zip(*arrays)) + return map(itertuple._make, zip(*arrays, strict=True)) # fallback to regular tuples - return zip(*arrays) + return zip(*arrays, strict=True) def __len__(self) -> int: """ @@ -4359,7 +4359,7 @@ def _setitem_array(self, key, value) -> None: if isinstance(value, DataFrame): check_key_length(self.columns, key, value) - for k1, k2 in zip(key, value.columns): + for k1, k2 in zip(key, value.columns, strict=False): self[k1] = value[k2] elif not is_list_like(value): @@ -4465,7 +4465,7 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: if len(cols_droplevel) and not cols_droplevel.equals(value.columns): value = value.reindex(cols_droplevel, axis=1) - for col, col_droplevel in zip(cols, cols_droplevel): + for col, col_droplevel in zip(cols, cols_droplevel, strict=True): self[col] = value[col_droplevel] return @@ -6567,7 +6567,11 @@ class max type names = self.index._get_default_index_names(names, default) if isinstance(self.index, MultiIndex): - to_insert = zip(reversed(self.index.levels), reversed(self.index.codes)) + to_insert = zip( + reversed(self.index.levels), + reversed(self.index.codes), + strict=True, + ) else: to_insert = ((self.index, None),) @@ -7093,7 +7097,7 @@ def f(vals) -> tuple[np.ndarray, int]: result.name = None else: vals = (col.values for name, col in self.items() if name in subset) - labels, shape = map(list, zip(*map(f, vals))) + labels, shape = map(list, zip(*map(f, vals), strict=True)) ids = get_group_index(labels, tuple(shape), sort=False, xnull=False) result = self._constructor_sliced(duplicated(ids, keep), index=self.index) @@ -7346,7 +7350,9 @@ def sort_values( # need to rewrap columns in Series to apply key function if key is not None: - keys_data = [Series(k, name=name) for (k, name) in zip(keys, by)] + keys_data = [ + Series(k, name=name) for (k, name) in zip(keys, by, strict=True) + ] else: # error: Argument 1 to "list" has incompatible type # "Generator[ExtensionArray | ndarray[Any, Any], None, None]"; @@ -8208,7 +8214,7 @@ def _dispatch_frame_op( arrays = [ array_op(_left, _right) - for _left, _right in zip(self._iter_column_arrays(), right) + for _left, _right in zip(self._iter_column_arrays(), right, strict=True) ] elif isinstance(right, Series): @@ -11745,7 +11751,7 @@ def c(x): return nanops.nancorr(x[0], x[1], method=method) correl = self._constructor_sliced( - map(c, zip(left.values.T, right.values.T)), + map(c, zip(left.values.T, right.values.T, strict=True)), index=left.columns, copy=False, ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 169f4726146be..f771e074dba45 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -614,7 +614,12 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: clean_column_name(k): Series( v, copy=False, index=self.index, name=k, dtype=dtype ).__finalize__(self) - for k, v, dtype in zip(self.columns, self._iter_column_arrays(), dtypes) + for k, v, dtype in zip( + self.columns, + self._iter_column_arrays(), + dtypes, + strict=True, + ) } @final @@ -7546,7 +7551,7 @@ def replace( items = list(to_replace.items()) if items: - keys, values = zip(*items) + keys, values = zip(*items, strict=True) else: keys, values = ([], []) # type: ignore[assignment] @@ -7565,7 +7570,7 @@ def replace( for k, v in items: # error: Incompatible types in assignment (expression has type # "list[Never]", variable has type "tuple[Any, ...]") - keys, values = list(zip(*v.items())) or ( # type: ignore[assignment] + keys, values = list(zip(*v.items(), strict=True)) or ( # type: ignore[assignment] [], [], ) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 98eb6034b6289..9246309c0c7f1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1091,7 +1091,7 @@ def _getitem_lowerdim(self, tup: tuple): # Reverse tuple so that we are indexing along columns before rows # and avoid unintended dtype inference. # GH60600 - for i, key in zip(range(len(tup) - 1, -1, -1), reversed(tup)): + for i, key in zip(range(len(tup) - 1, -1, -1), reversed(tup), strict=True): if is_label_like(key) or is_list_like(key): # We don't need to check for tuples here because those are # caught by the _is_nested_tuple_indexer check above. @@ -1357,7 +1357,7 @@ def _multi_take(self, tup: tuple): # GH 836 d = { axis: self._get_listlike_indexer(key, axis) - for (key, axis) in zip(tup, self.obj._AXIS_ORDERS) + for (key, axis) in zip(tup, self.obj._AXIS_ORDERS, strict=True) } return self.obj._reindex_with_indexers(d, allow_dups=True) @@ -1669,7 +1669,7 @@ def _has_valid_setitem_indexer(self, indexer) -> bool: if not isinstance(indexer, tuple): indexer = _tuplify(self.ndim, indexer) - for ax, i in zip(self.obj.axes, indexer): + for ax, i in zip(self.obj.axes, indexer, strict=False): if isinstance(i, slice): # should check the stop slice? pass @@ -1841,7 +1841,7 @@ def _decide_split_path(self, indexer, value) -> bool: # (not null slices) then we must take the split path, xref # GH 10360, GH 27841 if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): - for i, ax in zip(indexer, self.obj.axes): + for i, ax in zip(indexer, self.obj.axes, strict=True): if isinstance(ax, MultiIndex) and not ( is_integer(i) or com.is_null_slice(i) ): @@ -2036,7 +2036,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): elif len(ilocs) == len(value): # We are setting multiple columns in a single row. - for loc, v in zip(ilocs, value): + for loc, v in zip(ilocs, value, strict=True): self._setitem_single_column(loc, v, pi) elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0: diff --git a/pandas/core/series.py b/pandas/core/series.py index 11a59f261de5c..3a7b0614c63ec 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1707,7 +1707,7 @@ def items(self) -> Iterable[tuple[Hashable, Any]]: Index : 1, Value : B Index : 2, Value : C """ - return zip(iter(self.index), iter(self)) + return zip(iter(self.index), iter(self), strict=True) # ---------------------------------------------------------------------- # Misc public methods @@ -5505,12 +5505,12 @@ def case_when( for condition, replacement in caselist ] default = self.copy(deep=False) - conditions, replacements = zip(*caselist) + conditions, replacements = zip(*caselist, strict=True) common_dtypes = [infer_dtype_from(arg)[0] for arg in [*replacements, default]] if len(set(common_dtypes)) > 1: common_dtype = find_common_type(common_dtypes) updated_replacements = [] - for condition, replacement in zip(conditions, replacements): + for condition, replacement in zip(conditions, replacements, strict=True): if is_scalar(replacement): replacement = construct_1d_arraylike_from_scalar( value=replacement, length=len(condition), dtype=common_dtype @@ -5525,7 +5525,7 @@ def case_when( counter = range(len(conditions) - 1, -1, -1) for position, condition, replacement in zip( - counter, reversed(conditions), reversed(replacements) + counter, reversed(conditions), reversed(replacements), strict=True ): try: default = default.mask( diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index a9827767455f7..930704e6f62f4 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -169,7 +169,7 @@ def maybe_lift(lab, size: int) -> tuple[np.ndarray, int]: labels = [ensure_int64(x) for x in labels] lshape = list(shape) if not xnull: - for i, (lab, size) in enumerate(zip(labels, shape)): + for i, (lab, size) in enumerate(zip(labels, shape, strict=True)): labels[i], lshape[i] = maybe_lift(lab, size) # Iteratively process all the labels in chunks sized so less @@ -289,7 +289,11 @@ def decons_obs_group_ids( if not is_int64_overflow_possible(shape): # obs ids are deconstructable! take the fast route! out = _decons_group_index(obs_ids, shape) - return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)] + return ( + out + if xnull or not lift.any() + else [x - y for x, y in zip(out, lift, strict=True)] + ) indexer = unique_label_indices(comp_ids) return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels] @@ -341,7 +345,7 @@ def lexsort_indexer( labels = [] - for k, order in zip(reversed(keys), orders): + for k, order in zip(reversed(keys), orders, strict=True): k = ensure_key_mapped(k, key) if codes_given: codes = cast(np.ndarray, k) @@ -473,9 +477,9 @@ def nargminmax(values: ExtensionArray, method: str, axis: AxisInt = 0): if arr_values.ndim > 1: if mask.any(): if axis == 1: - zipped = zip(arr_values, mask) + zipped = zip(arr_values, mask, strict=True) else: - zipped = zip(arr_values.T, mask.T) + zipped = zip(arr_values.T, mask.T, strict=True) return np.array([_nanargminmax(v, m, func) for v, m in zipped]) return func(arr_values, axis=axis) diff --git a/pyproject.toml b/pyproject.toml index 39dcca9ca2160..70cf3a56249ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -440,9 +440,7 @@ exclude = [ "asv_bench/benchmarks/series_methods.py" = ["B905"] "pandas/_config/config.py" = ["B905"] "pandas/conftest.py" = ["B905"] -"pandas/core/apply.py" = ["B905"] "pandas/core/array_algos/quantile.py" = ["B905"] -"pandas/core/arraylike.py" = ["B905"] "pandas/core/arrays/arrow/array.py" = ["B905"] "pandas/core/arrays/base.py" = ["B905"] "pandas/core/arrays/categorical.py" = ["B905"] @@ -456,15 +454,12 @@ exclude = [ "pandas/core/computation/expr.py" = ["B905"] "pandas/core/computation/ops.py" = ["B905"] "pandas/core/dtypes/missing.py" = ["B905"] -"pandas/core/frame.py" = ["B905"] -"pandas/core/generic.py" = ["B905"] "pandas/core/groupby/generic.py" = ["B905"] "pandas/core/groupby/groupby.py" = ["B905"] "pandas/core/groupby/grouper.py" = ["B905"] "pandas/core/groupby/ops.py" = ["B905"] "pandas/core/indexes/interval.py" = ["B905"] "pandas/core/indexes/multi.py" = ["B905"] -"pandas/core/indexing.py" = ["B905"] "pandas/core/methods/to_dict.py" = ["B905"] "pandas/core/reshape/concat.py" = ["B905"] "pandas/core/reshape/encoding.py" = ["B905"] @@ -472,8 +467,6 @@ exclude = [ "pandas/core/reshape/merge.py" = ["B905"] "pandas/core/reshape/pivot.py" = ["B905"] "pandas/core/reshape/reshape.py" = ["B905"] -"pandas/core/series.py" = ["B905"] -"pandas/core/sorting.py" = ["B905"] "pandas/core/strings/accessor.py" = ["B905"] "pandas/core/window/rolling.py" = ["B905"] "pandas/io/excel/_xlrd.py" = ["B905"]