
Commit d52e4aa

Zip Strict for pandas/core level files pandas-dev#62469
1 parent 8ea853a commit d52e4aa

File tree

8 files changed: +56 / -44 lines
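
Background (not part of the commit): strict= is the keyword added to the zip() builtin by PEP 618 in Python 3.10. With strict=True a length mismatch between the iterables raises ValueError instead of silently truncating to the shortest input; strict=False keeps the truncating behaviour but states it explicitly. A minimal sketch of the difference, using hypothetical lists:

# Hypothetical illustration of zip(strict=...); not taken from pandas.
indices = [0, 1, 2]
labels = ["a", "b"]  # one element short

list(zip(indices, labels))                # [(0, 'a'), (1, 'b')] -- silent truncation
list(zip(indices, labels, strict=False))  # same result, truncation made explicit

try:
    list(zip(indices, labels, strict=True))
except ValueError as exc:
    print(exc)  # zip() argument 2 is shorter than argument 1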


pandas/core/apply.py

Lines changed: 6 additions & 4 deletions
@@ -564,7 +564,7 @@ def compute_dict_like(
     indices = selected_obj.columns.get_indexer_for([key])
     labels = selected_obj.columns.take(indices)
     label_to_indices = defaultdict(list)
-    for index, label in zip(indices, labels):
+    for index, label in zip(indices, labels, strict=True):
         label_to_indices[label].append(index)

     key_data = [
@@ -618,7 +618,9 @@ def wrap_results_dict_like(
     if all(is_ndframe):
         results = [result for result in result_data if not result.empty]
         keys_to_use: Iterable[Hashable]
-        keys_to_use = [k for k, v in zip(result_index, result_data) if not v.empty]
+        keys_to_use = [
+            k for k, v in zip(result_index, result_data, strict=True) if not v.empty
+        ]
         # Have to check, if at least one DataFrame is not empty.
         if keys_to_use == []:
             keys_to_use = result_index
@@ -1359,7 +1361,7 @@ def series_generator(self) -> Generator[Series]:
             yield obj._ixs(i, axis=0)

     else:
-        for arr, name in zip(values, self.index):
+        for arr, name in zip(values, self.index, strict=True):
             # GH#35462 re-pin mgr in case setitem changed it
             ser._mgr = mgr
             mgr.set_values(arr)
@@ -1913,7 +1915,7 @@ def relabel_result(
     from pandas.core.indexes.base import Index

     reordered_indexes = [
-        pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
+        pair[0] for pair in sorted(zip(columns, order, strict=True), key=lambda t: t[1])
     ]
     reordered_result_in_dict: dict[Hashable, Series] = {}
     idx = 0

pandas/core/arraylike.py

Lines changed: 8 additions & 6 deletions
@@ -298,7 +298,9 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)

     # align all the inputs.
     types = tuple(type(x) for x in inputs)
-    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
+    alignable = [
+        x for x, t in zip(inputs, types, strict=True) if issubclass(t, NDFrame)
+    ]

     if len(alignable) > 1:
         # This triggers alignment.
@@ -317,16 +319,16 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
         for obj in alignable[1:]:
             # this relies on the fact that we aren't handling mixed
             # series / frame ufuncs.
-            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
+            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes, strict=True)):
                 axes[i] = ax1.union(ax2)

-        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
+        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes, strict=True))
         inputs = tuple(
             x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
-            for x, t in zip(inputs, types)
+            for x, t in zip(inputs, types, strict=True)
         )
     else:
-        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
+        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes, strict=True))

     if self.ndim == 1:
         names = {x.name for x in inputs if hasattr(x, "name")}
@@ -450,7 +452,7 @@ def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwarg
     if not isinstance(out, tuple) or len(out) != len(result):
         raise NotImplementedError

-    for arr, res in zip(out, result):
+    for arr, res in zip(out, result, strict=True):
         _assign_where(arr, res, where)

     return out

pandas/core/frame.py

Lines changed: 16 additions & 10 deletions
@@ -1524,7 +1524,7 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]:
     """
     columns = self.columns
     klass = self._constructor_sliced
-    for k, v in zip(self.index, self.values):
+    for k, v in zip(self.index, self.values, strict=True):
         s = klass(v, index=columns, name=k).__finalize__(self)
         if self._mgr.is_single_block:
             s._mgr.add_references(self._mgr)
@@ -1607,10 +1607,10 @@ def itertuples(
         itertuple = collections.namedtuple(  # type: ignore[misc]
             name, fields, rename=True
         )
-        return map(itertuple._make, zip(*arrays))
+        return map(itertuple._make, zip(*arrays, strict=True))

     # fallback to regular tuples
-    return zip(*arrays)
+    return zip(*arrays, strict=True)

 def __len__(self) -> int:
     """
@@ -4359,7 +4359,7 @@ def _setitem_array(self, key, value) -> None:

     if isinstance(value, DataFrame):
         check_key_length(self.columns, key, value)
-        for k1, k2 in zip(key, value.columns):
+        for k1, k2 in zip(key, value.columns, strict=False):
             self[k1] = value[k2]

     elif not is_list_like(value):
@@ -4465,7 +4465,7 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
     if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
         value = value.reindex(cols_droplevel, axis=1)

-    for col, col_droplevel in zip(cols, cols_droplevel):
+    for col, col_droplevel in zip(cols, cols_droplevel, strict=True):
         self[col] = value[col_droplevel]
     return

@@ -6567,7 +6567,11 @@ class max type
     names = self.index._get_default_index_names(names, default)

     if isinstance(self.index, MultiIndex):
-        to_insert = zip(reversed(self.index.levels), reversed(self.index.codes))
+        to_insert = zip(
+            reversed(self.index.levels),
+            reversed(self.index.codes),
+            strict=True,
+        )
     else:
         to_insert = ((self.index, None),)

@@ -7093,7 +7097,7 @@ def f(vals) -> tuple[np.ndarray, int]:
         result.name = None
     else:
         vals = (col.values for name, col in self.items() if name in subset)
-        labels, shape = map(list, zip(*map(f, vals)))
+        labels, shape = map(list, zip(*map(f, vals), strict=True))

         ids = get_group_index(labels, tuple(shape), sort=False, xnull=False)
         result = self._constructor_sliced(duplicated(ids, keep), index=self.index)
@@ -7346,7 +7350,9 @@ def sort_values(

     # need to rewrap columns in Series to apply key function
     if key is not None:
-        keys_data = [Series(k, name=name) for (k, name) in zip(keys, by)]
+        keys_data = [
+            Series(k, name=name) for (k, name) in zip(keys, by, strict=True)
+        ]
     else:
         # error: Argument 1 to "list" has incompatible type
         # "Generator[ExtensionArray | ndarray[Any, Any], None, None]";
@@ -8208,7 +8214,7 @@ def _dispatch_frame_op(

     arrays = [
         array_op(_left, _right)
-        for _left, _right in zip(self._iter_column_arrays(), right)
+        for _left, _right in zip(self._iter_column_arrays(), right, strict=True)
     ]

 elif isinstance(right, Series):
@@ -11745,7 +11751,7 @@ def c(x):
     return nanops.nancorr(x[0], x[1], method=method)

 correl = self._constructor_sliced(
-    map(c, zip(left.values.T, right.values.T)),
+    map(c, zip(left.values.T, right.values.T, strict=True)),
     index=left.columns,
     copy=False,
 )

pandas/core/generic.py

Lines changed: 8 additions & 3 deletions
@@ -614,7 +614,12 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
         clean_column_name(k): Series(
             v, copy=False, index=self.index, name=k, dtype=dtype
         ).__finalize__(self)
-        for k, v, dtype in zip(self.columns, self._iter_column_arrays(), dtypes)
+        for k, v, dtype in zip(
+            self.columns,
+            self._iter_column_arrays(),
+            dtypes,
+            strict=True,
+        )
     }

 @final
@@ -7546,7 +7551,7 @@ def replace(

     items = list(to_replace.items())
     if items:
-        keys, values = zip(*items)
+        keys, values = zip(*items, strict=True)
     else:
         keys, values = ([], [])  # type: ignore[assignment]

@@ -7565,7 +7570,7 @@ def replace(
     for k, v in items:
         # error: Incompatible types in assignment (expression has type
         # "list[Never]", variable has type "tuple[Any, ...]")
-        keys, values = list(zip(*v.items())) or (  # type: ignore[assignment]
+        keys, values = list(zip(*v.items(), strict=True)) or (  # type: ignore[assignment]
             [],
             [],
         )

pandas/core/indexing.py

Lines changed: 5 additions & 5 deletions
@@ -1091,7 +1091,7 @@ def _getitem_lowerdim(self, tup: tuple):

     # Reverse tuple so that we are indexing along columns before rows
     # and avoid unintended dtype inference. # GH60600
-    for i, key in zip(range(len(tup) - 1, -1, -1), reversed(tup)):
+    for i, key in zip(range(len(tup) - 1, -1, -1), reversed(tup), strict=True):
         if is_label_like(key) or is_list_like(key):
             # We don't need to check for tuples here because those are
             # caught by the _is_nested_tuple_indexer check above.
@@ -1357,7 +1357,7 @@ def _multi_take(self, tup: tuple):
     # GH 836
     d = {
         axis: self._get_listlike_indexer(key, axis)
-        for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
+        for (key, axis) in zip(tup, self.obj._AXIS_ORDERS, strict=True)
     }
     return self.obj._reindex_with_indexers(d, allow_dups=True)

@@ -1669,7 +1669,7 @@ def _has_valid_setitem_indexer(self, indexer) -> bool:
     if not isinstance(indexer, tuple):
         indexer = _tuplify(self.ndim, indexer)

-    for ax, i in zip(self.obj.axes, indexer):
+    for ax, i in zip(self.obj.axes, indexer, strict=False):
         if isinstance(i, slice):
             # should check the stop slice?
             pass
@@ -1841,7 +1841,7 @@ def _decide_split_path(self, indexer, value) -> bool:
     # (not null slices) then we must take the split path, xref
     # GH 10360, GH 27841
     if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
-        for i, ax in zip(indexer, self.obj.axes):
+        for i, ax in zip(indexer, self.obj.axes, strict=True):
             if isinstance(ax, MultiIndex) and not (
                 is_integer(i) or com.is_null_slice(i)
             ):
@@ -2036,7 +2036,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):

     elif len(ilocs) == len(value):
         # We are setting multiple columns in a single row.
-        for loc, v in zip(ilocs, value):
+        for loc, v in zip(ilocs, value, strict=True):
             self._setitem_single_column(loc, v, pi)

     elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:

pandas/core/series.py

Lines changed: 4 additions & 4 deletions
@@ -1707,7 +1707,7 @@ def items(self) -> Iterable[tuple[Hashable, Any]]:
         Index : 1, Value : B
         Index : 2, Value : C
         """
-        return zip(iter(self.index), iter(self))
+        return zip(iter(self.index), iter(self), strict=True)

     # ----------------------------------------------------------------------
     # Misc public methods
@@ -5505,12 +5505,12 @@ def case_when(
         for condition, replacement in caselist
     ]
     default = self.copy(deep=False)
-    conditions, replacements = zip(*caselist)
+    conditions, replacements = zip(*caselist, strict=True)
     common_dtypes = [infer_dtype_from(arg)[0] for arg in [*replacements, default]]
     if len(set(common_dtypes)) > 1:
         common_dtype = find_common_type(common_dtypes)
         updated_replacements = []
-        for condition, replacement in zip(conditions, replacements):
+        for condition, replacement in zip(conditions, replacements, strict=True):
             if is_scalar(replacement):
                 replacement = construct_1d_arraylike_from_scalar(
                     value=replacement, length=len(condition), dtype=common_dtype
@@ -5525,7 +5525,7 @@ def case_when(

     counter = range(len(conditions) - 1, -1, -1)
     for position, condition, replacement in zip(
-        counter, reversed(conditions), reversed(replacements)
+        counter, reversed(conditions), reversed(replacements), strict=True
     ):
         try:
             default = default.mask(
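
For reference (not part of the diff): Series.items() is the public API touched in the first series.py hunk; its documented behaviour is unchanged, only the underlying zip becomes strict. A small usage sketch matching the docstring excerpt shown above:

import pandas as pd

s = pd.Series(["A", "B", "C"])
for index, value in s.items():
    # iterates (index, value) pairs; backed by zip(..., strict=True) after this commit
    print(f"Index : {index}, Value : {value}")
# Index : 0, Value : A
# Index : 1, Value : B
# Index : 2, Value : C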

pandas/core/sorting.py

Lines changed: 9 additions & 5 deletions
@@ -169,7 +169,7 @@ def maybe_lift(lab, size: int) -> tuple[np.ndarray, int]:
     labels = [ensure_int64(x) for x in labels]
     lshape = list(shape)
     if not xnull:
-        for i, (lab, size) in enumerate(zip(labels, shape)):
+        for i, (lab, size) in enumerate(zip(labels, shape, strict=True)):
             labels[i], lshape[i] = maybe_lift(lab, size)

     # Iteratively process all the labels in chunks sized so less
@@ -289,7 +289,11 @@ def decons_obs_group_ids(
     if not is_int64_overflow_possible(shape):
         # obs ids are deconstructable! take the fast route!
         out = _decons_group_index(obs_ids, shape)
-        return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)]
+        return (
+            out
+            if xnull or not lift.any()
+            else [x - y for x, y in zip(out, lift, strict=True)]
+        )

     indexer = unique_label_indices(comp_ids)
     return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels]
@@ -341,7 +345,7 @@ def lexsort_indexer(

     labels = []

-    for k, order in zip(reversed(keys), orders):
+    for k, order in zip(reversed(keys), orders, strict=True):
         k = ensure_key_mapped(k, key)
         if codes_given:
             codes = cast(np.ndarray, k)
@@ -473,9 +477,9 @@ def nargminmax(values: ExtensionArray, method: str, axis: AxisInt = 0):
     if arr_values.ndim > 1:
         if mask.any():
             if axis == 1:
-                zipped = zip(arr_values, mask)
+                zipped = zip(arr_values, mask, strict=True)
             else:
-                zipped = zip(arr_values.T, mask.T)
+                zipped = zip(arr_values.T, mask.T, strict=True)
             return np.array([_nanargminmax(v, m, func) for v, m in zipped])
     return func(arr_values, axis=axis)

pyproject.toml

Lines changed: 0 additions & 7 deletions
@@ -440,9 +440,7 @@ exclude = [
 "asv_bench/benchmarks/series_methods.py" = ["B905"]
 "pandas/_config/config.py" = ["B905"]
 "pandas/conftest.py" = ["B905"]
-"pandas/core/apply.py" = ["B905"]
 "pandas/core/array_algos/quantile.py" = ["B905"]
-"pandas/core/arraylike.py" = ["B905"]
 "pandas/core/arrays/arrow/array.py" = ["B905"]
 "pandas/core/arrays/base.py" = ["B905"]
 "pandas/core/arrays/categorical.py" = ["B905"]
@@ -456,24 +454,19 @@ exclude = [
 "pandas/core/computation/expr.py" = ["B905"]
 "pandas/core/computation/ops.py" = ["B905"]
 "pandas/core/dtypes/missing.py" = ["B905"]
-"pandas/core/frame.py" = ["B905"]
-"pandas/core/generic.py" = ["B905"]
 "pandas/core/groupby/generic.py" = ["B905"]
 "pandas/core/groupby/groupby.py" = ["B905"]
 "pandas/core/groupby/grouper.py" = ["B905"]
 "pandas/core/groupby/ops.py" = ["B905"]
 "pandas/core/indexes/interval.py" = ["B905"]
 "pandas/core/indexes/multi.py" = ["B905"]
-"pandas/core/indexing.py" = ["B905"]
 "pandas/core/methods/to_dict.py" = ["B905"]
 "pandas/core/reshape/concat.py" = ["B905"]
 "pandas/core/reshape/encoding.py" = ["B905"]
 "pandas/core/reshape/melt.py" = ["B905"]
 "pandas/core/reshape/merge.py" = ["B905"]
 "pandas/core/reshape/pivot.py" = ["B905"]
 "pandas/core/reshape/reshape.py" = ["B905"]
-"pandas/core/series.py" = ["B905"]
-"pandas/core/sorting.py" = ["B905"]
 "pandas/core/strings/accessor.py" = ["B905"]
 "pandas/core/window/rolling.py" = ["B905"]
 "pandas/io/excel/_xlrd.py" = ["B905"]
