Skip to content

Commit bcb99d5

Browse files
authored
Zip Strict for pandas/core/computation and pandas/core/groupby #62469 (#62510)
1 parent 0557e16 commit bcb99d5

File tree

7 files changed

+25
-21
lines changed

7 files changed

+25
-21
lines changed

pandas/core/computation/align.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -93,7 +93,7 @@ def _align_core(terms):
9393

9494
from pandas import Series
9595

96-
ndims = Series(dict(zip(term_index, term_dims)))
96+
ndims = Series(dict(zip(term_index, term_dims, strict=True)))
9797

9898
# initial axes are the axes of the largest-axis'd term
9999
biggest = terms[ndims.idxmax()].value
@@ -116,7 +116,7 @@ def _align_core(terms):
116116
axes[ax] = axes[ax].union(itm)
117117

118118
for i, ndim in ndims.items():
119-
for axis, items in zip(range(ndim), axes):
119+
for axis, items in zip(range(ndim), axes, strict=False):
120120
ti = terms[i].value
121121

122122
if hasattr(ti, "reindex"):

pandas/core/computation/expr.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -383,11 +383,11 @@ class BaseExprVisitor(ast.NodeVisitor):
383383
"FloorDiv",
384384
"Mod",
385385
)
386-
binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))
386+
binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes, strict=True))
387387

388388
unary_ops = UNARY_OPS_SYMS
389389
unary_op_nodes = "UAdd", "USub", "Invert", "Not"
390-
unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes))
390+
unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes, strict=True))
391391

392392
rewrite_map = {
393393
ast.Eq: ast.In,
@@ -731,7 +731,7 @@ def visit_Compare(self, node, **kwargs):
731731
# recursive case: we have a chained comparison, a CMP b CMP c, etc.
732732
left = node.left
733733
values = []
734-
for op, comp in zip(ops, comps):
734+
for op, comp in zip(ops, comps, strict=True):
735735
new_node = self.visit(
736736
ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)])
737737
)

pandas/core/computation/ops.py

Lines changed: 4 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -302,11 +302,11 @@ def _not_in(x, y):
302302
_in,
303303
_not_in,
304304
)
305-
_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs))
305+
_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs, strict=True))
306306

307307
BOOL_OPS_SYMS = ("&", "|", "and", "or")
308308
_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)
309-
_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs))
309+
_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs, strict=True))
310310

311311
ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%")
312312
_arith_ops_funcs = (
@@ -318,7 +318,7 @@ def _not_in(x, y):
318318
operator.floordiv,
319319
operator.mod,
320320
)
321-
_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs))
321+
_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs, strict=True))
322322

323323
_binary_ops_dict = {}
324324

@@ -484,7 +484,7 @@ def _disallow_scalar_only_bool_ops(self) -> None:
484484

485485
UNARY_OPS_SYMS = ("+", "-", "~", "not")
486486
_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert)
487-
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs))
487+
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs, strict=True))
488488

489489

490490
class UnaryOp(Op):

pandas/core/groupby/generic.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -536,7 +536,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
536536
else:
537537
# list of functions / function names
538538
columns = (com.get_callable_name(f) or f for f in arg)
539-
arg = zip(columns, arg)
539+
arg = zip(columns, arg, strict=True)
540540

541541
results: dict[base.OutputKey, DataFrame | Series] = {}
542542
with com.temp_setattr(self, "as_index", True):

pandas/core/groupby/groupby.py

Lines changed: 5 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -682,7 +682,10 @@ def get_converter(s):
682682
raise ValueError(msg) from err
683683

684684
converters = (get_converter(s) for s in index_sample)
685-
names = (tuple(f(n) for f, n in zip(converters, name)) for name in names)
685+
names = (
686+
tuple(f(n) for f, n in zip(converters, name, strict=True))
687+
for name in names
688+
)
686689

687690
else:
688691
converter = get_converter(index_sample)
@@ -1235,6 +1238,7 @@ def _insert_inaxis_grouper(
12351238
zip(
12361239
reversed(self._grouper.names),
12371240
self._grouper.get_group_levels(),
1241+
strict=True,
12381242
)
12391243
):
12401244
if name is None:

pandas/core/groupby/grouper.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -684,9 +684,9 @@ def groups(self) -> dict[Hashable, Index]:
684684

685685
r, counts = libalgos.groupsort_indexer(ensure_platform_int(codes), len(uniques))
686686
counts = ensure_int64(counts).cumsum()
687-
_result = (r[start:end] for start, end in zip(counts, counts[1:]))
687+
_result = (r[start:end] for start, end in zip(counts, counts[1:], strict=False))
688688
# map to the label
689-
result = {k: self._index.take(v) for k, v in zip(uniques, _result)}
689+
result = {k: self._index.take(v) for k, v in zip(uniques, _result, strict=True)}
690690

691691
return PrettyDict(result)
692692

@@ -875,7 +875,7 @@ def is_in_obj(gpr) -> bool:
875875
return gpr._mgr.references_same_values(obj_gpr_column._mgr, 0)
876876
return False
877877

878-
for gpr, level in zip(keys, levels):
878+
for gpr, level in zip(keys, levels, strict=True):
879879
if is_in_obj(gpr): # df.groupby(df['name'])
880880
in_axis = True
881881
exclusions.add(gpr.name)

pandas/core/groupby/ops.py

Lines changed: 7 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -625,7 +625,7 @@ def get_iterator(self, data: NDFrameT) -> Iterator[tuple[Hashable, NDFrameT]]:
625625
splitter = self._get_splitter(data)
626626
# TODO: Would be more efficient to skip unobserved for transforms
627627
keys = self.result_index
628-
yield from zip(keys, splitter)
628+
yield from zip(keys, splitter, strict=True)
629629

630630
@final
631631
def _get_splitter(self, data: NDFrame) -> DataSplitter:
@@ -766,7 +766,7 @@ def result_index_and_ids(self) -> tuple[Index, npt.NDArray[np.intp]]:
766766
]
767767
sorts = [ping._sort for ping in self.groupings]
768768
# When passed a categorical grouping, keep all categories
769-
for k, (ping, level) in enumerate(zip(self.groupings, levels)):
769+
for k, (ping, level) in enumerate(zip(self.groupings, levels, strict=True)):
770770
if ping._passed_categorical:
771771
levels[k] = level.set_categories(ping._orig_cats)
772772

@@ -997,7 +997,7 @@ def apply_groupwise(
997997
result_values = []
998998

999999
# This calls DataSplitter.__iter__
1000-
zipped = zip(group_keys, splitter)
1000+
zipped = zip(group_keys, splitter, strict=True)
10011001

10021002
for key, group in zipped:
10031003
# Pinning name is needed for
@@ -1095,7 +1095,7 @@ def groups(self):
10951095
# GH 3881
10961096
result = {
10971097
key: value
1098-
for key, value in zip(self.binlabels, self.bins)
1098+
for key, value in zip(self.binlabels, self.bins, strict=True)
10991099
if key is not NaT
11001100
}
11011101
return result
@@ -1126,7 +1126,7 @@ def get_iterator(self, data: NDFrame):
11261126
slicer = lambda start, edge: data.iloc[start:edge]
11271127

11281128
start: np.int64 | int = 0
1129-
for edge, label in zip(self.bins, self.binlabels):
1129+
for edge, label in zip(self.bins, self.binlabels, strict=True):
11301130
if label is not NaT:
11311131
yield label, slicer(start, edge)
11321132
start = edge
@@ -1139,7 +1139,7 @@ def indices(self):
11391139
indices = collections.defaultdict(list)
11401140

11411141
i: np.int64 | int = 0
1142-
for label, bin in zip(self.binlabels, self.bins):
1142+
for label, bin in zip(self.binlabels, self.bins, strict=True):
11431143
if i < bin:
11441144
if label is not NaT:
11451145
indices[label] = list(range(i, bin))
@@ -1229,7 +1229,7 @@ def __iter__(self) -> Iterator:
12291229

12301230
starts, ends = lib.generate_slices(self._slabels, self.ngroups)
12311231
sdata = self._sorted_data
1232-
for start, end in zip(starts, ends):
1232+
for start, end in zip(starts, ends, strict=True):
12331233
yield self._chop(sdata, slice(start, end))
12341234

12351235
@cache_readonly

0 commit comments

Comments (0)