Skip to content

Commit bcb99d5

Browse files
authored
Zip Strict for pandas/core/computation and pandas/core/groupby #62469 (#62510)
1 parent 0557e16 commit bcb99d5

File tree

7 files changed

+25
-21
lines changed

7 files changed

+25
-21
lines changed

pandas/core/computation/align.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -93,7 +93,7 @@ def _align_core(terms):
9393

9494
from pandas import Series
9595

96-
ndims = Series(dict(zip(term_index, term_dims)))
96+
ndims = Series(dict(zip(term_index, term_dims, strict=True)))
9797

9898
# initial axes are the axes of the largest-axis'd term
9999
biggest = terms[ndims.idxmax()].value
@@ -116,7 +116,7 @@ def _align_core(terms):
116116
axes[ax] = axes[ax].union(itm)
117117

118118
for i, ndim in ndims.items():
119-
for axis, items in zip(range(ndim), axes):
119+
for axis, items in zip(range(ndim), axes, strict=False):
120120
ti = terms[i].value
121121

122122
if hasattr(ti, "reindex"):

pandas/core/computation/expr.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -383,11 +383,11 @@ class BaseExprVisitor(ast.NodeVisitor):
383383
"FloorDiv",
384384
"Mod",
385385
)
386-
binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))
386+
binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes, strict=True))
387387

388388
unary_ops = UNARY_OPS_SYMS
389389
unary_op_nodes = "UAdd", "USub", "Invert", "Not"
390-
unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes))
390+
unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes, strict=True))
391391

392392
rewrite_map = {
393393
ast.Eq: ast.In,
@@ -731,7 +731,7 @@ def visit_Compare(self, node, **kwargs):
731731
# recursive case: we have a chained comparison, a CMP b CMP c, etc.
732732
left = node.left
733733
values = []
734-
for op, comp in zip(ops, comps):
734+
for op, comp in zip(ops, comps, strict=True):
735735
new_node = self.visit(
736736
ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)])
737737
)

pandas/core/computation/ops.py

Lines changed: 4 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -302,11 +302,11 @@ def _not_in(x, y):
302302
_in,
303303
_not_in,
304304
)
305-
_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs))
305+
_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs, strict=True))
306306

307307
BOOL_OPS_SYMS = ("&", "|", "and", "or")
308308
_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)
309-
_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs))
309+
_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs, strict=True))
310310

311311
ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%")
312312
_arith_ops_funcs = (
@@ -318,7 +318,7 @@ def _not_in(x, y):
318318
operator.floordiv,
319319
operator.mod,
320320
)
321-
_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs))
321+
_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs, strict=True))
322322

323323
_binary_ops_dict = {}
324324

@@ -484,7 +484,7 @@ def _disallow_scalar_only_bool_ops(self) -> None:
484484

485485
UNARY_OPS_SYMS = ("+", "-", "~", "not")
486486
_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert)
487-
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs))
487+
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs, strict=True))
488488

489489

490490
class UnaryOp(Op):

pandas/core/groupby/generic.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -536,7 +536,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
536536
else:
537537
# list of functions / function names
538538
columns = (com.get_callable_name(f) or f for f in arg)
539-
arg = zip(columns, arg)
539+
arg = zip(columns, arg, strict=True)
540540

541541
results: dict[base.OutputKey, DataFrame | Series] = {}
542542
with com.temp_setattr(self, "as_index", True):

pandas/core/groupby/groupby.py

Lines changed: 5 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -682,7 +682,10 @@ def get_converter(s):
682682
raise ValueError(msg) from err
683683

684684
converters = (get_converter(s) for s in index_sample)
685-
names = (tuple(f(n) for f, n in zip(converters, name)) for name in names)
685+
names = (
686+
tuple(f(n) for f, n in zip(converters, name, strict=True))
687+
for name in names
688+
)
686689

687690
else:
688691
converter = get_converter(index_sample)
@@ -1235,6 +1238,7 @@ def _insert_inaxis_grouper(
12351238
zip(
12361239
reversed(self._grouper.names),
12371240
self._grouper.get_group_levels(),
1241+
strict=True,
12381242
)
12391243
):
12401244
if name is None:

pandas/core/groupby/grouper.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -684,9 +684,9 @@ def groups(self) -> dict[Hashable, Index]:
684684

685685
r, counts = libalgos.groupsort_indexer(ensure_platform_int(codes), len(uniques))
686686
counts = ensure_int64(counts).cumsum()
687-
_result = (r[start:end] for start, end in zip(counts, counts[1:]))
687+
_result = (r[start:end] for start, end in zip(counts, counts[1:], strict=False))
688688
# map to the label
689-
result = {k: self._index.take(v) for k, v in zip(uniques, _result)}
689+
result = {k: self._index.take(v) for k, v in zip(uniques, _result, strict=True)}
690690

691691
return PrettyDict(result)
692692

@@ -875,7 +875,7 @@ def is_in_obj(gpr) -> bool:
875875
return gpr._mgr.references_same_values(obj_gpr_column._mgr, 0)
876876
return False
877877

878-
for gpr, level in zip(keys, levels):
878+
for gpr, level in zip(keys, levels, strict=True):
879879
if is_in_obj(gpr): # df.groupby(df['name'])
880880
in_axis = True
881881
exclusions.add(gpr.name)

pandas/core/groupby/ops.py

Lines changed: 7 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -625,7 +625,7 @@ def get_iterator(self, data: NDFrameT) -> Iterator[tuple[Hashable, NDFrameT]]:
625625
splitter = self._get_splitter(data)
626626
# TODO: Would be more efficient to skip unobserved for transforms
627627
keys = self.result_index
628-
yield from zip(keys, splitter)
628+
yield from zip(keys, splitter, strict=True)
629629

630630
@final
631631
def _get_splitter(self, data: NDFrame) -> DataSplitter:
@@ -766,7 +766,7 @@ def result_index_and_ids(self) -> tuple[Index, npt.NDArray[np.intp]]:
766766
]
767767
sorts = [ping._sort for ping in self.groupings]
768768
# When passed a categorical grouping, keep all categories
769-
for k, (ping, level) in enumerate(zip(self.groupings, levels)):
769+
for k, (ping, level) in enumerate(zip(self.groupings, levels, strict=True)):
770770
if ping._passed_categorical:
771771
levels[k] = level.set_categories(ping._orig_cats)
772772

@@ -997,7 +997,7 @@ def apply_groupwise(
997997
result_values = []
998998

999999
# This calls DataSplitter.__iter__
1000-
zipped = zip(group_keys, splitter)
1000+
zipped = zip(group_keys, splitter, strict=True)
10011001

10021002
for key, group in zipped:
10031003
# Pinning name is needed for
@@ -1095,7 +1095,7 @@ def groups(self):
10951095
# GH 3881
10961096
result = {
10971097
key: value
1098-
for key, value in zip(self.binlabels, self.bins)
1098+
for key, value in zip(self.binlabels, self.bins, strict=True)
10991099
if key is not NaT
11001100
}
11011101
return result
@@ -1126,7 +1126,7 @@ def get_iterator(self, data: NDFrame):
11261126
slicer = lambda start, edge: data.iloc[start:edge]
11271127

11281128
start: np.int64 | int = 0
1129-
for edge, label in zip(self.bins, self.binlabels):
1129+
for edge, label in zip(self.bins, self.binlabels, strict=True):
11301130
if label is not NaT:
11311131
yield label, slicer(start, edge)
11321132
start = edge
@@ -1139,7 +1139,7 @@ def indices(self):
11391139
indices = collections.defaultdict(list)
11401140

11411141
i: np.int64 | int = 0
1142-
for label, bin in zip(self.binlabels, self.bins):
1142+
for label, bin in zip(self.binlabels, self.bins, strict=True):
11431143
if i < bin:
11441144
if label is not NaT:
11451145
indices[label] = list(range(i, bin))
@@ -1229,7 +1229,7 @@ def __iter__(self) -> Iterator:
12291229

12301230
starts, ends = lib.generate_slices(self._slabels, self.ngroups)
12311231
sdata = self._sorted_data
1232-
for start, end in zip(starts, ends):
1232+
for start, end in zip(starts, ends, strict=True):
12331233
yield self._chop(sdata, slice(start, end))
12341234

12351235
@cache_readonly

0 commit comments

Comments (0)