STY: Add explicit `strict=` argument to `zip()` calls in pandas/io (#62469)

LirongMa · web-flow · commit f570026c9742 · 2025-10-04T09:18:38.000-07:00
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
@@ -134,7 +134,9 @@ def _parse_cell(cell_contents, cell_typ):
         return [
             [
                 _parse_cell(value, typ)
-                for value, typ in zip(sheet.row_values(i), sheet.row_types(i))
+                for value, typ in zip(
+                    sheet.row_values(i), sheet.row_types(i), strict=True
+                )
             ]
             for i in range(nrows)
         ]
diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py
@@ -57,7 +57,7 @@ def expand(self: CSSResolver, prop: str, value: str) -> Generator[tuple[str, str
                 stacklevel=find_stack_level(),
             )
             return
-        for key, idx in zip(self.SIDES, mapping):
+        for key, idx in zip(self.SIDES, mapping, strict=True):
             yield prop_fmt.format(key), tokens[idx]
 
     return expand
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
@@ -633,7 +633,7 @@ def _format_header_mi(self) -> Iterable[ExcelCell]:
             )
 
         for lnum, (spans, levels, level_codes) in enumerate(
-            zip(level_lengths, columns.levels, columns.codes)
+            zip(level_lengths, columns.levels, columns.codes, strict=True)
         ):
             values = levels.take(level_codes)
             for i, span_val in spans.items():
@@ -792,7 +792,10 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
                 level_lengths = get_level_lengths(level_strs)
 
                 for spans, levels, level_codes in zip(
-                    level_lengths, self.df.index.levels, self.df.index.codes
+                    level_lengths,
+                    self.df.index.levels,
+                    self.df.index.codes,
+                    strict=False,
                 ):
                     values = levels.take(
                         level_codes,
@@ -824,7 +827,7 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
 
             else:
                 # Format hierarchical rows with non-merged values.
-                for indexcolvals in zip(*self.df.index):
+                for indexcolvals in zip(*self.df.index, strict=True):
                     for idx, indexcolval in enumerate(indexcolvals):
                         # GH#60099
                         if isinstance(indexcolval, Period):
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -579,7 +579,7 @@ def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceTyp
                     f"Col_space length({len(col_space)}) should match "
                     f"DataFrame number of columns({len(self.frame.columns)})"
                 )
-            result = dict(zip(self.frame.columns, col_space))
+            result = dict(zip(self.frame.columns, col_space, strict=True))
         return result
 
     def _calc_max_cols_fitted(self) -> int | None:
@@ -786,7 +786,7 @@ def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
             if self.sparsify and len(fmt_columns):
                 fmt_columns = sparsify_labels(fmt_columns)
 
-            str_columns = [list(x) for x in zip(*fmt_columns)]
+            str_columns = [list(x) for x in zip(*fmt_columns, strict=True)]
         else:
             fmt_columns = columns._format_flat(include_name=False)
             str_columns = [
@@ -795,7 +795,9 @@ def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
                     if not self._get_formatter(i) and is_numeric_dtype(dtype)
                     else x
                 ]
-                for i, (x, dtype) in enumerate(zip(fmt_columns, self.frame.dtypes))
+                for i, (x, dtype) in enumerate(
+                    zip(fmt_columns, self.frame.dtypes, strict=False)
+                )
             ]
         return str_columns
 
@@ -1359,7 +1361,7 @@ def format_with_na_rep(
             formatted = np.array(
                 [
                     formatter(val) if not m else na_rep
-                    for val, m in zip(values.ravel(), mask.ravel())
+                    for val, m in zip(values.ravel(), mask.ravel(), strict=True)
                 ]
             ).reshape(values.shape)
             return formatted
@@ -1377,6 +1379,7 @@ def format_complex_with_na_rep(
                 imag_values,
                 real_mask,
                 imag_mask,
+                strict=True,
             ):
                 if not re_isna and not im_isna:
                     formatted_lst.append(formatter(val))
@@ -1796,7 +1799,7 @@ def _trim_zeros_complex(str_complexes: ArrayLike, decimal: str = ".") -> list[st
         + imag_pt[0]  # +/-
         + f"{imag_pt[1:]:>{padded_length}}"  # complex part (no sign), possibly nan
         + "j"
-        for real_pt, imag_pt in zip(padded_parts[:n], padded_parts[n:])
+        for real_pt, imag_pt in zip(padded_parts[:n], padded_parts[n:], strict=True)
     ]
     return padded
 
diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py
@@ -289,7 +289,9 @@ def _write_col_header(self, indent: int) -> None:
             levels = self.columns._format_multi(sparsify=sentinel, include_names=False)
             level_lengths = get_level_lengths(levels, sentinel)
             inner_lvl = len(level_lengths) - 1
-            for lnum, (records, values) in enumerate(zip(level_lengths, levels)):
+            for lnum, (records, values) in enumerate(
+                zip(level_lengths, levels, strict=True)
+            ):
                 if is_truncated_horizontally:
                     # modify the header lines
                     ins_col = self.fmt.tr_col_num
@@ -486,7 +488,7 @@ def _write_hierarchical_rows(
 
         assert isinstance(frame.index, MultiIndex)
         idx_values = frame.index._format_multi(sparsify=False, include_names=False)
-        idx_values = list(zip(*idx_values))
+        idx_values = list(zip(*idx_values, strict=True))
 
         if self.fmt.sparsify:
             # GH3547
@@ -547,7 +549,7 @@ def _write_hierarchical_rows(
 
                 sparse_offset = 0
                 j = 0
-                for records, v in zip(level_lengths, idx_values[i]):
+                for records, v in zip(level_lengths, idx_values[i], strict=True):
                     if i in records:
                         if records[i] > 1:
                             tags[j] = template.format(span=records[i])
@@ -584,7 +586,10 @@ def _write_hierarchical_rows(
                     )
 
                 idx_values = list(
-                    zip(*frame.index._format_multi(sparsify=False, include_names=False))
+                    zip(
+                        *frame.index._format_multi(sparsify=False, include_names=False),
+                        strict=True,
+                    )
                 )
                 row = []
                 row.extend(idx_values[i])
diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
@@ -868,12 +868,14 @@ def _get_gross_column_widths(self) -> Sequence[int]:
         body_column_widths = self._get_body_column_widths()
         return [
             max(*widths)
-            for widths in zip(self.header_column_widths, body_column_widths)
+            for widths in zip(
+                self.header_column_widths, body_column_widths, strict=False
+            )
         ]
 
     def _get_body_column_widths(self) -> Sequence[int]:
         """Get widths of table content columns."""
-        strcols: Sequence[Sequence[str]] = list(zip(*self.strrows))
+        strcols: Sequence[Sequence[str]] = list(zip(*self.strrows, strict=True))
         return [max(len(x) for x in col) for col in strcols]
 
     def _gen_rows(self) -> Iterator[Sequence[str]]:
@@ -899,7 +901,9 @@ def add_header_line(self) -> None:
         header_line = self.SPACING.join(
             [
                 _put_str(header, col_width)
-                for header, col_width in zip(self.headers, self.gross_column_widths)
+                for header, col_width in zip(
+                    self.headers, self.gross_column_widths, strict=True
+                )
             ]
         )
         self._lines.append(header_line)
@@ -909,7 +913,7 @@ def add_separator_line(self) -> None:
             [
                 _put_str("-" * header_colwidth, gross_colwidth)
                 for header_colwidth, gross_colwidth in zip(
-                    self.header_column_widths, self.gross_column_widths
+                    self.header_column_widths, self.gross_column_widths, strict=True
                 )
             ]
         )
@@ -920,7 +924,9 @@ def add_body_lines(self) -> None:
             body_line = self.SPACING.join(
                 [
                     _put_str(col, gross_colwidth)
-                    for col, gross_colwidth in zip(row, self.gross_column_widths)
+                    for col, gross_colwidth in zip(
+                        row, self.gross_column_widths, strict=True
+                    )
                 ]
             )
             self._lines.append(body_line)
@@ -980,6 +986,7 @@ def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
             self._gen_line_numbers(),
             self._gen_columns(),
             self._gen_dtypes(),
+            strict=True,
         )
 
     def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
@@ -989,6 +996,7 @@ def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
             self._gen_columns(),
             self._gen_non_null_counts(),
             self._gen_dtypes(),
+            strict=True,
         )
 
     def _gen_line_numbers(self) -> Iterator[str]:
@@ -1092,10 +1100,7 @@ def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
 
     def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
         """Iterator with string representation of body data with counts."""
-        yield from zip(
-            self._gen_non_null_counts(),
-            self._gen_dtypes(),
-        )
+        yield from zip(self._gen_non_null_counts(), self._gen_dtypes(), strict=True)
 
 
 def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:
diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py
@@ -60,7 +60,7 @@ def adjoin(space: int, *lists: list[str], **kwargs: Any) -> str:
         nl = justfunc(lst, lengths[i], mode="left")
         nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl
         newLists.append(nl)
-    toJoin = zip(*newLists)
+    toJoin = zip(*newLists, strict=True)
     return "\n".join("".join(lines) for lines in toJoin)
 
 
@@ -497,14 +497,16 @@ def _justify(
     max_length = [0] * len(combined[0])
     for inner_seq in combined:
         length = [len(item) for item in inner_seq]
-        max_length = [max(x, y) for x, y in zip(max_length, length)]
+        max_length = [max(x, y) for x, y in zip(max_length, length, strict=True)]
 
     # justify each item in each list-like in head and tail using max_length
     head_tuples = [
-        tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
+        tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length, strict=True))
+        for seq in head
     ]
     tail_tuples = [
-        tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
+        tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length, strict=True))
+        for seq in tail
     ]
     return head_tuples, tail_tuples
 
diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
@@ -429,7 +429,7 @@ def _translate_header(self, sparsify_cols: bool, max_cols: int):
         clabels = self.data.columns.tolist()
         if self.data.columns.nlevels == 1:
             clabels = [[x] for x in clabels]
-        clabels = list(zip(*clabels))
+        clabels = list(zip(*clabels, strict=True))
 
         head = []
         # 1) column headers
@@ -914,7 +914,7 @@ def concatenated_visible_rows(obj):
             return row_indices
 
         body = []
-        for r, row in zip(concatenated_visible_rows(self), d["body"]):
+        for r, row in zip(concatenated_visible_rows(self), d["body"], strict=True):
             # note: cannot enumerate d["body"] because rows were dropped if hidden
             # during _translate_body so must zip to acquire the true r-index associated
             # with the ctx obj which contains the cell styles.
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
@@ -550,7 +550,7 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None:
             data = [data]
         if len(path) > 1:
             for obj in data:
-                for val, key in zip(_meta, meta_keys):
+                for val, key in zip(_meta, meta_keys, strict=True):
                     if level + 1 == len(val):
                         seen_meta[key] = _pull_field(obj, val[-1])
 
@@ -567,7 +567,7 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None:
 
                 # For repeating the metadata later
                 lengths.append(len(recs))
-                for val, key in zip(_meta, meta_keys):
+                for val, key in zip(_meta, meta_keys, strict=True):
                     if level + 1 > len(val):
                         meta_val = seen_meta[key]
                     else:
diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py
@@ -308,7 +308,7 @@ def build_table_schema(
     if index:
         if data.index.nlevels > 1:
             data.index = cast("MultiIndex", data.index)
-            for level, name in zip(data.index.levels, data.index.names):
+            for level, name in zip(data.index.levels, data.index.names, strict=True):
                 new_field = convert_pandas_type_to_json_field(level)
                 new_field["name"] = name
                 fields.append(new_field)
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -237,7 +237,7 @@ def _extract_multi_indexer_columns(
         def extract(r):
             return tuple(r[i] for i in range(field_count) if i not in sic)
 
-        columns = list(zip(*(extract(r) for r in header)))
+        columns = list(zip(*(extract(r) for r in header), strict=True))
         names = columns.copy()
         for single_ic in sorted(ic):
             names.insert(single_ic, single_ic)
@@ -328,9 +328,11 @@ def _agg_index(self, index) -> Index:
 
         if self.index_names is not None:
             names: Iterable = self.index_names
+            zip_strict = True
         else:
             names = itertools.cycle([None])
-        for i, (arr, name) in enumerate(zip(index, names)):
+            zip_strict = False
+        for i, (arr, name) in enumerate(zip(index, names, strict=zip_strict)):
             if self._should_parse_dates(i):
                 arr = date_converter(
                     arr,
diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
@@ -292,7 +292,7 @@ def read(
 
             # rename dict keys
             data_tups = sorted(data.items())
-            data = {k: v for k, (i, v) in zip(names, data_tups)}
+            data = {k: v for k, (i, v) in zip(names, data_tups, strict=True)}
 
             date_data = self._do_date_conversions(names, data)
 
@@ -317,7 +317,7 @@ def read(
             if self.usecols is None:
                 self._check_data_length(names, alldata)
 
-            data = {k: v for k, (i, v) in zip(names, data_tups)}
+            data = {k: v for k, (i, v) in zip(names, data_tups, strict=False)}
 
             date_data = self._do_date_conversions(names, data)
             index, column_names = self._make_index(alldata, names)
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -1349,7 +1349,7 @@ def _set_no_thousand_columns(self) -> set[int]:
             )
         if self.columns and self.dtype:
             assert self._col_indices is not None
-            for i, col in zip(self._col_indices, self.columns):
+            for i, col in zip(self._col_indices, self.columns, strict=True):
                 if not isinstance(self.dtype, dict) and not is_numeric_dtype(
                     self.dtype
                 ):
@@ -1466,7 +1466,7 @@ def detect_colspecs(
         shifted = np.roll(mask, 1)
         shifted[0] = 0
         edges = np.where((mask ^ shifted) == 1)[0]
-        edge_pairs = list(zip(edges[::2], edges[1::2]))
+        edge_pairs = list(zip(edges[::2], edges[1::2], strict=True))
         return edge_pairs
 
     def __next__(self) -> list[str]:
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
diff --git a/pyproject.toml b/pyproject.toml

File: pandas/io/excel/_xlrd.py
Original file line number	Diff line number	Diff line change
`@@ -134,7 +134,9 @@ def _parse_cell(cell_contents, cell_typ):`
`134`	`134`	`return [`
`135`	`135`	`[`
`136`	`136`	`_parse_cell(value, typ)`
`137`		`- for value, typ in zip(sheet.row_values(i), sheet.row_types(i))`
	`137`	`+ for value, typ in zip(`
	`138`	`+ sheet.row_values(i), sheet.row_types(i), strict=True`
	`139`	`+ )`
`138`	`140`	`]`
`139`	`141`	`for i in range(nrows)`
`140`	`142`	`]`
File: pandas/io/formats/css.py
Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ def expand(self: CSSResolver, prop: str, value: str) -> Generator[tuple[str, str`
`57`	`57`	`stacklevel=find_stack_level(),`
`58`	`58`	`)`
`59`	`59`	`return`
`60`		`- for key, idx in zip(self.SIDES, mapping):`
	`60`	`+ for key, idx in zip(self.SIDES, mapping, strict=True):`
`61`	`61`	`yield prop_fmt.format(key), tokens[idx]`
`62`	`62`
`63`	`63`	`return expand`
File: pandas/io/formats/info.py
Original file line number	Diff line number	Diff line change
`@@ -868,12 +868,14 @@ def _get_gross_column_widths(self) -> Sequence[int]:`
`868`	`868`	`body_column_widths = self._get_body_column_widths()`
`869`	`869`	`return [`
`870`	`870`	`max(*widths)`
`871`		`- for widths in zip(self.header_column_widths, body_column_widths)`
	`871`	`+ for widths in zip(`
	`872`	`+ self.header_column_widths, body_column_widths, strict=False`
	`873`	`+ )`
`872`	`874`	`]`
`873`	`875`
`874`	`876`	`def _get_body_column_widths(self) -> Sequence[int]:`
`875`	`877`	`"""Get widths of table content columns."""`
`876`		`- strcols: Sequence[Sequence[str]] = list(zip(*self.strrows))`
	`878`	`+ strcols: Sequence[Sequence[str]] = list(zip(*self.strrows, strict=True))`
`877`	`879`	`return [max(len(x) for x in col) for col in strcols]`
`878`	`880`
`879`	`881`	`def _gen_rows(self) -> Iterator[Sequence[str]]:`
`@@ -899,7 +901,9 @@ def add_header_line(self) -> None:`
`899`	`901`	`header_line = self.SPACING.join(`
`900`	`902`	`[`
`901`	`903`	`_put_str(header, col_width)`
`902`		`- for header, col_width in zip(self.headers, self.gross_column_widths)`
	`904`	`+ for header, col_width in zip(`
	`905`	`+ self.headers, self.gross_column_widths, strict=True`
	`906`	`+ )`
`903`	`907`	`]`
`904`	`908`	`)`
`905`	`909`	`self._lines.append(header_line)`
`@@ -909,7 +913,7 @@ def add_separator_line(self) -> None:`
`909`	`913`	`[`
`910`	`914`	`_put_str("-" * header_colwidth, gross_colwidth)`
`911`	`915`	`for header_colwidth, gross_colwidth in zip(`
`912`		`- self.header_column_widths, self.gross_column_widths`
	`916`	`+ self.header_column_widths, self.gross_column_widths, strict=True`
`913`	`917`	`)`
`914`	`918`	`]`
`915`	`919`	`)`
`@@ -920,7 +924,9 @@ def add_body_lines(self) -> None:`
`920`	`924`	`body_line = self.SPACING.join(`
`921`	`925`	`[`
`922`	`926`	`_put_str(col, gross_colwidth)`
`923`		`- for col, gross_colwidth in zip(row, self.gross_column_widths)`
	`927`	`+ for col, gross_colwidth in zip(`
	`928`	`+ row, self.gross_column_widths, strict=True`
	`929`	`+ )`
`924`	`930`	`]`
`925`	`931`	`)`
`926`	`932`	`self._lines.append(body_line)`
`@@ -980,6 +986,7 @@ def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:`
`980`	`986`	`self._gen_line_numbers(),`
`981`	`987`	`self._gen_columns(),`
`982`	`988`	`self._gen_dtypes(),`
	`989`	`+ strict=True,`
`983`	`990`	`)`
`984`	`991`
`985`	`992`	`def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:`
`@@ -989,6 +996,7 @@ def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:`
`989`	`996`	`self._gen_columns(),`
`990`	`997`	`self._gen_non_null_counts(),`
`991`	`998`	`self._gen_dtypes(),`
	`999`	`+ strict=True,`
`992`	`1000`	`)`
`993`	`1001`
`994`	`1002`	`def _gen_line_numbers(self) -> Iterator[str]:`
`@@ -1092,10 +1100,7 @@ def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:`
`1092`	`1100`
`1093`	`1101`	`def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:`
`1094`	`1102`	`"""Iterator with string representation of body data with counts."""`
`1095`		`- yield from zip(`
`1096`		`- self._gen_non_null_counts(),`
`1097`		`- self._gen_dtypes(),`
`1098`		`- )`
	`1103`	`+ yield from zip(self._gen_non_null_counts(), self._gen_dtypes(), strict=True)`
`1099`	`1104`
`1100`	`1105`
`1101`	`1106`	`def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:`