-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
STY: ZIP strict for pandas/io #62469
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9a1c452
ee2bf48
ccdc4f5
70a8357
e431a8b
8065ce1
e5a9750
ce141ba
b70e14e
3591b11
3c2d0c0
bae4e48
2f79a25
3da45c3
c324360
85b57de
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -579,7 +579,7 @@ def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceTyp | |
f"Col_space length({len(col_space)}) should match " | ||
f"DataFrame number of columns({len(self.frame.columns)})" | ||
) | ||
result = dict(zip(self.frame.columns, col_space)) | ||
result = dict(zip(self.frame.columns, col_space, strict=True)) | ||
return result | ||
|
||
def _calc_max_cols_fitted(self) -> int | None: | ||
|
@@ -786,7 +786,7 @@ def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]: | |
if self.sparsify and len(fmt_columns): | ||
fmt_columns = sparsify_labels(fmt_columns) | ||
|
||
str_columns = [list(x) for x in zip(*fmt_columns)] | ||
str_columns = [list(x) for x in zip(*fmt_columns, strict=True)] | ||
else: | ||
fmt_columns = columns._format_flat(include_name=False) | ||
str_columns = [ | ||
|
@@ -795,7 +795,9 @@ def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]: | |
if not self._get_formatter(i) and is_numeric_dtype(dtype) | ||
else x | ||
] | ||
for i, (x, dtype) in enumerate(zip(fmt_columns, self.frame.dtypes)) | ||
for i, (x, dtype) in enumerate( | ||
zip(fmt_columns, self.frame.dtypes, strict=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this False? |
||
) | ||
] | ||
return str_columns | ||
|
||
|
@@ -1359,7 +1361,7 @@ def format_with_na_rep( | |
formatted = np.array( | ||
[ | ||
formatter(val) if not m else na_rep | ||
for val, m in zip(values.ravel(), mask.ravel()) | ||
for val, m in zip(values.ravel(), mask.ravel(), strict=True) | ||
] | ||
).reshape(values.shape) | ||
return formatted | ||
|
@@ -1377,6 +1379,7 @@ def format_complex_with_na_rep( | |
imag_values, | ||
real_mask, | ||
imag_mask, | ||
strict=True, | ||
): | ||
if not re_isna and not im_isna: | ||
formatted_lst.append(formatter(val)) | ||
|
@@ -1796,7 +1799,7 @@ def _trim_zeros_complex(str_complexes: ArrayLike, decimal: str = ".") -> list[st | |
+ imag_pt[0] # +/- | ||
+ f"{imag_pt[1:]:>{padded_length}}" # complex part (no sign), possibly nan | ||
+ "j" | ||
for real_pt, imag_pt in zip(padded_parts[:n], padded_parts[n:]) | ||
for real_pt, imag_pt in zip(padded_parts[:n], padded_parts[n:], strict=True) | ||
] | ||
return padded | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -289,7 +289,9 @@ def _write_col_header(self, indent: int) -> None: | |
levels = self.columns._format_multi(sparsify=sentinel, include_names=False) | ||
level_lengths = get_level_lengths(levels, sentinel) | ||
inner_lvl = len(level_lengths) - 1 | ||
for lnum, (records, values) in enumerate(zip(level_lengths, levels)): | ||
for lnum, (records, values) in enumerate( | ||
zip(level_lengths, levels, strict=True) | ||
): | ||
if is_truncated_horizontally: | ||
# modify the header lines | ||
ins_col = self.fmt.tr_col_num | ||
|
@@ -486,7 +488,7 @@ def _write_hierarchical_rows( | |
|
||
assert isinstance(frame.index, MultiIndex) | ||
idx_values = frame.index._format_multi(sparsify=False, include_names=False) | ||
idx_values = list(zip(*idx_values)) | ||
idx_values = list(zip(*idx_values, strict=True)) | ||
|
||
if self.fmt.sparsify: | ||
# GH3547 | ||
|
@@ -547,7 +549,7 @@ def _write_hierarchical_rows( | |
|
||
sparse_offset = 0 | ||
j = 0 | ||
for records, v in zip(level_lengths, idx_values[i]): | ||
for records, v in zip(level_lengths, idx_values[i], strict=True): | ||
if i in records: | ||
if records[i] > 1: | ||
tags[j] = template.format(span=records[i]) | ||
|
@@ -584,7 +586,10 @@ def _write_hierarchical_rows( | |
) | ||
|
||
idx_values = list( | ||
zip(*frame.index._format_multi(sparsify=False, include_names=False)) | ||
zip( | ||
*frame.index._format_multi(sparsify=False, include_names=False), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this False? |
||
strict=True, | ||
) | ||
) | ||
row = [] | ||
row.extend(idx_values[i]) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -868,12 +868,14 @@ def _get_gross_column_widths(self) -> Sequence[int]: | |
body_column_widths = self._get_body_column_widths() | ||
return [ | ||
max(*widths) | ||
for widths in zip(self.header_column_widths, body_column_widths) | ||
for widths in zip( | ||
self.header_column_widths, body_column_widths, strict=False | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this False? |
||
) | ||
] | ||
|
||
def _get_body_column_widths(self) -> Sequence[int]: | ||
"""Get widths of table content columns.""" | ||
strcols: Sequence[Sequence[str]] = list(zip(*self.strrows)) | ||
strcols: Sequence[Sequence[str]] = list(zip(*self.strrows, strict=True)) | ||
return [max(len(x) for x in col) for col in strcols] | ||
|
||
def _gen_rows(self) -> Iterator[Sequence[str]]: | ||
|
@@ -899,7 +901,9 @@ def add_header_line(self) -> None: | |
header_line = self.SPACING.join( | ||
[ | ||
_put_str(header, col_width) | ||
for header, col_width in zip(self.headers, self.gross_column_widths) | ||
for header, col_width in zip( | ||
self.headers, self.gross_column_widths, strict=True | ||
) | ||
] | ||
) | ||
self._lines.append(header_line) | ||
|
@@ -909,7 +913,7 @@ def add_separator_line(self) -> None: | |
[ | ||
_put_str("-" * header_colwidth, gross_colwidth) | ||
for header_colwidth, gross_colwidth in zip( | ||
self.header_column_widths, self.gross_column_widths | ||
self.header_column_widths, self.gross_column_widths, strict=True | ||
) | ||
] | ||
) | ||
|
@@ -920,7 +924,9 @@ def add_body_lines(self) -> None: | |
body_line = self.SPACING.join( | ||
[ | ||
_put_str(col, gross_colwidth) | ||
for col, gross_colwidth in zip(row, self.gross_column_widths) | ||
for col, gross_colwidth in zip( | ||
row, self.gross_column_widths, strict=True | ||
) | ||
] | ||
) | ||
self._lines.append(body_line) | ||
|
@@ -980,6 +986,7 @@ def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: | |
self._gen_line_numbers(), | ||
self._gen_columns(), | ||
self._gen_dtypes(), | ||
strict=True, | ||
) | ||
|
||
def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: | ||
|
@@ -989,6 +996,7 @@ def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: | |
self._gen_columns(), | ||
self._gen_non_null_counts(), | ||
self._gen_dtypes(), | ||
strict=True, | ||
) | ||
|
||
def _gen_line_numbers(self) -> Iterator[str]: | ||
|
@@ -1092,10 +1100,7 @@ def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: | |
|
||
def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: | ||
"""Iterator with string representation of body data with counts.""" | ||
yield from zip( | ||
self._gen_non_null_counts(), | ||
self._gen_dtypes(), | ||
) | ||
yield from zip(self._gen_non_null_counts(), self._gen_dtypes(), strict=True) | ||
|
||
|
||
def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: | ||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -237,7 +237,7 @@ def _extract_multi_indexer_columns( | |||||||
def extract(r): | ||||||||
return tuple(r[i] for i in range(field_count) if i not in sic) | ||||||||
|
||||||||
columns = list(zip(*(extract(r) for r in header))) | ||||||||
columns = list(zip(*(extract(r) for r in header), strict=True)) | ||||||||
names = columns.copy() | ||||||||
for single_ic in sorted(ic): | ||||||||
names.insert(single_ic, single_ic) | ||||||||
|
@@ -328,9 +328,11 @@ def _agg_index(self, index) -> Index: | |||||||
|
||||||||
if self.index_names is not None: | ||||||||
names: Iterable = self.index_names | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
zip_strict = True | ||||||||
else: | ||||||||
names = itertools.cycle([None]) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
for i, (arr, name) in enumerate(zip(index, names)): | ||||||||
zip_strict = False | ||||||||
for i, (arr, name) in enumerate(zip(index, names, strict=zip_strict)): | ||||||||
if self._should_parse_dates(i): | ||||||||
arr = date_converter( | ||||||||
arr, | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -292,7 +292,7 @@ def read( | |
|
||
# rename dict keys | ||
data_tups = sorted(data.items()) | ||
data = {k: v for k, (i, v) in zip(names, data_tups)} | ||
data = {k: v for k, (i, v) in zip(names, data_tups, strict=True)} | ||
|
||
date_data = self._do_date_conversions(names, data) | ||
|
||
|
@@ -317,7 +317,7 @@ def read( | |
if self.usecols is None: | ||
self._check_data_length(names, alldata) | ||
|
||
data = {k: v for k, (i, v) in zip(names, data_tups)} | ||
data = {k: v for k, (i, v) in zip(names, data_tups, strict=False)} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why does this one need to be False when the one above doesn't? |
||
|
||
date_data = self._do_date_conversions(names, data) | ||
index, column_names = self._make_index(alldata, names) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this False?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Alvaro-Kothe the tests failed on these calls when `strict=True` was used. Once I changed them to `strict=False`, all tests passed.