Commit 095778b

add strict param
1 parent e4ca405 commit 095778b

6 files changed: 33 additions & 25 deletions
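For context, a minimal sketch of the behaviour this commit opts into (illustrative only, not part of the diff; the variable names below are made up). Since Python 3.10, zip() accepts a strict keyword: with strict=True a length mismatch between the iterables raises ValueError instead of being silently truncated to the shortest input, which is the long-standing default behaviour (and what strict=False spells out explicitly).

# Illustrative sketch of zip(strict=...) semantics (Python 3.10+); not part of the commit.
names = ["a", "b", "c"]
values = [1, 2]  # one element short

# Default behaviour (and strict=False): silently truncate to the shortest iterable.
assert list(zip(names, values)) == [("a", 1), ("b", 2)]
assert list(zip(names, values, strict=False)) == [("a", 1), ("b", 2)]

# strict=True: surface the mismatch instead of hiding it.
try:
    list(zip(names, values, strict=True))
except ValueError as exc:
    print(exc)  # e.g. "zip() argument 2 is shorter than argument 1"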

pandas/core/reshape/concat.py

Lines changed: 4 additions & 4 deletions
@@ -840,7 +840,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
     if (levels is None and isinstance(keys[0], tuple)) or (
         levels is not None and len(levels) > 1
     ):
-        zipped = list(zip(*keys))
+        zipped = list(zip(*keys, strict=True))
         if names is None:
             names = [None] * len(zipped)
@@ -866,13 +866,13 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
         # things are potentially different sizes, so compute the exact codes
         # for each level and pass those to MultiIndex.from_arrays

-        for hlevel, level in zip(zipped, levels):
+        for hlevel, level in zip(zipped, levels, strict=True):
             to_concat = []
             if isinstance(hlevel, Index) and hlevel.equals(level):
                 lens = [len(idx) for idx in indexes]
                 codes_list.append(np.repeat(np.arange(len(hlevel)), lens))
             else:
-                for key, index in zip(hlevel, indexes):
+                for key, index in zip(hlevel, indexes, strict=True):
                     # Find matching codes, include matching nan values as equal.
                     mask = (isna(level) & isna(key)) | (level == key)
                     if not mask.any():
@@ -922,7 +922,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde

     # do something a bit more speedy

-    for hlevel, level in zip(zipped, levels):
+    for hlevel, level in zip(zipped, levels, strict=True):
         hlevel_index = ensure_index(hlevel)
         mapped = level.get_indexer(hlevel_index)

pandas/core/reshape/encoding.py

Lines changed: 10 additions & 4 deletions
@@ -209,7 +209,9 @@ def check_len(item, name: str) -> None:
             # columns to prepend to result.
             with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)]

-        for col, pre, sep in zip(data_to_encode.items(), prefix, prefix_sep):
+        for col, pre, sep in zip(
+            data_to_encode.items(), prefix, prefix_sep, strict=False
+        ):
             # col is (column_name, column), use just column data here
             dummy = _get_dummies_1d(
                 col[1],
@@ -323,15 +325,15 @@ def get_empty_frame(data) -> DataFrame:
         codes = codes[mask]
         n_idx = np.arange(N)[mask]

-        for ndx, code in zip(n_idx, codes):
+        for ndx, code in zip(n_idx, codes, strict=True):
             sp_indices[code].append(ndx)

         if drop_first:
             # remove first categorical level to avoid perfect collinearity
             # GH12042
             sp_indices = sp_indices[1:]
             dummy_cols = dummy_cols[1:]
-        for col, ixs in zip(dummy_cols, sp_indices):
+        for col, ixs in zip(dummy_cols, sp_indices, strict=True):
             sarr = SparseArray(
                 np.ones(len(ixs), dtype=dtype),
                 sparse_index=IntIndex(N, ixs),
@@ -535,7 +537,11 @@ def from_dummies(
                 raise ValueError(len_msg)
         elif isinstance(default_category, Hashable):
             default_category = dict(
-                zip(variables_slice, [default_category] * len(variables_slice))
+                zip(
+                    variables_slice,
+                    [default_category] * len(variables_slice),
+                    strict=True,
+                )
             )
         else:
             raise TypeError(
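Note that encoding.py contains the only call in this commit that is made explicitly non-strict. In get_dummies, a scalar prefix/prefix_sep is (to the best of my reading of the current implementation) expanded with itertools.cycle, so those iterables can legitimately be longer than the columns being encoded; strict=False keeps the existing truncating behaviour while making it explicit. A hypothetical sketch of why a strict zip would not work there:

# Hypothetical illustration (names invented); a cycled prefix never runs out,
# so zip(..., strict=True) would raise once the columns are exhausted.
import itertools

columns_to_encode = ["color", "size"]
prefix = itertools.cycle(["pre"])  # endless repetition of a scalar prefix

print(list(zip(columns_to_encode, prefix, strict=False)))
# [('color', 'pre'), ('size', 'pre')]

try:
    list(zip(["color", "size"], itertools.cycle(["pre"]), strict=True))
except ValueError as exc:
    print(exc)  # e.g. "zip() argument 2 is longer than argument 1"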

pandas/core/reshape/melt.py

Lines changed: 1 addition & 1 deletion
@@ -199,7 +199,7 @@ def melt(
         missing = idx == -1
         if missing.any():
             missing_labels = [
-                lab for lab, not_found in zip(labels, missing) if not_found
+                lab for lab, not_found in zip(labels, missing, strict=True) if not_found
             ]
             raise KeyError(
                 "The following id_vars or value_vars are not present in "

pandas/core/reshape/merge.py

Lines changed: 9 additions & 9 deletions
@@ -1230,7 +1230,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
         """
         names_to_restore = []
         for name, left_key, right_key in zip(
-            self.join_names, self.left_on, self.right_on
+            self.join_names, self.left_on, self.right_on, strict=True
         ):
             if (
                 # Argument 1 to "_is_level_reference" of "NDFrame" has incompatible
@@ -1263,7 +1263,7 @@ def _maybe_add_join_keys(

         assert all(isinstance(x, _known) for x in self.left_join_keys)

-        keys = zip(self.join_names, self.left_on, self.right_on)
+        keys = zip(self.join_names, self.left_on, self.right_on, strict=True)
         for i, (name, lname, rname) in enumerate(keys):
             if not _should_fill(lname, rname):
                 continue
@@ -1572,7 +1572,7 @@ def _get_merge_keys(

         # ugh, spaghetti re #733
         if _any(self.left_on) and _any(self.right_on):
-            for lk, rk in zip(self.left_on, self.right_on):
+            for lk, rk in zip(self.left_on, self.right_on, strict=True):
                 lk = extract_array(lk, extract_numpy=True)
                 rk = extract_array(rk, extract_numpy=True)
                 if is_lkey(lk):
@@ -1635,7 +1635,7 @@ def _get_merge_keys(
                     right_keys = [
                         lev._values.take(lev_codes)
                         for lev, lev_codes in zip(
-                            self.right.index.levels, self.right.index.codes
+                            self.right.index.levels, self.right.index.codes, strict=True
                         )
                     ]
                 else:
@@ -1657,7 +1657,7 @@ def _get_merge_keys(
                     left_keys = [
                         lev._values.take(lev_codes)
                         for lev, lev_codes in zip(
-                            self.left.index.levels, self.left.index.codes
+                            self.left.index.levels, self.left.index.codes, strict=True
                         )
                     ]
                 else:
@@ -1674,7 +1674,7 @@ def _maybe_coerce_merge_keys(self) -> None:
         # or if we have object and integer dtypes

         for lk, rk, name in zip(
-            self.left_join_keys, self.right_join_keys, self.join_names
+            self.left_join_keys, self.right_join_keys, self.join_names, strict=True
         ):
             if (len(lk) and not len(rk)) or (not len(lk) and len(rk)):
                 continue
@@ -2042,7 +2042,7 @@ def get_join_indexers(
         _factorize_keys(left_keys[n], right_keys[n], sort=sort)
         for n in range(len(left_keys))
     )
-    zipped = zip(*mapped)
+    zipped = zip(*mapped, strict=True)
     llab, rlab, shape = (list(x) for x in zipped)

     # get flat i8 keys from label lists
@@ -2427,7 +2427,7 @@ def _check_dtype_match(left: ArrayLike, right: ArrayLike, i: int) -> None:
                 raise MergeError(msg)

         # validate index types are the same
-        for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)):
+        for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys, strict=True)):
             _check_dtype_match(lk, rk, i)

         if self.left_index:
@@ -2612,7 +2612,7 @@ def _get_multiindex_indexer(
         _factorize_keys(index.levels[n]._values, join_keys[n], sort=sort)
         for n in range(index.nlevels)
     )
-    zipped = zip(*mapped)
+    zipped = zip(*mapped, strict=True)
     rcodes, lcodes, shape = (list(x) for x in zipped)
     if sort:
         rcodes = list(map(np.take, rcodes, index.codes))

pandas/core/reshape/pivot.py

Lines changed: 2 additions & 2 deletions
@@ -1098,8 +1098,8 @@ def crosstab(
     from pandas import DataFrame

     data = {
-        **dict(zip(unique_rownames, index)),
-        **dict(zip(unique_colnames, columns)),
+        **dict(zip(unique_rownames, index, strict=True)),
+        **dict(zip(unique_colnames, columns, strict=True)),
     }
     df = DataFrame(data, index=common_idx)

pandas/core/reshape/reshape.py

Lines changed: 7 additions & 5 deletions
@@ -696,7 +696,9 @@ def stack_factorize(index):
             levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
         )
     else:
-        levels, (ilab, clab) = zip(*map(stack_factorize, (frame.index, frame.columns)))
+        levels, (ilab, clab) = zip(
+            *map(stack_factorize, (frame.index, frame.columns)), strict=True
+        )
         codes = ilab.repeat(K), np.tile(clab, N).ravel()
         new_index = MultiIndex(
             levels=levels,
@@ -778,21 +780,21 @@ def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex | Index:

     levs = (
         [lev[c] if c >= 0 else None for c in codes]
-        for lev, codes in zip(columns.levels[:-1], columns.codes[:-1])
+        for lev, codes in zip(columns.levels[:-1], columns.codes[:-1], strict=True)
     )

     # Remove duplicate tuples in the MultiIndex.
-    tuples = zip(*levs)
+    tuples = zip(*levs, strict=True)
     unique_tuples = (key for key, _ in itertools.groupby(tuples))
-    new_levs = zip(*unique_tuples)
+    new_levs = zip(*unique_tuples, strict=True)

     # The dtype of each level must be explicitly set to avoid inferring the wrong type.
     # See GH-36991.
     return MultiIndex.from_arrays(
         [
             # Not all indices can accept None values.
             Index(new_lev, dtype=lev.dtype) if None not in new_lev else new_lev
-            for new_lev, lev in zip(new_levs, columns.levels)
+            for new_lev, lev in zip(new_levs, columns.levels, strict=True)
         ],
         names=columns.names[:-1],
     )
