Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
if (levels is None and isinstance(keys[0], tuple)) or (
levels is not None and len(levels) > 1
):
- zipped = list(zip(*keys))
+ zipped = list(zip(*keys, strict=True))
if names is None:
names = [None] * len(zipped)

Expand All @@ -866,13 +866,13 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
# things are potentially different sizes, so compute the exact codes
# for each level and pass those to MultiIndex.from_arrays

- for hlevel, level in zip(zipped, levels):
+ for hlevel, level in zip(zipped, levels, strict=True):
to_concat = []
if isinstance(hlevel, Index) and hlevel.equals(level):
lens = [len(idx) for idx in indexes]
codes_list.append(np.repeat(np.arange(len(hlevel)), lens))
else:
- for key, index in zip(hlevel, indexes):
+ for key, index in zip(hlevel, indexes, strict=True):
# Find matching codes, include matching nan values as equal.
mask = (isna(level) & isna(key)) | (level == key)
if not mask.any():
Expand Down Expand Up @@ -922,7 +922,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde

# do something a bit more speedy

- for hlevel, level in zip(zipped, levels):
+ for hlevel, level in zip(zipped, levels, strict=True):
hlevel_index = ensure_index(hlevel)
mapped = level.get_indexer(hlevel_index)

Expand Down
14 changes: 10 additions & 4 deletions pandas/core/reshape/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@ def check_len(item, name: str) -> None:
# columns to prepend to result.
with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)]

- for col, pre, sep in zip(data_to_encode.items(), prefix, prefix_sep):
+ for col, pre, sep in zip(
+ data_to_encode.items(), prefix, prefix_sep, strict=False
+ ):
# col is (column_name, column), use just column data here
dummy = _get_dummies_1d(
col[1],
Expand Down Expand Up @@ -323,15 +325,15 @@ def get_empty_frame(data) -> DataFrame:
codes = codes[mask]
n_idx = np.arange(N)[mask]

- for ndx, code in zip(n_idx, codes):
+ for ndx, code in zip(n_idx, codes, strict=True):
sp_indices[code].append(ndx)

if drop_first:
# remove first categorical level to avoid perfect collinearity
# GH12042
sp_indices = sp_indices[1:]
dummy_cols = dummy_cols[1:]
- for col, ixs in zip(dummy_cols, sp_indices):
+ for col, ixs in zip(dummy_cols, sp_indices, strict=True):
sarr = SparseArray(
np.ones(len(ixs), dtype=dtype),
sparse_index=IntIndex(N, ixs),
Expand Down Expand Up @@ -535,7 +537,11 @@ def from_dummies(
raise ValueError(len_msg)
elif isinstance(default_category, Hashable):
default_category = dict(
- zip(variables_slice, [default_category] * len(variables_slice))
+ zip(
+ variables_slice,
+ [default_category] * len(variables_slice),
+ strict=True,
+ )
)
else:
raise TypeError(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def melt(
missing = idx == -1
if missing.any():
missing_labels = [
- lab for lab, not_found in zip(labels, missing) if not_found
+ lab for lab, not_found in zip(labels, missing, strict=True) if not_found
]
raise KeyError(
"The following id_vars or value_vars are not present in "
Expand Down
18 changes: 9 additions & 9 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1230,7 +1230,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
"""
names_to_restore = []
for name, left_key, right_key in zip(
- self.join_names, self.left_on, self.right_on
+ self.join_names, self.left_on, self.right_on, strict=True
):
if (
# Argument 1 to "_is_level_reference" of "NDFrame" has incompatible
Expand Down Expand Up @@ -1263,7 +1263,7 @@ def _maybe_add_join_keys(

assert all(isinstance(x, _known) for x in self.left_join_keys)

- keys = zip(self.join_names, self.left_on, self.right_on)
+ keys = zip(self.join_names, self.left_on, self.right_on, strict=True)
for i, (name, lname, rname) in enumerate(keys):
if not _should_fill(lname, rname):
continue
Expand Down Expand Up @@ -1572,7 +1572,7 @@ def _get_merge_keys(

# ugh, spaghetti re #733
if _any(self.left_on) and _any(self.right_on):
- for lk, rk in zip(self.left_on, self.right_on):
+ for lk, rk in zip(self.left_on, self.right_on, strict=True):
lk = extract_array(lk, extract_numpy=True)
rk = extract_array(rk, extract_numpy=True)
if is_lkey(lk):
Expand Down Expand Up @@ -1635,7 +1635,7 @@ def _get_merge_keys(
right_keys = [
lev._values.take(lev_codes)
for lev, lev_codes in zip(
- self.right.index.levels, self.right.index.codes
+ self.right.index.levels, self.right.index.codes, strict=True
)
]
else:
Expand All @@ -1657,7 +1657,7 @@ def _get_merge_keys(
left_keys = [
lev._values.take(lev_codes)
for lev, lev_codes in zip(
- self.left.index.levels, self.left.index.codes
+ self.left.index.levels, self.left.index.codes, strict=True
)
]
else:
Expand All @@ -1674,7 +1674,7 @@ def _maybe_coerce_merge_keys(self) -> None:
# or if we have object and integer dtypes

for lk, rk, name in zip(
- self.left_join_keys, self.right_join_keys, self.join_names
+ self.left_join_keys, self.right_join_keys, self.join_names, strict=True
):
if (len(lk) and not len(rk)) or (not len(lk) and len(rk)):
continue
Expand Down Expand Up @@ -2046,7 +2046,7 @@ def get_join_indexers(
_factorize_keys(left_keys[n], right_keys[n], sort=sort)
for n in range(len(left_keys))
)
- zipped = zip(*mapped)
+ zipped = zip(*mapped, strict=True)
llab, rlab, shape = (list(x) for x in zipped)

# get flat i8 keys from label lists
Expand Down Expand Up @@ -2431,7 +2431,7 @@ def _check_dtype_match(left: ArrayLike, right: ArrayLike, i: int) -> None:
raise MergeError(msg)

# validate index types are the same
- for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)):
+ for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys, strict=True)):
_check_dtype_match(lk, rk, i)

if self.left_index:
Expand Down Expand Up @@ -2616,7 +2616,7 @@ def _get_multiindex_indexer(
_factorize_keys(index.levels[n]._values, join_keys[n], sort=sort)
for n in range(index.nlevels)
)
- zipped = zip(*mapped)
+ zipped = zip(*mapped, strict=True)
rcodes, lcodes, shape = (list(x) for x in zipped)
if sort:
rcodes = list(map(np.take, rcodes, index.codes))
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,8 +1098,8 @@ def crosstab(
from pandas import DataFrame

data = {
- **dict(zip(unique_rownames, index)),
- **dict(zip(unique_colnames, columns)),
+ **dict(zip(unique_rownames, index, strict=True)),
+ **dict(zip(unique_colnames, columns, strict=True)),
}
df = DataFrame(data, index=common_idx)

Expand Down
12 changes: 7 additions & 5 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,9 @@ def stack_factorize(index):
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
)
else:
- levels, (ilab, clab) = zip(*map(stack_factorize, (frame.index, frame.columns)))
+ levels, (ilab, clab) = zip(
+ *map(stack_factorize, (frame.index, frame.columns)), strict=True
+ )
codes = ilab.repeat(K), np.tile(clab, N).ravel()
new_index = MultiIndex(
levels=levels,
Expand Down Expand Up @@ -778,21 +780,21 @@ def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex | Index:

levs = (
[lev[c] if c >= 0 else None for c in codes]
- for lev, codes in zip(columns.levels[:-1], columns.codes[:-1])
+ for lev, codes in zip(columns.levels[:-1], columns.codes[:-1], strict=True)
)

# Remove duplicate tuples in the MultiIndex.
- tuples = zip(*levs)
+ tuples = zip(*levs, strict=True)
unique_tuples = (key for key, _ in itertools.groupby(tuples))
- new_levs = zip(*unique_tuples)
+ new_levs = zip(*unique_tuples, strict=True)

# The dtype of each level must be explicitly set to avoid inferring the wrong type.
# See GH-36991.
return MultiIndex.from_arrays(
[
# Not all indices can accept None values.
Index(new_lev, dtype=lev.dtype) if None not in new_lev else new_lev
- for new_lev, lev in zip(new_levs, columns.levels)
+ for new_lev, lev in zip(new_levs, columns.levels, strict=True)
],
names=columns.names[:-1],
)
Expand Down
Loading