
Commit 01028bf

Merge branch 'main' into update-docs-data-table-representation
2 parents 5c6afee + c8213d1 commit 01028bf

15 files changed: +129 -85 lines changed

15 files changed

+129
-85
lines changed

pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion
@@ -838,7 +838,7 @@ cdef class BaseMultiIndexCodesEngine:
             raise KeyError(key)
         try:
             indices = [1 if checknull(v) else lev.get_loc(v) + multiindex_nulls_shift
-                       for lev, v in zip(self.levels, key)]
+                       for lev, v in zip(self.levels, key, strict=True)]
         except KeyError:
             raise KeyError(key)
 
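
These strict=True additions rely on Python 3.10+ zip semantics. A minimal standalone sketch (not pandas code) of the difference:

    # Plain zip silently truncates to the shortest iterable;
    # strict=True turns a length mismatch into a loud ValueError.
    print(list(zip([1, 2, 3], ["a", "b"])))  # [(1, 'a'), (2, 'b')]
    try:
        list(zip([1, 2, 3], ["a", "b"], strict=True))
    except ValueError as exc:
        print(exc)  # zip() argument 2 is shorter than argument 1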

pandas/_libs/missing.pyx

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ cpdef bint check_na_tuples_nonequal(object left, object right):
     if len(left) != len(right):
         return False
 
-    for left_element, right_element in zip(left, right):
+    for left_element, right_element in zip(left, right, strict=True):
         if left_element is C_NA and right_element is not C_NA:
             return True
         elif right_element is C_NA and left_element is not C_NA:
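
Note that the length guard at the top of this hunk already returns False on mismatched tuples, so strict=True here documents the invariant rather than changing behavior; in plain Python terms:

    import pandas as pd

    # The guard above guarantees equal lengths before the loop is reached:
    left, right = (1, pd.NA), (1, pd.NA, 2)
    if len(left) != len(right):
        print("returns False; zip(..., strict=True) is never evaluated")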

pandas/_libs/tslibs/fields.pyx

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ def month_position_check(fields, weekdays) -> str | None:
         int32_t[:] months = fields["M"]
         int32_t[:] days = fields["D"]
 
-    for y, m, d, wd in zip(years, months, days, weekdays):
+    for y, m, d, wd in zip(years, months, days, weekdays, strict=True):
         if calendar_start:
             calendar_start &= d == 1
         if business_start:

pandas/_libs/tslibs/offsets.pyx

Lines changed: 3 additions & 3 deletions
@@ -2217,7 +2217,7 @@ cdef class BusinessHour(BusinessMixin):
         # Use python string formatting to be faster than strftime
         hours = ",".join(
             f"{st.hour:02d}:{st.minute:02d}-{en.hour:02d}:{en.minute:02d}"
-            for st, en in zip(self.start, self.end)
+            for st, en in zip(self.start, self.end, strict=True)
         )
         attrs = [f"{self._prefix}={hours}"]
         out += ": " + ", ".join(attrs)
@@ -2414,7 +2414,7 @@ cdef class BusinessHour(BusinessMixin):
         # get total business hours by sec in one business day
         businesshours = sum(
             self._get_business_hours_by_sec(st, en)
-            for st, en in zip(self.start, self.end)
+            for st, en in zip(self.start, self.end, strict=True)
         )
 
         bd, r = divmod(abs(n * 60), businesshours // 60)
@@ -5357,7 +5357,7 @@ cpdef to_offset(freq, bint is_period=False):
                 # the last element must be blank
                 raise ValueError("last element must be blank")
 
-            tups = zip(split[0::4], split[1::4], split[2::4])
+            tups = zip(split[0::4], split[1::4], split[2::4], strict=False)
             for n, (sep, stride, name) in enumerate(tups):
                 name = _warn_about_deprecated_aliases(name, is_period)
                 _validate_to_offset_alias(name, is_period)
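
The last hunk deliberately keeps truncation with strict=False: with a stride-4 split layout, the separator slice ends up one element longer than the group slices, so zip would raise under strict=True. A standalone sketch (the regex is illustrative, not pandas' exact pattern):

    import re

    # Three capture groups give re.split a stride-4 layout:
    # [sep, g1, g2, g3, sep, g1, g2, g3, ..., sep]
    split = re.split(r"(\d+)([A-Za-z]+)()", "3h30min")
    print(split[0::4])  # ['', '', ''] -- one trailing separator extra
    print(split[1::4])  # ['3', '30']
    print(split[2::4])  # ['h', 'min']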

pandas/_libs/tslibs/timezones.pyx

Lines changed: 1 addition & 1 deletion
@@ -252,7 +252,7 @@ cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
     """
     new_trans = list(tz._trans_list)
     last_std_offset = 0
-    for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)):
+    for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx, strict=True)):
         if not tti.isdst:
             last_std_offset = tti.offset
         new_trans[i] = trans - last_std_offset

pandas/core/frame.py

Lines changed: 5 additions & 0 deletions
@@ -107,6 +107,7 @@
     is_list_like,
     is_scalar,
     is_sequence,
+    is_string_dtype,
     needs_i8_conversion,
     pandas_dtype,
 )
@@ -4454,8 +4455,12 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
                 cols_droplevel = maybe_droplevels(cols, key)
                 if (
                     not isinstance(cols_droplevel, MultiIndex)
+                    and is_string_dtype(cols_droplevel.dtype)
                     and not cols_droplevel.any()
                 ):
+                    # if cols_droplevel contains only empty strings,
+                    # value.reindex(cols_droplevel, axis=1) would be full of NaNs
+                    # see GH#62518 and GH#61841
                     return
                 if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
                     value = value.reindex(cols_droplevel, axis=1)
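
The added is_string_dtype check narrows the early return to string-labeled columns, so falsy non-string labels (such as the Categorical level in GH#62518) still go through alignment. A hedged sketch of the distinction using the public pandas.api.types helper (the Index values are illustrative):

    import pandas as pd
    from pandas.api.types import is_string_dtype

    empty_labels = pd.Index(["", ""])              # all-empty string labels
    cat_labels = pd.Index(pd.Categorical([1, 2]))  # non-string level

    print(is_string_dtype(empty_labels.dtype))  # True  -> .any() decides, early return
    print(is_string_dtype(cat_labels.dtype))    # False -> short-circuits, reindex proceeds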

pandas/core/window/rolling.py

Lines changed: 2 additions & 2 deletions
@@ -351,7 +351,7 @@ def __iter__(self) -> Iterator:
         )
         self._check_window_bounds(start, end, len(obj))
 
-        for s, e in zip(start, end):
+        for s, e in zip(start, end, strict=True):
             result = obj.iloc[slice(s, e)]
             yield result
 
@@ -802,7 +802,7 @@ def _apply_pairwise(
             groupby_codes = []
             groupby_levels = []
             # e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]]
-            for gb_level_pair in map(list, zip(*gb_pairs)):
+            for gb_level_pair in map(list, zip(*gb_pairs, strict=True)):
                 labels = np.repeat(np.array(gb_level_pair), old_result_len)
                 codes, levels = factorize(labels)
                 groupby_codes.append(codes)
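
The second hunk applies strict zipping to the star-unpacking transpose idiom noted in the inline comment; a standalone sketch:

    # zip(*pairs) transposes a list of equal-length pairs; strict=True
    # asserts that every pair really has the same length.
    gb_pairs = [[1, 2], [4, 5]]
    print([list(t) for t in zip(*gb_pairs, strict=True)])  # [[1, 4], [2, 5]]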

pandas/tests/frame/test_arithmetic.py

Lines changed: 10 additions & 1 deletion
@@ -11,6 +11,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat._optional import import_optional_dependency
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -24,6 +26,7 @@
     _check_mixed_float,
     _check_mixed_int,
 )
+from pandas.util.version import Version
 
 
 @pytest.fixture
@@ -1114,6 +1117,8 @@ def test_binop_other(self, op, value, dtype, switch_numexpr_min_elements):
             (operator.mod, "complex128"),
         }
 
+        ne = import_optional_dependency("numexpr", errors="ignore")
+        ne_warns_on_op = ne is not None and Version(ne.__version__) < Version("2.13.1")
         if (op, dtype) in invalid:
             warn = None
             if (dtype == "<M8[ns]" and op == operator.add) or (
@@ -1142,7 +1147,11 @@ def test_binop_other(self, op, value, dtype, switch_numexpr_min_elements):
 
         elif (op, dtype) in skip:
             if op in [operator.add, operator.mul]:
-                if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0:
+                if (
+                    expr.USE_NUMEXPR
+                    and switch_numexpr_min_elements == 0
+                    and ne_warns_on_op
+                ):
                     warn = UserWarning
                 else:
                     warn = None
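
The version gate mirrors a reusable pattern for optional test dependencies: import the module only if present, then branch on its version. A minimal sketch with the same helpers:

    from pandas.compat._optional import import_optional_dependency
    from pandas.util.version import Version

    # Returns the module if importable, else None (errors="ignore").
    ne = import_optional_dependency("numexpr", errors="ignore")
    # Per this change, only numexpr older than 2.13.1 warns on these ops.
    ne_warns_on_op = ne is not None and Version(ne.__version__) < Version("2.13.1")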

pandas/tests/indexing/multiindex/test_multiindex.py

Lines changed: 21 additions & 0 deletions
@@ -271,3 +271,24 @@ def test_multiindex_assign_aligns_as_implicit_tuple(self):
         df1["C"] = s1
         tm.assert_frame_equal(df1, df2)
         tm.assert_frame_equal(df1, df3)
+
+    def test_multiindex_assign_alignment_with_non_string_dtype(self):
+        # GH 62518
+        columns = MultiIndex.from_arrays(
+            [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])]
+        )
+
+        meta = DataFrame(columns=columns, dtype=object)
+        meta["z"] = meta["z"].astype("int64")
+
+        result = DataFrame(
+            data={
+                ("a", 1): Series([], dtype=object),
+                ("a", 2): Series([], dtype=object),
+                ("z", 1): Series([], dtype="int64"),
+                ("z", 2): Series([], dtype="int64"),
+            },
+            columns=columns,
+        )
+
+        tm.assert_frame_equal(meta, result)

pandas/tests/io/json/test_compression.py

Lines changed: 43 additions & 46 deletions
@@ -12,22 +12,21 @@
 import pandas._testing as tm
 
 
-def test_compression_roundtrip(compression):
+def test_compression_roundtrip(compression, temp_file):
     df = pd.DataFrame(
         [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
         index=["A", "B"],
         columns=["X", "Y", "Z"],
     )
 
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression=compression)
-        tm.assert_frame_equal(df, pd.read_json(path, compression=compression))
+    df.to_json(temp_file, compression=compression)
+    tm.assert_frame_equal(df, pd.read_json(temp_file, compression=compression))
 
-        # explicitly ensure file was compressed.
-        with tm.decompress_file(path, compression) as fh:
-            result = fh.read().decode("utf8")
-        data = StringIO(result)
-        tm.assert_frame_equal(df, pd.read_json(data))
+    # explicitly ensure file was compressed.
+    with tm.decompress_file(temp_file, compression) as fh:
+        result = fh.read().decode("utf8")
+    data = StringIO(result)
+    tm.assert_frame_equal(df, pd.read_json(data))
 
 
 def test_read_zipped_json(datapath):
@@ -43,15 +42,14 @@ def test_read_zipped_json(datapath):
 @td.skip_if_not_us_locale
 @pytest.mark.single_cpu
 @pytest.mark.network
-def test_with_s3_url(compression, s3_bucket_public, s3so):
+def test_with_s3_url(compression, s3_bucket_public, s3so, temp_file):
     # Bucket created in tests/io/conftest.py
     df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
 
     key = f"{uuid.uuid4()}.json"
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression=compression)
-        with open(path, "rb") as f:
-            s3_bucket_public.put_object(Key=key, Body=f)
+    df.to_json(temp_file, compression=compression)
+    with open(temp_file, "rb") as f:
+        s3_bucket_public.put_object(Key=key, Body=f)
 
     roundtripped_df = pd.read_json(
         f"s3://{s3_bucket_public.name}/{key}",
@@ -61,39 +59,35 @@ def test_with_s3_url(compression, s3_bucket_public, s3so):
     tm.assert_frame_equal(df, roundtripped_df)
 
 
-def test_lines_with_compression(compression):
-    with tm.ensure_clean() as path:
-        df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
-        df.to_json(path, orient="records", lines=True, compression=compression)
-        roundtripped_df = pd.read_json(path, lines=True, compression=compression)
-        tm.assert_frame_equal(df, roundtripped_df)
+def test_lines_with_compression(compression, temp_file):
+    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
+    df.to_json(temp_file, orient="records", lines=True, compression=compression)
+    roundtripped_df = pd.read_json(temp_file, lines=True, compression=compression)
+    tm.assert_frame_equal(df, roundtripped_df)
 
 
-def test_chunksize_with_compression(compression):
-    with tm.ensure_clean() as path:
-        df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
-        df.to_json(path, orient="records", lines=True, compression=compression)
+def test_chunksize_with_compression(compression, temp_file):
+    df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
+    df.to_json(temp_file, orient="records", lines=True, compression=compression)
 
-        with pd.read_json(
-            path, lines=True, chunksize=1, compression=compression
-        ) as res:
-            roundtripped_df = pd.concat(res)
-        tm.assert_frame_equal(df, roundtripped_df)
+    with pd.read_json(
+        temp_file, lines=True, chunksize=1, compression=compression
+    ) as res:
+        roundtripped_df = pd.concat(res)
+    tm.assert_frame_equal(df, roundtripped_df)
 
 
-def test_write_unsupported_compression_type():
+def test_write_unsupported_compression_type(temp_file):
     df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
-    with tm.ensure_clean() as path:
-        msg = "Unrecognized compression type: unsupported"
-        with pytest.raises(ValueError, match=msg):
-            df.to_json(path, compression="unsupported")
+    msg = "Unrecognized compression type: unsupported"
+    with pytest.raises(ValueError, match=msg):
+        df.to_json(temp_file, compression="unsupported")
 
 
-def test_read_unsupported_compression_type():
-    with tm.ensure_clean() as path:
-        msg = "Unrecognized compression type: unsupported"
-        with pytest.raises(ValueError, match=msg):
-            pd.read_json(path, compression="unsupported")
+def test_read_unsupported_compression_type(temp_file):
+    msg = "Unrecognized compression type: unsupported"
+    with pytest.raises(ValueError, match=msg):
+        pd.read_json(temp_file, compression="unsupported")
 
 
 @pytest.mark.parametrize(
@@ -102,25 +96,28 @@ def test_read_unsupported_compression_type():
 @pytest.mark.parametrize("to_infer", [True, False])
 @pytest.mark.parametrize("read_infer", [True, False])
 def test_to_json_compression(
-    compression_only, read_infer, to_infer, compression_to_extension, infer_string
+    compression_only,
+    read_infer,
+    to_infer,
+    compression_to_extension,
+    infer_string,
+    tmp_path,
 ):
     with pd.option_context("future.infer_string", infer_string):
         # see gh-15008
         compression = compression_only
 
         # We'll complete file extension subsequently.
-        filename = "test."
-        filename += compression_to_extension[compression]
+        filename = tmp_path / f"test.{compression_to_extension[compression]}"
 
         df = pd.DataFrame({"A": [1]})
 
         to_compression = "infer" if to_infer else compression
         read_compression = "infer" if read_infer else compression
 
-        with tm.ensure_clean(filename) as path:
-            df.to_json(path, compression=to_compression)
-            result = pd.read_json(path, compression=read_compression)
-            tm.assert_frame_equal(result, df)
+        df.to_json(filename, compression=to_compression)
+        result = pd.read_json(filename, compression=read_compression)
+        tm.assert_frame_equal(result, df)
 
 
 def test_to_json_compression_mode(compression):
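
All of these tests move from tm.ensure_clean to fixture-provided paths. A minimal sketch of the underlying pattern, assuming temp_file is a conftest fixture built on pytest's built-in tmp_path:

    import pandas as pd
    import pandas._testing as tm

    def test_json_roundtrip(tmp_path):
        # tmp_path is a unique pathlib.Path per test, cleaned up by pytest.
        path = tmp_path / "frame.json"
        df = pd.DataFrame({"a": [1, 2]})
        df.to_json(path)
        tm.assert_frame_equal(df, pd.read_json(path))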
