Skip to content

Commit 93c94b5

Browse files
authored
Merge branch 'main' into fix/read_csv-large-num
2 parents fc01097 + c8213d1 commit 93c94b5

File tree

23 files changed

+763
-683
lines changed

23 files changed

+763
-683
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,8 @@ Timezones
981981
^^^^^^^^^
982982
- Bug in :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` changing timezone to UTC when merging two DatetimeIndex objects with the same timezone but different units (:issue:`60080`)
983983
- Bug in :meth:`Series.dt.tz_localize` with a timezone-aware :class:`ArrowDtype` incorrectly converting to UTC when ``tz=None`` (:issue:`61780`)
984-
-
984+
- Fixed bug in :func:`date_range` where tz-aware endpoints with calendar offsets (e.g. ``"MS"``) failed on DST fall-back. These now respect ``ambiguous``/ ``nonexistent``. (:issue:`52908`)
985+
985986

986987
Numeric
987988
^^^^^^^

pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -838,7 +838,7 @@ cdef class BaseMultiIndexCodesEngine:
838838
raise KeyError(key)
839839
try:
840840
indices = [1 if checknull(v) else lev.get_loc(v) + multiindex_nulls_shift
841-
for lev, v in zip(self.levels, key)]
841+
for lev, v in zip(self.levels, key, strict=True)]
842842
except KeyError:
843843
raise KeyError(key)
844844

pandas/_libs/missing.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ cpdef bint check_na_tuples_nonequal(object left, object right):
7272
if len(left) != len(right):
7373
return False
7474

75-
for left_element, right_element in zip(left, right):
75+
for left_element, right_element in zip(left, right, strict=True):
7676
if left_element is C_NA and right_element is not C_NA:
7777
return True
7878
elif right_element is C_NA and left_element is not C_NA:

pandas/_libs/tslibs/fields.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def month_position_check(fields, weekdays) -> str | None:
109109
int32_t[:] months = fields["M"]
110110
int32_t[:] days = fields["D"]
111111

112-
for y, m, d, wd in zip(years, months, days, weekdays):
112+
for y, m, d, wd in zip(years, months, days, weekdays, strict=True):
113113
if calendar_start:
114114
calendar_start &= d == 1
115115
if business_start:

pandas/_libs/tslibs/offsets.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2217,7 +2217,7 @@ cdef class BusinessHour(BusinessMixin):
22172217
# Use python string formatting to be faster than strftime
22182218
hours = ",".join(
22192219
f"{st.hour:02d}:{st.minute:02d}-{en.hour:02d}:{en.minute:02d}"
2220-
for st, en in zip(self.start, self.end)
2220+
for st, en in zip(self.start, self.end, strict=True)
22212221
)
22222222
attrs = [f"{self._prefix}={hours}"]
22232223
out += ": " + ", ".join(attrs)
@@ -2414,7 +2414,7 @@ cdef class BusinessHour(BusinessMixin):
24142414
# get total business hours by sec in one business day
24152415
businesshours = sum(
24162416
self._get_business_hours_by_sec(st, en)
2417-
for st, en in zip(self.start, self.end)
2417+
for st, en in zip(self.start, self.end, strict=True)
24182418
)
24192419

24202420
bd, r = divmod(abs(n * 60), businesshours // 60)
@@ -5357,7 +5357,7 @@ cpdef to_offset(freq, bint is_period=False):
53575357
# the last element must be blank
53585358
raise ValueError("last element must be blank")
53595359

5360-
tups = zip(split[0::4], split[1::4], split[2::4])
5360+
tups = zip(split[0::4], split[1::4], split[2::4], strict=False)
53615361
for n, (sep, stride, name) in enumerate(tups):
53625362
name = _warn_about_deprecated_aliases(name, is_period)
53635363
_validate_to_offset_alias(name, is_period)

pandas/_libs/tslibs/timezones.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
252252
"""
253253
new_trans = list(tz._trans_list)
254254
last_std_offset = 0
255-
for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)):
255+
for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx, strict=True)):
256256
if not tti.isdst:
257257
last_std_offset = tti.offset
258258
new_trans[i] = trans - last_std_offset

pandas/core/arrays/datetimes.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -456,13 +456,14 @@ def _generate_range(
456456
end = _maybe_localize_point(end, freq, tz, ambiguous, nonexistent)
457457

458458
if freq is not None:
459-
# We break Day arithmetic (fixed 24 hour) here and opt for
460-
# Day to mean calendar day (23/24/25 hour). Therefore, strip
461-
# tz info from start and day to avoid DST arithmetic
462-
if isinstance(freq, Day):
463-
if start is not None:
459+
# Offset handling:
460+
# Ticks (fixed-duration like hours/minutes): keep tz; do absolute-time math.
461+
# Other calendar offsets: drop tz; do naive wall time; localize once later
462+
# so `ambiguous`/`nonexistent` are applied correctly.
463+
if not isinstance(freq, Tick):
464+
if start is not None and start.tz is not None:
464465
start = start.tz_localize(None)
465-
if end is not None:
466+
if end is not None and end.tz is not None:
466467
end = end.tz_localize(None)
467468

468469
if isinstance(freq, (Tick, Day)):

pandas/core/frame.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
is_list_like,
108108
is_scalar,
109109
is_sequence,
110+
is_string_dtype,
110111
needs_i8_conversion,
111112
pandas_dtype,
112113
)
@@ -4454,8 +4455,12 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
44544455
cols_droplevel = maybe_droplevels(cols, key)
44554456
if (
44564457
not isinstance(cols_droplevel, MultiIndex)
4458+
and is_string_dtype(cols_droplevel.dtype)
44574459
and not cols_droplevel.any()
44584460
):
4461+
# if cols_droplevel contains only empty strings,
4462+
# value.reindex(cols_droplevel, axis=1) would be full of NaNs
4463+
# see GH#62518 and GH#61841
44594464
return
44604465
if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
44614466
value = value.reindex(cols_droplevel, axis=1)

pandas/core/window/rolling.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ def __iter__(self) -> Iterator:
351351
)
352352
self._check_window_bounds(start, end, len(obj))
353353

354-
for s, e in zip(start, end):
354+
for s, e in zip(start, end, strict=True):
355355
result = obj.iloc[slice(s, e)]
356356
yield result
357357

@@ -802,7 +802,7 @@ def _apply_pairwise(
802802
groupby_codes = []
803803
groupby_levels = []
804804
# e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]]
805-
for gb_level_pair in map(list, zip(*gb_pairs)):
805+
for gb_level_pair in map(list, zip(*gb_pairs, strict=True)):
806806
labels = np.repeat(np.array(gb_level_pair), old_result_len)
807807
codes, levels = factorize(labels)
808808
groupby_codes.append(codes)

0 commit comments

Comments
 (0)