Skip to content

Commit 386f4dc

Browse files
committed
BUG: pandas.date_range() raises NonExistentTimeError despite nonexistent="shift_forward" when crossing DST boundaries
This fixes GH#62602, where `date_range` failed on nonexistent times during DST transitions because `Timestamp.normalize` did not accept the `ambiguous` and `nonexistent` parameters.

Changes:

- Updated `Timestamp.normalize` (in `_libs/tslibs/timestamps.pyx`):
  * Added `ambiguous` and `nonexistent` keyword arguments (both defaulting to "raise").
  * Forwarded these parameters to `tz_localize` so that DST-related ambiguous and nonexistent timestamps are handled properly.
- Updated the type stubs in `timestamps.pyi` to reflect the new method signature.
- Extended `_maybe_normalize_endpoints` (in `core/arrays/datetimes.py`):
  * Added support for the `ambiguous` and `nonexistent` parameters.
  * Updated the internal normalization calls to propagate these parameters.
- Updated the `DatetimeArray._generate_range` logic so that `ambiguous` and `nonexistent` are propagated consistently through `_maybe_normalize_endpoints`, both before and after timezone handling.

With these changes, `date_range` now correctly handles cases where normalization intersects a DST transition: it preserves the expected behavior and honors the requested `ambiguous`/`nonexistent` policy, avoiding the `NonExistentTimeError` or `AmbiguousTimeError` exceptions that were raised before.

Co-authored-by: Mohamed Sobhy <[email protected]>
1 parent 10102e6 commit 386f4dc

File tree

3 files changed

+21
-7
lines changed

3 files changed

+21
-7
lines changed

pandas/_libs/tslibs/timestamps.pyi

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,11 @@ class Timestamp(datetime):
191191
ambiguous: bool | Literal["raise", "NaT"] = ...,
192192
nonexistent: TimestampNonexistent = ...,
193193
) -> Self: ...
194-
def normalize(self) -> Self: ...
194+
def normalize(
195+
self,
196+
ambiguous: bool | Literal["raise", "NaT"] = ...,
197+
nonexistent: TimestampNonexistent = ...,
198+
) -> Self: ...
195199
# TODO: round/floor/ceil could return NaT?
196200
def round(
197201
self,

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,7 +1308,7 @@ cdef class _Timestamp(ABCTimestamp):
13081308
# -----------------------------------------------------------------
13091309
# Transformation Methods
13101310
1311-
def normalize(self) -> "Timestamp":
1311+
def normalize(self, ambiguous="raise", nonexistent="raise") -> "Timestamp":
13121312
"""
13131313
Normalize Timestamp to midnight, preserving tz information.
13141314

@@ -1346,7 +1346,7 @@ cdef class _Timestamp(ABCTimestamp):
13461346
"Cannot normalize Timestamp without integer overflow"
13471347
) from err
13481348
ts = type(self)._from_value_and_reso(normalized, reso=self._creso, tz=None)
1349-
return ts.tz_localize(self.tzinfo)
1349+
return ts.tz_localize(self.tzinfo, ambiguous=ambiguous, nonexistent=nonexistent)
13501350
13511351
# -----------------------------------------------------------------
13521352
# Pickle Methods

pandas/core/arrays/datetimes.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,9 @@ def _generate_range(
447447
end = end.as_unit(unit, round_ok=False)
448448

449449
left_inclusive, right_inclusive = validate_inclusive(inclusive)
450-
start, end = _maybe_normalize_endpoints(start, end, normalize)
450+
start, end = _maybe_normalize_endpoints(
451+
start, end, normalize, ambiguous, nonexistent
452+
)
451453
tz = _infer_tz_from_endpoints(start, end, tz)
452454

453455
if tz is not None:
@@ -466,6 +468,10 @@ def _generate_range(
466468
if end is not None and end.tz is not None:
467469
end = end.tz_localize(None)
468470

471+
start, end = _maybe_normalize_endpoints(
472+
start, end, normalize, ambiguous, nonexistent
473+
)
474+
469475
if isinstance(freq, (Tick, Day)):
470476
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
471477
else:
@@ -2878,14 +2884,18 @@ def _infer_tz_from_endpoints(
28782884

28792885

28802886
def _maybe_normalize_endpoints(
2881-
start: _TimestampNoneT1, end: _TimestampNoneT2, normalize: bool
2887+
start: _TimestampNoneT1,
2888+
end: _TimestampNoneT2,
2889+
normalize: bool,
2890+
ambiguous: TimeAmbiguous = "raise",
2891+
nonexistent: TimeNonexistent = "raise",
28822892
) -> tuple[_TimestampNoneT1, _TimestampNoneT2]:
28832893
if normalize:
28842894
if start is not None:
2885-
start = start.normalize()
2895+
start = start.normalize(ambiguous, nonexistent)
28862896

28872897
if end is not None:
2888-
end = end.normalize()
2898+
end = end.normalize(ambiguous, nonexistent)
28892899

28902900
return start, end
28912901

0 commit comments

Comments
 (0)