diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6e475acc5d971..c49695f009fc8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -902,6 +902,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`) - Bug in :meth:`Index.union` with a ``pyarrow`` timestamp dtype incorrectly returning ``object`` dtype (:issue:`58421`) - Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`) +- Bug in :meth:`Timestamp.normalize` and :meth:`DatetimeArray.normalize` returning incorrect results instead of raising on integer overflow for very small (distant past) values (:issue:`60583`) - Bug in :meth:`Timestamp.replace` failing to update ``unit`` attribute when replacement introduces non-zero ``nanosecond`` or ``microsecond`` (:issue:`57749`) - Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`) - Bug in :meth:`to_datetime` on float array with missing values throwing ``FloatingPointError`` (:issue:`58419`) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index df1ead215a2be..2f0c5fa9ef18e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1339,7 +1339,12 @@ cdef class _Timestamp(ABCTimestamp): int64_t ppd = periods_per_day(self._creso) _Timestamp ts - normalized = normalize_i8_stamp(local_val, ppd) + try: + normalized = normalize_i8_stamp(local_val, ppd) + except OverflowError as err: + raise ValueError( + "Cannot normalize Timestamp without integer overflow" + ) from err ts = type(self)._from_value_and_reso(normalized, reso=self._creso, tz=None) return ts.tz_localize(self.tzinfo) @@ -3547,7 +3552,7 @@ Timestamp.daysinmonth = Timestamp.days_in_month @cython.cdivision(False) -cdef int64_t normalize_i8_stamp(int64_t local_val, int64_t ppd) noexcept nogil: +cdef int64_t normalize_i8_stamp(int64_t local_val, int64_t ppd): """ Round the localized nanosecond timestamp down to the previous midnight. @@ -3561,4 +3566,6 @@ cdef int64_t normalize_i8_stamp(int64_t local_val, int64_t ppd) noexcept nogil: ------- int64_t """ - return local_val - (local_val % ppd) + with cython.overflowcheck(True): + # GH#60583 + return local_val - (local_val % ppd) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 1e09874639d4f..636104a1e0f1e 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -254,6 +254,7 @@ def get_resolution( # ------------------------------------------------------------------------- +@cython.overflowcheck(True) @cython.cdivision(False) @cython.wraparound(False) @cython.boundscheck(False) @@ -292,8 +293,12 @@ cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz, NPY_DATETIMEUNI res_val = NPY_NAT else: local_val = info.utc_val_to_local_val(utc_val, &pos) - res_val = local_val - (local_val % ppd) - + try: + res_val = local_val - (local_val % ppd) + except OverflowError as err: + raise ValueError( + "Cannot normalize Timestamp without integer overflow" + ) from err # Analogous to: result[i] = res_val (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index a2a2d57a33925..199e3572732a0 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -93,6 +93,15 @@ def test_normalize(self, unit): res = dta.normalize() tm.assert_extension_array_equal(res, expected) + def test_normalize_overflow_raises(self): + # GH#60583 + ts = pd.Timestamp.min + dta = DatetimeArray._from_sequence([ts], dtype="M8[ns]") + + msg = "Cannot normalize Timestamp without integer overflow" + with pytest.raises(ValueError, match=msg): + dta.normalize() + def test_simple_new_requires_match(self, unit): arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") dtype = DatetimeTZDtype(unit, "UTC") diff --git a/pandas/tests/scalar/timestamp/methods/test_normalize.py b/pandas/tests/scalar/timestamp/methods/test_normalize.py index 60f249c602bd6..4e8a6e1bc7197 100644 --- a/pandas/tests/scalar/timestamp/methods/test_normalize.py +++ b/pandas/tests/scalar/timestamp/methods/test_normalize.py @@ -19,3 +19,11 @@ def test_normalize_pre_epoch_dates(self): result = Timestamp("1969-01-01 09:00:00").normalize() expected = Timestamp("1969-01-01 00:00:00") assert result == expected + + def test_normalize_overflow_raises(self): + # GH#60583 + ts = Timestamp.min + + msg = "Cannot normalize Timestamp without integer overflow" + with pytest.raises(ValueError, match=msg): + ts.normalize()