From b211e9e81d16d51fc313e8e9d3168ae9cf96f348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Sat, 27 Sep 2025 11:48:10 -0300 Subject: [PATCH] fix: raise error when converting `NaT` to int Previously, it was converting it to -2^63. --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/datetimelike.py | 2 ++ pandas/core/dtypes/astype.py | 34 +++++++++++++++++++--- pandas/tests/series/methods/test_astype.py | 9 ++++++ 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 99a6be03c84d3..af1c4fe1fccdf 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -964,6 +964,7 @@ Datetimelike - Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`) - Bug in :meth:`to_datetime` with ``format="ISO8601"`` and ``utc=True`` where naive timestamps incorrectly inherited timezone offset from previous timestamps in a series. (:issue:`61389`) - Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`) +- Bug in Datetime-like arrays converting ``NaT`` to -(2**63) when casting to integer. 
Now it raises ``IntCastingNaNError`` (:issue:`59711`) - Bug in comparison between objects with ``np.datetime64`` dtype and ``timestamp[pyarrow]`` dtypes incorrectly raising ``TypeError`` (:issue:`60937`) - Bug in comparison between objects with pyarrow date dtype and ``timestamp[pyarrow]`` or ``np.datetime64`` dtype failing to consider these as non-comparable (:issue:`62157`) - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c68b329b00968..085084ba88196 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -87,6 +87,7 @@ ) from pandas.util._exceptions import find_stack_level +from pandas.core.dtypes.astype import check_to_int_nansafe from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( is_all_strings, @@ -490,6 +491,7 @@ def astype(self, dtype, copy: bool = True): elif dtype.kind in "iu": # we deliberately ignore int32 vs. int64 here. # See https://github.com/pandas-dev/pandas/issues/24381 for more. + check_to_int_nansafe(self, infinite_check=False) values = self.asi8 if dtype != np.int64: raise TypeError( diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 086f7d2da6640..930b25c84a610 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -27,6 +27,7 @@ ExtensionDtype, NumpyEADtype, ) +from pandas.core.dtypes.missing import isna if TYPE_CHECKING: from pandas._typing import ( @@ -138,10 +139,7 @@ def _astype_float_to_int_nansafe( """ astype with a check preventing converting NaN to an meaningless integer value. 
""" - if not np.isfinite(values).all(): - raise IntCastingNaNError( - "Cannot convert non-finite values (NA or inf) to integer" - ) + check_to_int_nansafe(values) if dtype.kind == "u": # GH#45151 if not (values >= 0).all(): @@ -151,6 +149,34 @@ def _astype_float_to_int_nansafe( return values.astype(dtype, copy=copy) +def check_to_int_nansafe(values: np.ndarray, infinite_check: bool = True): + """ + Validate that values can be safely converted to integer type. + + Parameters + ---------- + values : np.ndarray + Array of values to check for conversion to integer. + infinite_check : bool + If True, check for both infinite and missing values. + If False, check only for missing values. + This distinction exists because ``np.isfinite`` may not support + every datetime-like array, so ``isna`` is used for those instead. + + Raises + ------ + IntCastingNaNError + If any non-finite value is detected. + """ + error_msg = "Cannot convert non-finite values (NA or inf) to integer" + if infinite_check: + if not np.isfinite(values).all(): + raise IntCastingNaNError(error_msg) + else: + if isna(values).any(): + raise IntCastingNaNError(error_msg) + + def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike: """ Cast array (ndarray or ExtensionArray) to the new dtype. 
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index aa38e63c826f6..37138068058ee 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -389,6 +389,15 @@ def test_astype_cast_nan_inf_int(self, any_int_numpy_dtype, value): with pytest.raises(ValueError, match=msg): ser.astype(any_int_numpy_dtype) + @pytest.mark.parametrize("dtype", tm.DATETIME64_DTYPES + tm.TIMEDELTA64_DTYPES) + def test_int_cast_raises_with_datetime(self, dtype): + # GH 59711 + ser = Series([np.nan], dtype=dtype) + + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + ser.astype(int) + def test_astype_cast_object_int_fail(self, any_int_numpy_dtype): arr = Series(["car", "house", "tree", "1"]) msg = r"invalid literal for int\(\) with base 10: 'car'"