Skip to content
40 changes: 36 additions & 4 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,11 +479,20 @@ def _array_strptime_with_fallback(
return Index(result, dtype=result.dtype, name=name)


def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> DatetimeIndex:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing the annotation to `-> DatetimeIndex` reflects what the function actually returns more precisely, and helps both developers and tooling (like mypy) be certain of the return type.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If `errors == "ignore"` we would get an `Index[object]` back, so the original `-> Index` annotation is correct.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Thanks for pointing this out.

"""
to_datetime specalized to the case where a 'unit' is passed.
to_datetime specialized to the case where a 'unit' is passed.

Note: This function currently treats values at the upper bound differently
from values at the lower bound.
For upper bound, it raises OutOfBoundsDatetime.
For lower bound, it returns NaT.
"""
arg = extract_array(arg, extract_numpy=True)
# Fix GH#60677
# Ensure scalar and array-like both become arrays
# (so both paths use the same code).
arg = np.atleast_1d(arg)

# GH#30050 pass an ndarray to tslib.array_to_datetime
# because it expects an ndarray argument
Expand All @@ -496,6 +505,31 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
if arg.dtype.kind in "iu":
# Note we can't do "f" here because that could induce unwanted
# rounding GH#14156, GH#20445
# Fix GH#60677
# ------------------------------------------------
# A) **Check for uint64 values above int64 max**
# so we don't accidentally wrap around to -1, etc.
# ------------------------------------------------
if arg.dtype.kind == "u": # unsigned
above_max = arg > np.iinfo(np.int64).max
if above_max.any():
if errors == "raise":
raise OutOfBoundsDatetime(
"Cannot convert uint64 values above"
f"{np.iinfo(np.int64).max}"
"to a 64-bit signed datetime64[ns]."
)
else:
# For errors != "raise" (e.g. "coerce" or "ignore"),
# we can replace out-of-range entries with NaN (-> NaT),
# then switch to the fallback object path:
arg = arg.astype(object)
arg[above_max] = np.nan
return _to_datetime_with_unit(arg, unit, name, utc, errors)

# ------------------------------------------------
# B) Proceed with normal numeric -> datetime logic
# ------------------------------------------------
arr = arg.astype(f"datetime64[{unit}]", copy=False)
try:
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
Expand Down Expand Up @@ -532,8 +566,6 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
)

result = DatetimeIndex(arr, name=name)
if not isinstance(result, DatetimeIndex):
return result

# GH#23758: We may still need to localize the result with tz
# GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3689,3 +3689,30 @@ def test_to_datetime_wrapped_datetime64_ps():
["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None
)
tm.assert_index_equal(result, expected)


def test_to_datetime_scalar_out_of_bounds():
    """Check how ``to_datetime(..., unit="ns")`` treats boundary integers.

    Each boundary is exercised both as a scalar and as a one-element list:
    the uint64 maximum must raise ``OutOfBoundsDatetime``, the int64 minimum
    must come back as ``NaT``, and an in-range scalar must come back as a
    ``Timestamp``.
    """
    largest_uint64 = np.iinfo("uint64").max
    smallest_int64 = np.iinfo("int64").min

    # Upper bound, scalar input: OutOfBoundsDatetime is expected.
    with pytest.raises(OutOfBoundsDatetime):
        to_datetime(largest_uint64, unit="ns")

    # Upper bound, list input: same exception as the scalar case.
    with pytest.raises(OutOfBoundsDatetime):
        to_datetime([largest_uint64], unit="ns")

    # Lower bound, scalar input: the result is the NaT singleton.
    assert to_datetime(smallest_int64, unit="ns") is NaT

    # Lower bound, list input: the single element is the NaT singleton.
    assert to_datetime([smallest_int64], unit="ns")[0] is NaT

    # An ordinary nanosecond timestamp must convert without raising.
    in_range_ns = 1_700_000_000_000_000_000
    assert isinstance(to_datetime(in_range_ns, unit="ns"), Timestamp)
Loading