Skip to content

Commit c55d1d5

Browse files
committed
ENH: Improve error handling for out-of-bounds uint64 values in _to_datetime_with_unit
1 parent 4a2ad59 commit c55d1d5

File tree

2 files changed

+38
-33
lines changed

2 files changed

+38
-33
lines changed

pandas/core/tools/datetimes.py

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
from pandas.core.construction import extract_array
7676
from pandas.core.indexes.base import Index
7777
from pandas.core.indexes.datetimes import DatetimeIndex
78+
7879
if TYPE_CHECKING:
7980
from collections.abc import (
8081
Callable,
@@ -478,72 +479,77 @@ def _array_strptime_with_fallback(
478479
return Index(result, dtype=result.dtype, name=name)
479480

480481

481-
482-
483-
484482
def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> DatetimeIndex:
485483
"""
486484
to_datetime specialized to the case where a 'unit' is passed.
487-
Fixes a bug where scalar out-of-bounds values were not raising
488-
an error consistently.
489485
"""
490-
import pdb; pdb.set_trace()
491-
492-
# Ensure we handle both array-likes and scalars the same way.
493-
# extract_array can return a scalar if 'arg' is scalar-like;
494-
# so we force everything into at least 1D shape.
495486
arg = extract_array(arg, extract_numpy=True)
487+
# Fix GH#60677
488+
# Ensure scalar and array-like inputs both become at least 1-D arrays
489+
# (so both paths use the same code).
496490
arg = np.atleast_1d(arg)
497491

498492
# GH#30050 pass an ndarray to tslib.array_to_datetime
499493
# because it expects an ndarray argument
500494
if isinstance(arg, IntegerArray):
501-
# For IntegerArray, we can directly convert
502495
arr = arg.astype(f"datetime64[{unit}]")
503496
tz_parsed = None
504-
505497
else:
506-
# Now we have a guaranteed ndarray
507498
arg = np.asarray(arg)
508499

509500
if arg.dtype.kind in "iu":
510501
# Note we can't do "f" here because that could induce unwanted
511-
# rounding GH#14156, GH#20445
502+
# rounding GH#14156, GH#20445
503+
# Fix GH#60677
504+
# ------------------------------------------------
505+
# A) Check for uint64 values above the int64 max
506+
# so they don't silently wrap around to negative values (e.g. -1).
507+
# ------------------------------------------------
508+
if arg.dtype.kind == "u": # unsigned
509+
above_max = arg > np.iinfo(np.int64).max
510+
if above_max.any():
511+
if errors == "raise":
512+
raise OutOfBoundsDatetime(
513+
f"Cannot convert uint64 values above {np.iinfo(np.int64).max} "
514+
"to a 64-bit signed datetime64[ns]."
515+
)
516+
else:
517+
# For errors != "raise" (e.g. "coerce" or "ignore"),
518+
# we can replace out-of-range entries with NaN (-> NaT),
519+
# then switch to the fallback object path:
520+
arg = arg.astype(object)
521+
arg[above_max] = np.nan
522+
return _to_datetime_with_unit(arg, unit, name, utc, errors)
523+
524+
# ------------------------------------------------
525+
# B) Proceed with normal numeric -> datetime logic
526+
# ------------------------------------------------
512527
arr = arg.astype(f"datetime64[{unit}]", copy=False)
513528
try:
514529
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
515530
except OutOfBoundsDatetime:
516531
if errors == "raise":
517532
raise
518-
# errors != "raise" => coerce to object and retry
519533
arg = arg.astype(object)
520534
return _to_datetime_with_unit(arg, unit, name, utc, errors)
521535
tz_parsed = None
522536

523537
elif arg.dtype.kind == "f":
524-
# Floating dtypes
525538
with np.errstate(over="raise"):
526539
try:
527540
arr = cast_from_unit_vectorized(arg, unit=unit)
528541
except OutOfBoundsDatetime as err:
529542
if errors != "raise":
530-
# coerce to object and retry
531543
return _to_datetime_with_unit(
532-
arg.astype(object),
533-
unit,
534-
name,
535-
utc,
536-
errors,
544+
arg.astype(object), unit, name, utc, errors
537545
)
538546
raise OutOfBoundsDatetime(
539547
f"cannot convert input with unit '{unit}'"
540548
) from err
541549

542550
arr = arr.view("M8[ns]")
543551
tz_parsed = None
544-
545552
else:
546-
# Fallback: treat as object dtype
547553
arg = arg.astype(object, copy=False)
548554
arr, tz_parsed = tslib.array_to_datetime(
549555
arg,
@@ -553,22 +559,21 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> DatetimeI
553559
creso=NpyDatetimeUnit.NPY_FR_ns.value,
554560
)
555561

556-
# Construct a DatetimeIndex from the array
557562
result = DatetimeIndex(arr, name=name)
558563

559-
# May need to localize result to parsed tz or convert to UTC if requested
564+
# GH#23758: We may still need to localize the result with tz
565+
# GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
566+
# result will be naive but in UTC
560567
result = result.tz_localize("UTC").tz_convert(tz_parsed)
561568

562569
if utc:
563570
if result.tz is None:
564571
result = result.tz_localize("utc")
565572
else:
566573
result = result.tz_convert("utc")
567-
568574
return result
569575

570576

571-
572577
def _adjust_to_origin(arg, origin, unit):
573578
"""
574579
Helper function for to_datetime.

pandas/tests/tools/test_to_datetime.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""test to_datetime"""
22

33
import calendar
4-
import locale
54
from collections import deque
65
from datetime import (
76
date,
@@ -10,12 +9,12 @@
109
timezone,
1110
)
1211
from decimal import Decimal
12+
import locale
1313
import zoneinfo
1414

15+
from dateutil.parser import parse
1516
import numpy as np
16-
import pandas as pd
1717
import pytest
18-
from dateutil.parser import parse
1918

2019
from pandas._libs import tslib
2120
from pandas._libs.tslibs import (
@@ -30,6 +29,8 @@
3029
import pandas.util._test_decorators as td
3130

3231
from pandas.core.dtypes.common import is_datetime64_ns_dtype
32+
33+
import pandas as pd
3334
from pandas import (
3435
DataFrame,
3536
DatetimeIndex,
@@ -3705,5 +3706,4 @@ def test_to_datetime_scalar_out_of_bounds():
37053706
# Test a valid value (should not raise an error)
37063707
valid_timestamp = 1_700_000_000_000_000_000 # A reasonable nanosecond timestamp
37073708
result = pd.to_datetime(valid_timestamp, unit="ns")
3708-
assert isinstance(result, pd.Timestamp)
3709-
3709+
assert isinstance(result, pd.Timestamp)

0 commit comments

Comments
 (0)