Skip to content

Commit 083f01a

Browse files
committed
fix: pd.to_numeric handling of datetime
1 parent 1863adb commit 083f01a

File tree

6 files changed

+72
-4
lines changed

6 files changed

+72
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,6 +1191,7 @@ Other
11911191
- Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`)
11921192
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
11931193
- Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`)
1194+
- Bug in :func:`to_numeric` when converting ``datetime`` objects and ``NaT``, either as scalars or as elements of a :class:`Series`. (:issue:`43280`)
11941195
- Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`)
11951196
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
11961197
- Bug in :meth:`DataFrame.apply` raising ``RecursionError`` when passing ``func=list[int]``. (:issue:`61565`)

pandas/_libs/lib.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def maybe_convert_numeric(
124124
na_values: set,
125125
convert_empty: bool = ...,
126126
coerce_numeric: bool = ...,
127+
convert_datetime: bool = ...,
127128
convert_to_masked_nullable: Literal[False] = ...,
128129
) -> tuple[np.ndarray, None]: ...
129130
@overload

pandas/_libs/lib.pyx

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2300,6 +2300,7 @@ def maybe_convert_numeric(
23002300
set na_values,
23012301
bint convert_empty=True,
23022302
bint coerce_numeric=False,
2303+
bint convert_datetime=True,
23032304
bint convert_to_masked_nullable=False,
23042305
) -> tuple[np.ndarray, np.ndarray | None]:
23052306
"""
@@ -2449,6 +2450,18 @@ def maybe_convert_numeric(
24492450
elif is_decimal(val):
24502451
floats[i] = complexes[i] = val
24512452
seen.float_ = True
2453+
elif convert_datetime and (PyDate_Check(val) or cnp.is_datetime64_object(val)):
2454+
# convert_datetime=False lets callers (e.g. the parsers' _infer_types) skip this conversion
2455+
# PyDate_Check also includes PyDatetime_Check
2456+
seen.datetime_ = True
2457+
if val in na_values or checknull(val):
2458+
seen.saw_null()
2459+
mask[i] = 1
2460+
floats[i] = NaN
2461+
else:
2462+
ints[i] = np.datetime64(val).astype(int)
2463+
# a pd.NaT elsewhere in the input forces a float result, so also store a float copy (GH#43280)
2464+
floats[i] = float(ints[i])
24522465
else:
24532466
try:
24542467
floatify(val, &fval, &maybe_int)
@@ -2517,7 +2530,7 @@ def maybe_convert_numeric(
25172530
if seen.null_ and convert_to_masked_nullable:
25182531
return (floats, mask.view(np.bool_))
25192532
return (floats, None)
2520-
elif seen.int_:
2533+
elif seen.int_ or seen.datetime_:
25212534
if seen.null_ and convert_to_masked_nullable:
25222535
if seen.uint_:
25232536
return (uints, mask.view(np.bool_))

pandas/core/tools/numeric.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,12 @@ def to_numeric(
204204
return float(arg)
205205
if is_number(arg):
206206
return arg
207-
if isinstance(arg, (Timedelta, Timestamp)):
207+
if isinstance(arg, Timedelta):
208208
return arg._value
209+
if isinstance(arg, Timestamp):
210+
if arg.tzinfo:
211+
arg = arg.tz_convert("UTC").replace(tzinfo=None)
212+
209213
is_scalars = True
210214
values = np.array([arg], dtype="O")
211215
elif getattr(arg, "ndim", 1) > 1:
@@ -227,8 +231,6 @@ def to_numeric(
227231
new_mask: np.ndarray | None = None
228232
if is_numeric_dtype(values_dtype):
229233
pass
230-
elif lib.is_np_dtype(values_dtype, "mM"):
231-
values = values.view(np.int64)
232234
else:
233235
values = ensure_object(values)
234236
coerce_numeric = errors != "raise"

pandas/io/parsers/base_parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,7 @@ def _infer_types(
490490
values,
491491
na_values,
492492
False,
493+
convert_datetime=False,
493494
convert_to_masked_nullable=non_default_dtype_backend, # type: ignore[arg-type]
494495
)
495496
except (ValueError, TypeError):

pandas/tests/tools/test_to_numeric.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
from datetime import datetime
12
import decimal
3+
from functools import partial
24

35
import numpy as np
46
from numpy import iinfo
@@ -902,6 +904,54 @@ def test_to_numeric_dtype_backend_error(dtype_backend):
902904
tm.assert_series_equal(result, expected)
903905

904906

907+
@pytest.mark.parametrize(
    "input_value, expected, pd_type",
    [
        (datetime(2021, 8, 22), 1629590400000000, "scalar"),
        (datetime(2025, 2, 21), 1740096000000000, "scalar"),
        (pd.NaT, np.nan, "scalar"),
        ([datetime(2021, 8, 22)], [1629590400000000], "series"),
        ([datetime(2025, 2, 21)], [1740096000000000], "series"),
        ([pd.NaT], [np.nan], "series"),
        ([datetime(2021, 8, 22), pd.NaT], [float(1629590400000000), np.nan], "series"),
        ([pd.NaT, datetime(2021, 8, 22)], [np.nan, float(1629590400000000)], "series"),
        (
            ["apple", 1, datetime(2021, 8, 22)],
            [np.nan, float(1.0), float(1629590400000000)],
            "series_coerce",
        ),
        ([pd.NaT], [np.nan], "series_partial"),
        ([datetime(2025, 2, 21)], [1740096000000000], "series_partial"),
        (
            [pd.NaT, datetime(2025, 2, 21)],
            [np.nan, float(1740096000000000)],
            "series_partial",
        ),
    ],
)
def test_to_numeric_datetime(input_value, expected, pd_type):
    """
    Check to_numeric on datetime and NaT inputs.

    ``pd_type`` selects how ``input_value`` reaches ``to_numeric``:

    * ``"scalar"``: passed directly as a single value
    * ``"series"``: wrapped in a Series
    * ``"series_coerce"``: wrapped in a Series, with ``errors="coerce"``
    * ``"series_partial"``: applied element-wise through ``Series.apply``
    """
    if pd_type == "scalar":
        result = to_numeric(input_value)
        if pd.isna(expected):
            # NaN never compares equal to itself, so test for missingness
            assert pd.isna(result)
        else:
            assert result == expected
        return

    # Each Series-based mode maps to a callable that takes the input Series.
    series_converters = {
        "series": to_numeric,
        "series_coerce": partial(to_numeric, errors="coerce"),
        "series_partial": lambda ser: ser.apply(partial(to_numeric)),
    }
    convert = series_converters.get(pd_type)
    if convert is not None:
        tm.assert_series_equal(convert(Series(input_value)), Series(expected))
953+
954+
905955
def test_invalid_dtype_backend():
906956
ser = Series([1, 2, 3])
907957
msg = (

0 commit comments

Comments
 (0)