Skip to content

Commit 7421077

Browse files
API: timestamp resolution inference: default to microseconds when possible
1 parent e4a03b6 commit 7421077

File tree

6 files changed

+171
-51
lines changed

6 files changed

+171
-51
lines changed

pandas/_libs/tslib.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ cpdef array_to_datetime(
355355
iresult[i] = parse_pydatetime(val, &dts, creso=creso)
356356

357357
elif PyDate_Check(val):
358-
item_reso = NPY_DATETIMEUNIT.NPY_FR_s
358+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
359359
state.update_creso(item_reso)
360360
if infer_reso:
361361
creso = state.creso

pandas/_libs/tslibs/conversion.pyx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ from pandas._libs.missing cimport checknull_with_nat_and_na
3333
from pandas._libs.tslibs.dtypes cimport (
3434
abbrev_to_npy_unit,
3535
get_supported_reso,
36+
get_supported_reso_for_dts,
3637
npy_unit_to_attrname,
3738
periods_per_second,
3839
)
@@ -507,6 +508,9 @@ cdef _TSObject convert_datetime_to_tsobject(
507508
if nanos:
508509
obj.dts.ps = nanos * 1000
509510

511+
reso = get_supported_reso_for_dts(reso, &obj.dts)
512+
obj.creso = reso
513+
510514
try:
511515
obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts)
512516
except OverflowError as err:
@@ -622,7 +626,7 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
622626
&out_tzoffset, False
623627
)
624628
if not string_to_dts_failed:
625-
reso = get_supported_reso(out_bestunit)
629+
reso = get_supported_reso_for_dts(out_bestunit, &dts)
626630
check_dts_bounds(&dts, reso)
627631
obj = _TSObject()
628632
obj.dts = dts

pandas/_libs/tslibs/dtypes.pxd

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from numpy cimport int64_t
22

3-
from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT
3+
from pandas._libs.tslibs.np_datetime cimport (
4+
NPY_DATETIMEUNIT,
5+
npy_datetimestruct,
6+
)
47

58

69
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
@@ -9,6 +12,9 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil
912
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
1013
cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
1114
cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso)
15+
cdef NPY_DATETIMEUNIT get_supported_reso_for_dts(
16+
NPY_DATETIMEUNIT reso, npy_datetimestruct* dts
17+
)
1218
cdef bint is_supported_unit(NPY_DATETIMEUNIT reso)
1319

1420
cdef dict c_OFFSET_TO_PERIOD_FREQSTR

pandas/_libs/tslibs/dtypes.pyx

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,21 @@
22
# originals
33
from enum import Enum
44

5+
import numpy as np
6+
7+
from cpython.object cimport (
8+
Py_GE,
9+
Py_LE,
10+
)
11+
512
from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS
613
from pandas._libs.tslibs.np_datetime cimport (
714
NPY_DATETIMEUNIT,
15+
cmp_dtstructs,
816
get_conversion_factor,
917
import_pandas_datetime,
18+
npy_datetimestruct,
19+
pandas_datetime_to_datetimestruct,
1020
)
1121

1222
import_pandas_datetime()
@@ -504,6 +514,36 @@ cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso):
504514
return reso
505515

506516

517+
cdef npy_datetimestruct dts_us_min, dts_us_max
518+
pandas_datetime_to_datetimestruct(
519+
np.iinfo(np.int64).min + 1, NPY_DATETIMEUNIT.NPY_FR_us, &dts_us_min
520+
)
521+
pandas_datetime_to_datetimestruct(
522+
np.iinfo(np.int64).max, NPY_DATETIMEUNIT.NPY_FR_us, &dts_us_max
523+
)
524+
525+
526+
cdef NPY_DATETIMEUNIT get_supported_reso_for_dts(
527+
NPY_DATETIMEUNIT reso, npy_datetimestruct* dts
528+
):
529+
# Similar as above, but taking the actual datetime value in account,
530+
# defaulting to 'us' if possible.
531+
if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
532+
return NPY_DATETIMEUNIT.NPY_FR_ns
533+
if reso < NPY_DATETIMEUNIT.NPY_FR_us:
534+
if (
535+
cmp_dtstructs(dts, &dts_us_min, Py_GE)
536+
and cmp_dtstructs(dts, &dts_us_max, Py_LE)
537+
):
538+
return NPY_DATETIMEUNIT.NPY_FR_us
539+
else:
540+
# TODO: still distinguish between ms and s?
541+
return NPY_DATETIMEUNIT.NPY_FR_s
542+
elif reso > NPY_DATETIMEUNIT.NPY_FR_ns:
543+
return NPY_DATETIMEUNIT.NPY_FR_ns
544+
return reso
545+
546+
507547
cdef bint is_supported_unit(NPY_DATETIMEUNIT reso):
508548
return (
509549
reso == NPY_DATETIMEUNIT.NPY_FR_ns

pandas/_libs/tslibs/strptime.pyx

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ from pandas._libs.tslibs.conversion cimport (
5353
)
5454
from pandas._libs.tslibs.dtypes cimport (
5555
get_supported_reso,
56+
get_supported_reso_for_dts,
5657
npy_unit_to_abbrev,
5758
npy_unit_to_attrname,
5859
)
@@ -421,7 +422,7 @@ def array_strptime(
421422
continue
422423
elif PyDate_Check(val):
423424
state.found_other = True
424-
item_reso = NPY_DATETIMEUNIT.NPY_FR_s
425+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
425426
state.update_creso(item_reso)
426427
if infer_reso:
427428
creso = state.creso
@@ -460,7 +461,7 @@ def array_strptime(
460461
if string_to_dts_succeeded:
461462
# No error reported by string_to_dts, pick back up
462463
# where we left off
463-
item_reso = get_supported_reso(out_bestunit)
464+
item_reso = get_supported_reso_for_dts(out_bestunit, &dts)
464465
state.update_creso(item_reso)
465466
if infer_reso:
466467
creso = state.creso
@@ -622,7 +623,7 @@ cdef tzinfo _parse_with_format(
622623
f"time data \"{val}\" doesn't match format \"{fmt}\""
623624
)
624625

625-
item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_s
626+
item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us
626627

627628
iso_year = -1
628629
year = 1900
@@ -710,11 +711,7 @@ cdef tzinfo _parse_with_format(
710711
elif parse_code == 10:
711712
# e.g. val='10:10:10.100'; fmt='%H:%M:%S.%f'
712713
s = found_dict["f"]
713-
if len(s) <= 3:
714-
item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ms
715-
elif len(s) <= 6:
716-
item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us
717-
else:
714+
if len(s) > 6:
718715
item_reso[0] = NPY_FR_ns
719716
# Pad to always return nanoseconds
720717
s += "0" * (9 - len(s))

0 commit comments

Comments
 (0)