-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
ENH: add fold support to Timestamp constructor #31563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 92 commits
84bfff2
ba7fcd5
0b6f894
5c58b3a
546789a
fc69bbb
57d42b3
f2ad196
935a3ec
d35af8f
e6d4aaa
fd98b27
af86f79
a6965e9
4caf9bb
6843ed2
ebbf21f
4f43638
6238f9b
3584791
237341b
12b8b4e
92e990e
c2189d3
9c9c2dd
f0bbbcb
ca46078
411b036
464dadf
2b9f2f6
ef87010
ecfde58
6970ed1
353e554
bce8f0d
558c237
7b88ffd
9621c0a
6bf58b5
73364af
a8ad96c
104c97d
1f9e810
55f0b8a
5e1be83
bda4934
1065085
f9c6956
62d5d6b
6294ee9
f13e3d7
eade807
7269f9a
d650086
693cb6c
2fe9ce7
3e2c76c
2f4fdda
9f7a16e
a6d37ea
a017953
1d716e7
b47efe0
cf7c091
a49a7e9
c39c490
75e1633
21883be
b21cb47
3ca1fc3
edde445
d5925af
b128cde
a673b65
276fad7
5540de1
30eef01
68b05fc
d67ec4f
3262085
4790b76
353bd87
94e9e65
a69833a
e58ecb9
e1ffa8d
c57ec65
f3f8690
476c4a4
afaeb88
4a33d36
82ed93c
d9aea09
d68efb6
9328071
4ecbaf1
ee90ac7
25291e4
08cc256
2910720
f6c11da
6f16ea5
5024452
3e49b7a
bcf0905
9b614ae
467a11e
2145b05
8f82aa1
d39e811
3091689
4cfac36
e58fe0c
d166a67
5729eb8
c9863e1
1d72e2d
97883ce
aa5232b
0ebbe02
c840fd6
a793bb5
920d52a
d732139
752acbc
a1f69cf
97dc342
7ac14df
397b2c8
757bd41
46a279b
81560bb
4256642
a24594a
0168aa6
3a605c3
cd02318
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2220,6 +2220,22 @@ you can use the ``tz_convert`` method. | |
|
||
rng_pytz.tz_convert('US/Eastern') | ||
|
||
.. versionadded:: 1.1.0 | ||
|
||
For ambiguous times, pandas supports explicitly specifying the fold argument. | ||
|
||
Due to daylight saving time, one wall clock time can occur twice when shifting | ||
from summer to winter time; fold describes whether the datetime-like corresponds | ||
to the first (0) or the second time (1) the wall clock hits the ambiguous time. | ||
Fold is supported only for constructing from naive datetime or :class:`Timestamp` | ||
or for constructing from components (see below). | ||
|
||
.. ipython:: python | ||
|
||
pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), | ||
tz='dateutil/Europe/London', fold=0) | ||
pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, | ||
tz='dateutil/Europe/London', fold=1) | ||
|
||
.. note:: | ||
|
||
When using ``pytz`` time zones, :class:`DatetimeIndex` will construct a different | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,26 @@ For example: | |
ser["2014"] | ||
ser.loc["May 2015"] | ||
|
||
.. _whatsnew_110.timestamp_fold_support: | ||
|
||
Fold argument support in Timestamp constructor | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
:class:`Timestamp` now supports the ``fold`` argument according to PEP 495, similar to its parent ``datetime.datetime`` class. It supports both accepting ``fold`` as an initialization argument and inferring ``fold`` from other constructor arguments (:issue:`25057`, :issue:`31338`). | ||
|
||
|
||
For example: | ||
|
||
.. ipython:: python | ||
|
||
ts = pd.Timestamp("2019-10-27 01:30:00+00:00") | ||
ts.fold | ||
|
||
.. ipython:: python | ||
|
||
ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, | ||
tz="dateutil/Europe/London", fold=1) | ||
ts | ||
|
||
.. _whatsnew_110.enhancements.other: | ||
|
||
Other enhancements | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -216,6 +216,7 @@ cdef class _TSObject: | |
# npy_datetimestruct dts # npy_datetimestruct | ||
# int64_t value # numpy dt64 | ||
# object tzinfo | ||
# bint fold | ||
|
||
@property | ||
def value(self): | ||
|
@@ -323,6 +324,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, | |
cdef: | ||
_TSObject obj = _TSObject() | ||
|
||
obj.fold = ts.fold | ||
if tz is not None: | ||
tz = maybe_get_tz(tz) | ||
|
||
|
@@ -355,6 +357,24 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, | |
obj.value += nanos | ||
obj.dts.ps = nanos * 1000 | ||
|
||
if tz is not None: | ||
mroeschke marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
AlexKirko marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
if is_utc(tz) or is_tzlocal(tz): | ||
# TODO: think on how we can infer fold for local Timezone | ||
# and adjust value for fold | ||
pass | ||
else: | ||
trans, deltas, typ = get_dst_info(tz) | ||
|
||
if typ in ['pytz', 'dateutil']: | ||
pos = trans.searchsorted(obj.value, side='right') - 1 | ||
# pytz assumes fold == 1, dateutil fold == 0 | ||
# adjust only if necessary | ||
if (typ == 'pytz' and obj.fold == 0 or | ||
typ == 'dateutil' and obj.fold == 1): | ||
pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, | ||
obj.fold) | ||
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) | ||
mroeschke marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
check_dts_bounds(&obj.dts) | ||
check_overflows(obj) | ||
return obj | ||
|
@@ -381,6 +401,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, | |
_TSObject obj = _TSObject() | ||
int64_t value # numpy dt64 | ||
datetime dt | ||
bint fold | ||
|
||
value = dtstruct_to_dt64(&dts) | ||
obj.dts = dts | ||
|
@@ -390,10 +411,22 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, | |
check_overflows(obj) | ||
return obj | ||
|
||
# Infer fold from offset-adjusted obj.value | ||
if is_utc(tz) or is_tzlocal(tz): | ||
# TODO: think on how we can infer fold for local Timezone | ||
# and adjust value for fold | ||
pass | ||
else: | ||
trans, deltas, typ = get_dst_info(tz) | ||
|
||
if typ in ['pytz', 'dateutil']: | ||
pos = trans.searchsorted(obj.value, side='right') - 1 | ||
fold = _infer_tsobject_fold(obj, trans, deltas, pos) | ||
|
||
# Keep the converter same as PyDateTime's | ||
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, | ||
obj.dts.hour, obj.dts.min, obj.dts.sec, | ||
obj.dts.us, obj.tzinfo) | ||
obj.dts.us, obj.tzinfo, fold=fold) | ||
|
||
obj = convert_datetime_to_tsobject( | ||
dt, tz, nanos=obj.dts.ps // 1000) | ||
return obj | ||
|
@@ -528,7 +561,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): | |
|
||
Notes | ||
----- | ||
Sets obj.tzinfo inplace, alters obj.dts inplace. | ||
Sets obj.tzinfo inplace, alters obj.dts inplace, alters obj.value inplace. | ||
AlexKirko marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
""" | ||
cdef: | ||
ndarray[int64_t] trans | ||
|
@@ -546,6 +579,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): | |
elif is_tzlocal(tz): | ||
local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) | ||
dt64_to_dtstruct(local_val, &obj.dts) | ||
# TODO: think on how we can infer fold for local Timezone | ||
|
||
# and adjust value for fold | ||
else: | ||
# Adjust datetime64 timestamp, recompute datetimestruct | ||
trans, deltas, typ = get_dst_info(tz) | ||
|
@@ -554,15 +589,13 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): | |
# static/fixed tzinfo; in this case we know len(deltas) == 1 | ||
# This can come back with `typ` of either "fixed" or None | ||
dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) | ||
elif typ == 'pytz': | ||
# i.e. treat_tz_as_pytz(tz) | ||
pos = trans.searchsorted(obj.value, side='right') - 1 | ||
tz = tz._tzinfos[tz._transition_info[pos]] | ||
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) | ||
elif typ == 'dateutil': | ||
# i.e. treat_tz_as_dateutil(tz) | ||
elif typ in ['pytz', 'dateutil']: | ||
AlexKirko marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
pos = trans.searchsorted(obj.value, side='right') - 1 | ||
if typ == 'pytz': | ||
tz = tz._tzinfos[tz._transition_info[pos]] | ||
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) | ||
|
||
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) | ||
mroeschke marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
# Note: as of 2018-07-17 all tzinfo objects that are _not_ | ||
# either pytz or dateutil have is_fixed_offset(tz) == True, | ||
|
@@ -572,6 +605,87 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): | |
obj.tzinfo = tz | ||
|
||
|
||
cdef inline int32_t _adjust_tsobject_for_fold(_TSObject obj, object trans,
                                              object deltas, int32_t pos,
                                              bint fold):
    """
    Adjust _TSObject value for fold if possible. Return updated last offset
    transition position in the trans list.

    Parameters
    ----------
    obj : _TSObject
    trans : object
        List of offset transition points in nanoseconds since epoch.
    deltas : object
        List of offsets corresponding to transition points in trans.
    pos : int32_t
        Position of the last transition point before taking fold into account.
    fold : bint
        Due to daylight saving time, one wall clock time can occur twice
        when shifting from summer to winter time; fold describes whether the
        datetime-like corresponds to the first (0) or the second time (1)
        the wall clock hits the ambiguous time

    Returns
    -------
    int32_t
        Position of the last transition point after taking fold into account.

    Notes
    -----
    Alters obj.value inplace.
    """
    if fold == 0:
        if pos > 0:
            # Difference between the offset in effect before the transition
            # at ``pos`` and the offset in effect after it.
            fold_delta = deltas[pos - 1] - deltas[pos]
            # Only shift when the shifted value lands before the transition,
            # i.e. it belongs to the previous offset period.
            if obj.value - fold_delta < trans[pos]:
                obj.value -= fold_delta
                pos -= 1
    elif fold == 1:
        # ``pos + 1`` is indexed below, so ``pos`` must stop one short of the
        # last entry; ``pos < len(deltas)`` would allow an out-of-bounds read
        # when ``pos`` already points at the final transition.
        if pos < len(deltas) - 1:
            fold_delta = deltas[pos] - deltas[pos + 1]
            # Only shift when the shifted value lands after the next
            # transition, i.e. it belongs to the following offset period.
            if obj.value + fold_delta > trans[pos + 1]:
                obj.value += fold_delta
                pos += 1

    return pos
|
||
|
||
cdef inline bint _infer_tsobject_fold(_TSObject obj, object trans,
                                      object deltas, int32_t pos):
    """
    Infer the fold of a _TSObject from its value.

    Fold is taken to be 0 unless shifting the value back by the offset change
    at the last transition would place it before that transition, in which
    case fold is 1.

    Parameters
    ----------
    obj : _TSObject
    trans : object
        List of offset transition points in nanoseconds since epoch.
    deltas : object
        List of offsets corresponding to transition points in trans.
    pos : int32_t
        Position of the last transition point before taking fold into account.

    Returns
    -------
    bint
        Due to daylight saving time, one wall clock time can occur twice
        when shifting from summer to winter time; fold describes whether the
        datetime-like corresponds to the first (0) or the second time (1)
        the wall clock hits the ambiguous time
    """
    # No earlier transition to compare against: fold stays at 0.
    if pos <= 0:
        return 0

    # Change in offset introduced by the transition at ``pos``.
    offset_change = deltas[pos - 1] - deltas[pos]
    return obj.value - offset_change < trans[pos]
|
||
cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): | ||
""" | ||
Take a datetime/Timestamp in UTC and localizes to timezone tz. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would make this a note::
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.