Skip to content

Commit bd76016

Browse files
committed
API: make Day preserve time-of-day across DST transitions
1 parent fe07fd5 commit bd76016

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+331
-123
lines changed

doc/source/reference/offset_frequency.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1107,7 +1107,6 @@ Properties
11071107
.. autosummary::
11081108
:toctree: api/
11091109

1110-
Day.delta
11111110
Day.freqstr
11121111
Day.kwds
11131112
Day.name

doc/source/user_guide/timedeltas.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Further, operations among the scalars yield another scalar ``Timedelta``.
6363

6464
.. ipython:: python
6565
66-
pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta(
66+
pd.Timedelta(pd.offsets.Hour(48)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta(
6767
"00:00:00.000123"
6868
)
6969

pandas/_libs/tslibs/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
"is_supported_unit",
3434
"npy_unit_to_abbrev",
3535
"get_supported_reso",
36+
"Day",
3637
]
3738

3839
from pandas._libs.tslibs import dtypes # pylint: disable=import-self
@@ -60,6 +61,7 @@
6061
)
6162
from pandas._libs.tslibs.offsets import (
6263
BaseOffset,
64+
Day,
6365
Tick,
6466
to_offset,
6567
)

pandas/_libs/tslibs/offsets.pyi

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class BaseOffset:
9494
@property
9595
def nanos(self) -> int: ...
9696
def is_anchored(self) -> bool: ...
97+
def _maybe_to_hours(self) -> BaseOffset: ...
9798

9899
def _get_offset(name: str) -> BaseOffset: ...
99100

@@ -120,7 +121,9 @@ class Tick(SingleConstructorOffset):
120121

121122
def delta_to_tick(delta: timedelta) -> Tick: ...
122123

123-
class Day(Tick): ...
124+
class Day(BaseOffset):
125+
def _maybe_to_hours(self) -> Hour: ...
126+
124127
class Hour(Tick): ...
125128
class Minute(Tick): ...
126129
class Second(Tick): ...

pandas/_libs/tslibs/offsets.pyx

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,11 @@ cdef class BaseOffset:
801801
def nanos(self):
802802
raise ValueError(f"{self} is a non-fixed frequency")
803803

804+
def _maybe_to_hours(self):
805+
if not isinstance(self, Day):
806+
return self
807+
return Hour(self.n * 24)
808+
804809
def is_anchored(self) -> bool:
805810
# TODO: Does this make sense for the general case? It would help
806811
# if there were a canonical docstring for what is_anchored means.
@@ -939,8 +944,6 @@ cdef class Tick(SingleConstructorOffset):
939944
# Note: Without making this cpdef, we get AttributeError when calling
940945
# from __mul__
941946
cpdef Tick _next_higher_resolution(Tick self):
942-
if type(self) is Day:
943-
return Hour(self.n * 24)
944947
if type(self) is Hour:
945948
return Minute(self.n * 60)
946949
if type(self) is Minute:
@@ -1099,7 +1102,7 @@ cdef class Tick(SingleConstructorOffset):
10991102
self.normalize = False
11001103

11011104

1102-
cdef class Day(Tick):
1105+
cdef class Day(SingleConstructorOffset):
11031106
"""
11041107
Offset ``n`` days.
11051108
@@ -1129,11 +1132,41 @@ cdef class Day(Tick):
11291132
>>> ts + Day(-4)
11301133
Timestamp('2022-12-05 15:00:00')
11311134
"""
1135+
_adjust_dst = True
1136+
_attributes = tuple(["n", "normalize"])
11321137
_nanos_inc = 24 * 3600 * 1_000_000_000
11331138
_prefix = "D"
11341139
_period_dtype_code = PeriodDtypeCode.D
11351140
_creso = NPY_DATETIMEUNIT.NPY_FR_D
11361141

1142+
def __init__(self, n=1, normalize=False):
1143+
BaseOffset.__init__(self, n)
1144+
if normalize:
1145+
# GH#21427
1146+
raise ValueError(
1147+
"Day offset with `normalize=True` are not allowed."
1148+
)
1149+
1150+
def is_on_offset(self, dt) -> bool:
1151+
return True
1152+
1153+
@apply_wraps
1154+
def _apply(self, other):
1155+
if isinstance(other, Day):
1156+
# TODO: why isn't this handled in __add__?
1157+
return Day(self.n + other.n)
1158+
return other + np.timedelta64(self.n, "D")
1159+
1160+
@apply_array_wraps
1161+
def _apply_array(self, dtarr):
1162+
return dtarr + np.timedelta64(self.n, "D")
1163+
1164+
@cache_readonly
1165+
def freqstr(self) -> str:
1166+
if self.n != 1:
1167+
return str(self.n) + "D"
1168+
return "D"
1169+
11371170

11381171
cdef class Hour(Tick):
11391172
"""
@@ -1267,16 +1300,13 @@ cdef class Nano(Tick):
12671300
def delta_to_tick(delta: timedelta) -> Tick:
12681301
if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0:
12691302
# nanoseconds only for pd.Timedelta
1270-
if delta.seconds == 0:
1271-
return Day(delta.days)
1303+
seconds = delta.days * 86400 + delta.seconds
1304+
if seconds % 3600 == 0:
1305+
return Hour(seconds / 3600)
1306+
elif seconds % 60 == 0:
1307+
return Minute(seconds / 60)
12721308
else:
1273-
seconds = delta.days * 86400 + delta.seconds
1274-
if seconds % 3600 == 0:
1275-
return Hour(seconds / 3600)
1276-
elif seconds % 60 == 0:
1277-
return Minute(seconds / 60)
1278-
else:
1279-
return Second(seconds)
1309+
return Second(seconds)
12801310
else:
12811311
nanos = delta_to_nanoseconds(delta)
12821312
if nanos % 1_000_000 == 0:
@@ -4674,7 +4704,7 @@ cpdef to_offset(freq, bint is_period=False):
46744704
<2 * BusinessDays>
46754705
46764706
>>> to_offset(pd.Timedelta(days=1))
4677-
<Day>
4707+
<24 * Hours>
46784708
46794709
>>> to_offset(pd.offsets.Hour())
46804710
<Hour>
@@ -4741,7 +4771,7 @@ cpdef to_offset(freq, bint is_period=False):
47414771
)
47424772
prefix = c_DEPR_ABBREVS[prefix]
47434773

4744-
if prefix in {"D", "h", "min", "s", "ms", "us", "ns"}:
4774+
if prefix in {"h", "min", "s", "ms", "us", "ns"}:
47454775
# For these prefixes, we have something like "3h" or
47464776
# "2.5min", so we can construct a Timedelta with the
47474777
# matching unit and get our offset from delta_to_tick
@@ -4759,6 +4789,12 @@ cpdef to_offset(freq, bint is_period=False):
47594789

47604790
if delta is None:
47614791
delta = offset
4792+
elif isinstance(delta, Day) and isinstance(offset, Tick):
4793+
# e.g. "1D1H" is treated like "25H"
4794+
delta = Hour(delta.n * 24) + offset
4795+
elif isinstance(offset, Day) and isinstance(delta, Tick):
4796+
# e.g. "1H1D" is treated like "25H"
4797+
delta = delta + Hour(offset.n * 24)
47624798
else:
47634799
delta = delta + offset
47644800
except (ValueError, TypeError) as err:

pandas/_libs/tslibs/period.pyx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ from pandas._libs.tslibs.offsets cimport (
118118
from pandas._libs.tslibs.offsets import (
119119
INVALID_FREQ_ERR_MSG,
120120
BDay,
121+
Day,
121122
)
122123

123124
cdef:
@@ -1827,6 +1828,10 @@ cdef class _Period(PeriodMixin):
18271828
# i.e. np.timedelta64("nat")
18281829
return NaT
18291830

1831+
if isinstance(other, Day):
1832+
# Periods are timezone-naive, so we treat Day as Tick-like
1833+
other = np.timedelta64(other.n, "D")
1834+
18301835
try:
18311836
inc = delta_to_nanoseconds(other, reso=self._dtype._creso, round_ok=False)
18321837
except ValueError as err:
@@ -1854,7 +1859,7 @@ cdef class _Period(PeriodMixin):
18541859
return NaT
18551860
return other.__add__(self)
18561861

1857-
if is_any_td_scalar(other):
1862+
if is_any_td_scalar(other) or isinstance(other, Day):
18581863
return self._add_timedeltalike_scalar(other)
18591864
elif is_offset_object(other):
18601865
return self._add_offset(other)

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1922,15 +1922,18 @@ class Timedelta(_Timedelta):
19221922

19231923
from pandas._libs.tslibs.offsets import to_offset
19241924

1925-
to_offset(freq).nanos # raises on non-fixed freq
1925+
orig = freq
1926+
# In this context it is sufficiently clear that "D" means 24H
1927+
freq = to_offset(freq)._maybe_to_hours()
1928+
freq.nanos # raises on non-fixed freq
19261929
unit = delta_to_nanoseconds(to_offset(freq), self._creso)
19271930

19281931
arr = np.array([self._value], dtype="i8")
19291932
try:
19301933
result = round_nsint64(arr, mode, unit)[0]
19311934
except OverflowError as err:
19321935
raise OutOfBoundsTimedelta(
1933-
f"Cannot round {self} to freq={freq} without overflow"
1936+
f"Cannot round {self} to freq={orig} without overflow"
19341937
) from err
19351938
return Timedelta._from_value_and_reso(result, self._creso)
19361939

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1895,7 +1895,8 @@ class Timestamp(_Timestamp):
18951895
cdef:
18961896
int64_t nanos
18971897

1898-
freq = to_offset(freq, is_period=False)
1898+
# In this context it is sufficiently clear that "D" means 24H
1899+
freq = to_offset(freq, is_period=False)._maybe_to_hours()
18991900
freq.nanos # raises on non-fixed freq
19001901
nanos = delta_to_nanoseconds(freq, self._creso)
19011902
if nanos == 0:

pandas/core/arrays/datetimelike.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pandas._libs.arrays import NDArrayBacked
2828
from pandas._libs.tslibs import (
2929
BaseOffset,
30+
Day,
3031
IncompatibleFrequency,
3132
NaT,
3233
NaTType,
@@ -897,9 +898,16 @@ def inferred_freq(self) -> str | None:
897898
if self.ndim != 1:
898899
return None
899900
try:
900-
return frequencies.infer_freq(self)
901+
res = frequencies.infer_freq(self)
901902
except ValueError:
902903
return None
904+
if self.dtype.kind == "m" and res is not None and res.endswith("D"):
905+
# TimedeltaArray freq must be a Tick, so we convert the inferred
906+
# daily freq to hourly.
907+
if res == "D":
908+
return "24h"
909+
res = str(int(res[:-1]) * 24) + "h"
910+
return res
903911

904912
@property # NB: override with cache_readonly in immutable subclasses
905913
def _resolution_obj(self) -> Resolution | None:
@@ -1040,6 +1048,10 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
10401048
elif isinstance(self.freq, Tick):
10411049
# In these cases
10421050
return self.freq
1051+
elif isinstance(self.freq, Day) and getattr(self, "tz", None) is None:
1052+
return self.freq
1053+
# TODO: are there tzaware cases when we can reliably preserve freq?
1054+
# We have a bunch of tests that seem to think so
10431055
return None
10441056

10451057
@final
@@ -1139,6 +1151,10 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
11391151
res_m8 = res_values.view(f"timedelta64[{self.unit}]")
11401152

11411153
new_freq = self._get_arithmetic_result_freq(other)
1154+
if new_freq is not None:
1155+
# TODO: are we sure this is right?
1156+
new_freq = new_freq._maybe_to_hours()
1157+
11421158
new_freq = cast("Tick | None", new_freq)
11431159
return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)
11441160

@@ -1979,9 +1995,13 @@ def __init__(
19791995
if copy:
19801996
values = values.copy()
19811997
if freq:
1998+
if values.dtype.kind == "m" and isinstance(freq, Day):
1999+
raise TypeError("TimedeltaArray freq must be a Tick or None")
19822000
freq = to_offset(freq)
1983-
if values.dtype.kind == "m" and not isinstance(freq, Tick):
1984-
raise TypeError("TimedeltaArray/Index freq must be a Tick")
2001+
if values.dtype.kind == "m":
2002+
freq = freq._maybe_to_hours()
2003+
if not isinstance(freq, Tick):
2004+
raise TypeError("TimedeltaArray/Index freq must be a Tick")
19852005

19862006
NDArrayBacked.__init__(self, values=values, dtype=dtype)
19872007
self._freq = freq
@@ -2014,7 +2034,7 @@ def freq(self, value) -> None:
20142034
self._freq = value
20152035

20162036
@classmethod
2017-
def _validate_frequency(cls, index, freq, **kwargs):
2037+
def _validate_frequency(cls, index, freq: BaseOffset, **kwargs):
20182038
"""
20192039
Validate that a frequency is compatible with the values of a given
20202040
Datetime Array/Index or Timedelta Array/Index
@@ -2130,6 +2150,10 @@ def _round(self, freq, mode, ambiguous, nonexistent):
21302150
values = self.view("i8")
21312151
values = cast(np.ndarray, values)
21322152
offset = to_offset(freq)
2153+
2154+
# In this context it is clear "D" means "24H"
2155+
offset = offset._maybe_to_hours()
2156+
21332157
offset.nanos # raises on non-fixed frequencies
21342158
nanos = delta_to_nanoseconds(offset, self._creso)
21352159
if nanos == 0:
@@ -2210,6 +2234,9 @@ def _with_freq(self, freq) -> Self:
22102234
assert freq == "infer"
22112235
freq = to_offset(self.inferred_freq)
22122236

2237+
if self.dtype.kind == "m" and freq is not None:
2238+
assert isinstance(freq, Tick)
2239+
22132240
arr = self.view()
22142241
arr._freq = freq
22152242
return arr

pandas/core/arrays/datetimes.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,8 +451,10 @@ def _generate_range( # type: ignore[override]
451451
if end is not None:
452452
end = end.tz_localize(None)
453453

454-
if isinstance(freq, Tick):
455-
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
454+
if isinstance(freq, Tick) or (tz is None and isinstance(freq, Day)):
455+
i8values = generate_regular_range(
456+
start, end, periods, freq._maybe_to_hours(), unit=unit
457+
)
456458
else:
457459
xdr = _generate_range(
458460
start=start, end=end, periods=periods, offset=freq, unit=unit
@@ -899,7 +901,14 @@ def tz_convert(self, tz) -> Self:
899901

900902
# No conversion since timestamps are all UTC to begin with
901903
dtype = tz_to_dtype(tz, unit=self.unit)
902-
return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
904+
new_freq = self.freq
905+
if self.freq is not None and self.freq._adjust_dst:
906+
# TODO: in some cases we may be able to retain, e.g. if old and new
907+
# tz are both fixed offsets, or if no DST-crossings occur.
908+
# The latter is value-dependent behavior that we may want to avoid.
909+
# Or could convert e.g. "D" to "24h", see GH#51716
910+
new_freq = None
911+
return self._simple_new(self._ndarray, dtype=dtype, freq=new_freq)
903912

904913
@dtl.ravel_compat
905914
def tz_localize(

0 commit comments

Comments
 (0)