Skip to content

Commit f1d5bc3

Browse files
committed
API: offsets.Day is always calendar-day
1 parent 0eaca9e commit f1d5bc3

26 files changed

+331
-66
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,44 @@ This change also applies to :meth:`.DataFrameGroupBy.value_counts`. Here, there
297297
298298
df.groupby("a", sort=True).value_counts(sort=False)
299299
300+
.. _whatsnew_300.api_breaking.offsets_day_not_a_tick:
301+
302+
Changed behavior of ``pd.offsets.Day`` to always represent calendar-day
303+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
304+
305+
In previous versions of pandas, :class:`offsets.Day` represented a fixed span
306+
of 24 hours, disregarding Daylight Saving Time transitions. It now consistently
307+
behaves as a calendar-day, preserving time-of-day across DST transitions:
308+
309+
*Old behavior*
310+
311+
.. code-block:: ipython
312+
313+
In [5]: ts = pd.Timestamp("2025-03-08 08:00", tz="US/Eastern")
314+
In [6]: ts + pd.offsets.Day(1)
315+
Out[6]: Timestamp('2025-03-09 09:00:00-0400', tz='US/Eastern')
316+
317+
*New behavior*
318+
319+
.. ipython:: python
320+
321+
ts = pd.Timestamp("2025-03-08 08:00", tz="US/Eastern")
322+
ts + pd.offsets.Day(1)
323+
324+
This change fixes a long-standing bug in ``pd.date_range`` (:issue:`51716`, :issue:`35388`), but causes several
325+
small behavior differences as collateral:
326+
327+
- ``pd.offsets.Day(n)`` no longer compares as equal to ``pd.offsets.Hour(24*n)``
328+
- :class:`offsets.Day` no longer supports division
329+
- :class:`Timedelta` no longer accepts :class:`Day` objects as inputs
330+
- :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns
331+
an :class:`offsets.Hour` object in cases where it used to return a :class:`Day`
332+
object.
333+
- Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex`
334+
with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute.
335+
- Adding or subtracting a :class:`Day` with a :class:`Timedelta` is no longer supported.
336+
- Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise.
337+
300338
.. _whatsnew_300.api_breaking.deps:
301339

302340
Increased minimum version for Python

pandas/_libs/tslibs/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
__all__ = [
22
"BaseOffset",
3+
"Day",
34
"IncompatibleFrequency",
45
"NaT",
56
"NaTType",
@@ -61,6 +62,7 @@
6162
)
6263
from pandas._libs.tslibs.offsets import (
6364
BaseOffset,
65+
Day,
6466
Tick,
6567
to_offset,
6668
)

pandas/_libs/tslibs/offsets.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class Tick(SingleConstructorOffset):
116116

117117
def delta_to_tick(delta: timedelta) -> Tick: ...
118118

119-
class Day(Tick): ...
119+
class Day(BaseOffset): ...
120120
class Hour(Tick): ...
121121
class Minute(Tick): ...
122122
class Second(Tick): ...

pandas/_libs/tslibs/offsets.pyx

Lines changed: 80 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,8 +1023,6 @@ cdef class Tick(SingleConstructorOffset):
10231023
# Note: Without making this cpdef, we get AttributeError when calling
10241024
# from __mul__
10251025
cpdef Tick _next_higher_resolution(Tick self):
1026-
if type(self) is Day:
1027-
return Hour(self.n * 24)
10281026
if type(self) is Hour:
10291027
return Minute(self.n * 60)
10301028
if type(self) is Minute:
@@ -1173,7 +1171,7 @@ cdef class Tick(SingleConstructorOffset):
11731171
self.normalize = False
11741172

11751173

1176-
cdef class Day(Tick):
1174+
cdef class Day(SingleConstructorOffset):
11771175
"""
11781176
Offset ``n`` days.
11791177
@@ -1203,11 +1201,73 @@ cdef class Day(Tick):
12031201
>>> ts + Day(-4)
12041202
Timestamp('2022-12-05 15:00:00')
12051203
"""
1204+
_adjust_dst = True
1205+
_attributes = tuple(["n", "normalize"])
12061206
_nanos_inc = 24 * 3600 * 1_000_000_000
12071207
_prefix = "D"
12081208
_period_dtype_code = PeriodDtypeCode.D
12091209
_creso = NPY_DATETIMEUNIT.NPY_FR_D
12101210

1211+
def __init__(self, n=1, normalize=False):
1212+
BaseOffset.__init__(self, n)
1213+
if normalize:
1214+
# GH#21427
1215+
raise ValueError(
1216+
"Day offset with `normalize=True` are not allowed."
1217+
)
1218+
1219+
def is_on_offset(self, dt) -> bool:
1220+
return True
1221+
1222+
@apply_wraps
1223+
def _apply(self, other):
1224+
if isinstance(other, Day):
1225+
# TODO: why isn't this handled in __add__?
1226+
return Day(self.n + other.n)
1227+
return other + np.timedelta64(self.n, "D")
1228+
1229+
def _apply_array(self, dtarr):
1230+
return dtarr + np.timedelta64(self.n, "D")
1231+
1232+
@cache_readonly
1233+
def freqstr(self) -> str:
1234+
"""
1235+
Return a string representing the frequency.
1236+
1237+
Examples
1238+
--------
1239+
>>> pd.offsets.Day(5).freqstr
1240+
'5D'
1241+
1242+
>>> pd.offsets.Day(1).freqstr
1243+
'D'
1244+
"""
1245+
if self.n != 1:
1246+
return str(self.n) + "D"
1247+
return "D"
1248+
1249+
# Having this here isn't strictly-correct post-GH#???
1250+
# but this gets called in timedelta.get_unit_for_round in cases where
1251+
# Day unambiguously means 24h.
1252+
@property
1253+
def nanos(self) -> int64_t:
1254+
"""
1255+
Returns an integer of the total number of nanoseconds.
1256+
1257+
See Also
1258+
--------
1259+
tseries.offsets.Hour.nanos :
1260+
Returns an integer of the total number of nanoseconds.
1261+
tseries.offsets.Day.nanos :
1262+
Returns an integer of the total number of nanoseconds.
1263+
1264+
Examples
1265+
--------
1266+
>>> pd.offsets.Day(5).nanos
1267+
432000000000000
1268+
"""
1269+
return self.n * self._nanos_inc
1270+
12111271

12121272
cdef class Hour(Tick):
12131273
"""
@@ -1431,16 +1491,13 @@ cdef class Nano(Tick):
14311491
def delta_to_tick(delta: timedelta) -> Tick:
14321492
if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0:
14331493
# nanoseconds only for pd.Timedelta
1434-
if delta.seconds == 0:
1435-
return Day(delta.days)
1494+
seconds = delta.days * 86400 + delta.seconds
1495+
if seconds % 3600 == 0:
1496+
return Hour(seconds / 3600)
1497+
elif seconds % 60 == 0:
1498+
return Minute(seconds / 60)
14361499
else:
1437-
seconds = delta.days * 86400 + delta.seconds
1438-
if seconds % 3600 == 0:
1439-
return Hour(seconds / 3600)
1440-
elif seconds % 60 == 0:
1441-
return Minute(seconds / 60)
1442-
else:
1443-
return Second(seconds)
1500+
return Second(seconds)
14441501
else:
14451502
nanos = delta_to_nanoseconds(delta)
14461503
if nanos % 1_000_000 == 0:
@@ -5332,6 +5389,17 @@ cpdef to_offset(freq, bint is_period=False):
53325389
raise ValueError(INVALID_FREQ_ERR_MSG.format(
53335390
f"{freq}, failed to parse with error message: {repr(err)}")
53345391
) from err
5392+
5393+
# TODO(3.0?) once deprecation of "d" is enforced, the check for it here
5394+
# can be removed
5395+
if (
5396+
isinstance(result, Hour)
5397+
and result.n % 24 == 0
5398+
and ("d" in freq or "D" in freq)
5399+
):
5400+
# Since Day is no longer a Tick, delta_to_tick returns Hour above,
5401+
# so we convert back here.
5402+
result = Day(result.n // 24)
53355403
else:
53365404
result = None
53375405

pandas/_libs/tslibs/period.pyx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ from pandas._libs.tslibs.offsets cimport (
113113
from pandas._libs.tslibs.offsets import (
114114
INVALID_FREQ_ERR_MSG,
115115
BDay,
116+
Day,
116117
)
117118
from pandas.util._decorators import set_module
118119

@@ -1825,6 +1826,10 @@ cdef class _Period(PeriodMixin):
18251826
# i.e. np.timedelta64("nat")
18261827
return NaT
18271828

1829+
if isinstance(other, Day):
1830+
# Periods are timezone-naive, so we treat Day as Tick-like
1831+
other = np.timedelta64(other.n, "D")
1832+
18281833
try:
18291834
inc = delta_to_nanoseconds(other, reso=self._dtype._creso, round_ok=False)
18301835
except ValueError as err:
@@ -1846,7 +1851,7 @@ cdef class _Period(PeriodMixin):
18461851

18471852
@cython.overflowcheck(True)
18481853
def __add__(self, other):
1849-
if is_any_td_scalar(other):
1854+
if is_any_td_scalar(other) or isinstance(other, Day):
18501855
return self._add_timedeltalike_scalar(other)
18511856
elif is_offset_object(other):
18521857
return self._add_offset(other)

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ from pandas._libs.tslibs.np_datetime import (
7878
)
7979

8080
from pandas._libs.tslibs.offsets cimport is_tick_object
81+
from pandas._libs.tslibs.offsets import Day
8182
from pandas._libs.tslibs.util cimport (
8283
is_array,
8384
is_float_object,
@@ -2577,4 +2578,8 @@ cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? -1:
25772578

25782579
freq = to_offset(freq)
25792580
freq.nanos # raises on non-fixed freq
2581+
if isinstance(freq, Day):
2582+
# In the "round" context, Day unambiguously means 24h, not calendar-day
2583+
freq = Timedelta(days=freq.n)
2584+
25802585
return delta_to_nanoseconds(freq, creso)

pandas/core/arrays/_ranges.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas._libs.lib import i8max
1313
from pandas._libs.tslibs import (
1414
BaseOffset,
15+
Day,
1516
OutOfBoundsDatetime,
1617
Timedelta,
1718
Timestamp,
@@ -55,8 +56,13 @@ def generate_regular_range(
5556
"""
5657
istart = start._value if start is not None else None
5758
iend = end._value if end is not None else None
58-
freq.nanos # raises if non-fixed frequency
59-
td = Timedelta(freq)
59+
if isinstance(freq, Day):
60+
# In contexts without a timezone, a Day offset is unambiguously
61+
# interpretable as Timedelta-like.
62+
td = Timedelta(days=freq.n)
63+
else:
64+
freq.nanos # raises if non-fixed frequency
65+
td = Timedelta(freq)
6066
b: int
6167
e: int
6268
try:

pandas/core/arrays/datetimelike.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
)
3030
from pandas._libs.tslibs import (
3131
BaseOffset,
32+
Day,
3233
IncompatibleFrequency,
3334
NaT,
3435
NaTType,
@@ -44,6 +45,7 @@
4445
ints_to_pydatetime,
4546
ints_to_pytimedelta,
4647
periods_per_day,
48+
timezones,
4749
to_offset,
4850
)
4951
from pandas._libs.tslibs.fields import (
@@ -1068,6 +1070,26 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
10681070
elif isinstance(self.freq, Tick):
10691071
# In these cases
10701072
return self.freq
1073+
elif self.dtype.kind == "m" and isinstance(other, Timedelta):
1074+
return self.freq
1075+
elif (
1076+
self.dtype.kind == "m"
1077+
and isinstance(other, Timestamp)
1078+
and (other.tz is None or timezones.is_utc(other.tz))
1079+
):
1080+
# e.g. test_td64arr_add_sub_datetimelike_scalar tdarr + timestamp
1081+
# gives a DatetimeArray. As long as the timestamp has no timezone
1082+
# or is UTC, the result can retain a Day freq.
1083+
return self.freq
1084+
elif (
1085+
lib.is_np_dtype(self.dtype, "M")
1086+
and isinstance(self.freq, Day)
1087+
and isinstance(other, Timedelta)
1088+
):
1089+
# e.g. TestTimedelta64ArithmeticUnsorted::test_timedelta
1090+
# Day is unambiguously 24h
1091+
return self.freq
1092+
10711093
return None
10721094

10731095
@final
@@ -1358,6 +1380,10 @@ def __add__(self, other):
13581380
result: np.ndarray | DatetimeLikeArrayMixin = self._add_nat()
13591381
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
13601382
result = self._add_timedeltalike_scalar(other)
1383+
elif isinstance(other, Day) and lib.is_np_dtype(self.dtype, "Mm"):
1384+
# We treat this as Tick-like
1385+
td = Timedelta(days=other.n).as_unit("s")
1386+
result = self._add_timedeltalike_scalar(td)
13611387
elif isinstance(other, BaseOffset):
13621388
# specifically _not_ a Tick
13631389
result = self._add_offset(other)
@@ -1418,6 +1444,10 @@ def __sub__(self, other):
14181444
result: np.ndarray | DatetimeLikeArrayMixin = self._sub_nat()
14191445
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
14201446
result = self._add_timedeltalike_scalar(-other)
1447+
elif isinstance(other, Day) and lib.is_np_dtype(self.dtype, "Mm"):
1448+
# We treat this as Tick-like
1449+
td = Timedelta(days=other.n).as_unit("s")
1450+
result = self._add_timedeltalike_scalar(-td)
14211451
elif isinstance(other, BaseOffset):
14221452
# specifically _not_ a Tick
14231453
result = self._add_offset(-other)
@@ -1982,7 +2012,7 @@ def freq(self, value) -> None:
19822012
if value is not None:
19832013
value = to_offset(value)
19842014
self._validate_frequency(self, value)
1985-
if self.dtype.kind == "m" and not isinstance(value, Tick):
2015+
if self.dtype.kind == "m" and not isinstance(value, (Tick, Day)):
19862016
raise TypeError("TimedeltaArray/Index freq must be a Tick")
19872017

19882018
if self.ndim > 1:
@@ -2279,7 +2309,7 @@ def _with_freq(self, freq) -> Self:
22792309
pass
22802310
elif len(self) == 0 and isinstance(freq, BaseOffset):
22812311
# Always valid. In the TimedeltaArray case, we require a Tick offset
2282-
if self.dtype.kind == "m" and not isinstance(freq, Tick):
2312+
if self.dtype.kind == "m" and not isinstance(freq, (Tick, Day)):
22832313
raise TypeError("TimedeltaArray/Index freq must be a Tick")
22842314
else:
22852315
# As an internal method, we can ensure this assertion always holds

pandas/core/arrays/datetimes.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ def _generate_range(
474474
if end is not None:
475475
end = end.tz_localize(None)
476476

477-
if isinstance(freq, Tick):
477+
if isinstance(freq, (Tick, Day)):
478478
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
479479
else:
480480
xdr = _generate_range(
@@ -928,7 +928,10 @@ def tz_convert(self, tz) -> Self:
928928

929929
# No conversion since timestamps are all UTC to begin with
930930
dtype = tz_to_dtype(tz, unit=self.unit)
931-
return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
931+
new_freq = None
932+
if isinstance(self.freq, Tick):
933+
new_freq = self.freq
934+
return self._simple_new(self._ndarray, dtype=dtype, freq=new_freq)
932935

933936
@dtl.ravel_compat
934937
def tz_localize(

0 commit comments

Comments
 (0)