Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.DateOffset.is_on_offset GL08" \
-i "pandas.tseries.offsets.DateOffset.n GL08" \
-i "pandas.tseries.offsets.DateOffset.normalize GL08" \
-i "pandas.tseries.offsets.Day.freqstr SA01" \
-i "pandas.tseries.offsets.Day.is_on_offset GL08" \
-i "pandas.tseries.offsets.Day.n GL08" \
-i "pandas.tseries.offsets.Day.normalize GL08" \
Expand Down
38 changes: 38 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,44 @@ This change also applies to :meth:`.DataFrameGroupBy.value_counts`. Here, there

df.groupby("a", sort=True).value_counts(sort=False)

.. _whatsnew_300.api_breaking.offsets_day_not_a_tick:

Changed behavior of ``pd.offsets.Day`` to always represent calendar-day
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In previous versions of pandas, :class:`offsets.Day` represented a fixed span
of 24 hours, disregarding Daylight Saving Time transitions. It now consistently
behaves as a calendar-day, preserving time-of-day across DST transitions:

*Old behavior*

.. code-block:: ipython

In [5]: ts = pd.Timestamp("2025-03-08 08:00", tz="US/Eastern")
In [6]: ts + pd.offsets.Day(1)
Out[6]: Timestamp('2025-03-09 09:00:00-0400', tz='US/Eastern')

*New behavior*

.. ipython:: python

ts = pd.Timestamp("2025-03-08 08:00", tz="US/Eastern")
ts + pd.offsets.Day(1)

This change fixes a long-standing bug in ``pd.date_range`` (:issue:`51716`, :issue:`35388`), but causes several
small behavior differences as collateral:

- ``pd.offsets.Day(n)`` no longer compares as equal to ``pd.offsets.Hour(24*n)``
- :class:`offsets.Day` no longer supports division
- :class:`Timedelta` no longer accepts :class:`Day` objects as inputs
- :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns
an :class:`offsets.Hour` object in cases where it used to return a :class:`Day`
object.
- Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex`
with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute.
- Adding or subtracting a :class:`Day` with a :class:`Timedelta` is no longer supported.
- Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise.

.. _whatsnew_300.api_breaking.deps:

Increased minimum version for Python
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
__all__ = [
"BaseOffset",
"Day",
"IncompatibleFrequency",
"NaT",
"NaTType",
Expand Down Expand Up @@ -61,6 +62,7 @@
)
from pandas._libs.tslibs.offsets import (
BaseOffset,
Day,
Tick,
to_offset,
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/offsets.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class Tick(SingleConstructorOffset):

def delta_to_tick(delta: timedelta) -> Tick: ...

class Day(Tick): ...
class Day(BaseOffset): ...
class Hour(Tick): ...
class Minute(Tick): ...
class Second(Tick): ...
Expand Down
92 changes: 80 additions & 12 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1023,8 +1023,6 @@ cdef class Tick(SingleConstructorOffset):
# Note: Without making this cpdef, we get AttributeError when calling
# from __mul__
cpdef Tick _next_higher_resolution(Tick self):
if type(self) is Day:
return Hour(self.n * 24)
if type(self) is Hour:
return Minute(self.n * 60)
if type(self) is Minute:
Expand Down Expand Up @@ -1173,7 +1171,7 @@ cdef class Tick(SingleConstructorOffset):
self.normalize = False


cdef class Day(Tick):
cdef class Day(SingleConstructorOffset):
"""
Offset ``n`` days.

Expand Down Expand Up @@ -1203,11 +1201,73 @@ cdef class Day(Tick):
>>> ts + Day(-4)
Timestamp('2022-12-05 15:00:00')
"""
_adjust_dst = True
_attributes = tuple(["n", "normalize"])
_nanos_inc = 24 * 3600 * 1_000_000_000
_prefix = "D"
_period_dtype_code = PeriodDtypeCode.D
_creso = NPY_DATETIMEUNIT.NPY_FR_D

def __init__(self, n=1, normalize=False):
BaseOffset.__init__(self, n)
if normalize:
# GH#21427
raise ValueError(
"Day offset with `normalize=True` are not allowed."
)

def is_on_offset(self, dt) -> bool:
return True

@apply_wraps
def _apply(self, other):
if isinstance(other, Day):
# TODO: why isn't this handled in __add__?
return Day(self.n + other.n)
return other + np.timedelta64(self.n, "D")

def _apply_array(self, dtarr):
return dtarr + np.timedelta64(self.n, "D")

@cache_readonly
def freqstr(self) -> str:
"""
Return a string representing the frequency.

Examples
--------
>>> pd.Day(5).freqstr
'5D'

>>> pd.offsets.Day(1).freqstr
'D'
"""
if self.n != 1:
return str(self.n) + "D"
return "D"

# Having this here isn't strictly-correct post-GH#???
# but this gets called in timedelta.get_unit_for_round in cases where
# Day unambiguously means 24h.
@property
def nanos(self) -> int64_t:
"""
Returns an integer of the total number of nanoseconds.

See Also
--------
tseries.offsets.Hour.nanos :
Returns an integer of the total number of nanoseconds.
tseries.offsets.Minute.nanos :
Returns an integer of the total number of nanoseconds.

Examples
--------
>>> pd.offsets.Day(5).nanos
432000000000000
"""
return self.n * self._nanos_inc


cdef class Hour(Tick):
"""
Expand Down Expand Up @@ -1431,16 +1491,13 @@ cdef class Nano(Tick):
def delta_to_tick(delta: timedelta) -> Tick:
if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0:
# nanoseconds only for pd.Timedelta
if delta.seconds == 0:
return Day(delta.days)
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(seconds / 3600)
elif seconds % 60 == 0:
return Minute(seconds / 60)
else:
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(seconds / 3600)
elif seconds % 60 == 0:
return Minute(seconds / 60)
else:
return Second(seconds)
return Second(seconds)
else:
nanos = delta_to_nanoseconds(delta)
if nanos % 1_000_000 == 0:
Expand Down Expand Up @@ -5332,6 +5389,17 @@ cpdef to_offset(freq, bint is_period=False):
raise ValueError(INVALID_FREQ_ERR_MSG.format(
f"{freq}, failed to parse with error message: {repr(err)}")
) from err

# TODO(3.0?) once deprecation of "d" is enforced, the check for it here
# can be removed
if (
isinstance(result, Hour)
and result.n % 24 == 0
and ("d" in freq or "D" in freq)
):
# Since Day is no longer a Tick, delta_to_tick returns Hour above,
# so we convert back here.
result = Day(result.n // 24)
else:
result = None

Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ from pandas._libs.tslibs.offsets cimport (
from pandas._libs.tslibs.offsets import (
INVALID_FREQ_ERR_MSG,
BDay,
Day,
)
from pandas.util._decorators import set_module

Expand Down Expand Up @@ -1825,6 +1826,10 @@ cdef class _Period(PeriodMixin):
# i.e. np.timedelta64("nat")
return NaT

if isinstance(other, Day):
# Periods are timezone-naive, so we treat Day as Tick-like
other = np.timedelta64(other.n, "D")

try:
inc = delta_to_nanoseconds(other, reso=self._dtype._creso, round_ok=False)
except ValueError as err:
Expand All @@ -1846,7 +1851,7 @@ cdef class _Period(PeriodMixin):

@cython.overflowcheck(True)
def __add__(self, other):
if is_any_td_scalar(other):
if is_any_td_scalar(other) or isinstance(other, Day):
return self._add_timedeltalike_scalar(other)
elif is_offset_object(other):
return self._add_offset(other)
Expand Down
5 changes: 5 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ from pandas._libs.tslibs.np_datetime import (
)

from pandas._libs.tslibs.offsets cimport is_tick_object
from pandas._libs.tslibs.offsets import Day
from pandas._libs.tslibs.util cimport (
is_array,
is_float_object,
Expand Down Expand Up @@ -2577,4 +2578,8 @@ cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? -1:

freq = to_offset(freq)
freq.nanos # raises on non-fixed freq
if isinstance(freq, Day):
# In the "round" context, Day unambiguously means 24h, not calendar-day
freq = Timedelta(days=freq.n)

return delta_to_nanoseconds(freq, creso)
10 changes: 8 additions & 2 deletions pandas/core/arrays/_ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas._libs.lib import i8max
from pandas._libs.tslibs import (
BaseOffset,
Day,
OutOfBoundsDatetime,
Timedelta,
Timestamp,
Expand Down Expand Up @@ -55,8 +56,13 @@ def generate_regular_range(
"""
istart = start._value if start is not None else None
iend = end._value if end is not None else None
freq.nanos # raises if non-fixed frequency
td = Timedelta(freq)
if isinstance(freq, Day):
# In contexts without a timezone, a Day offset is unambiguously
# interpretable as Timedelta-like.
td = Timedelta(days=freq.n)
else:
freq.nanos # raises if non-fixed frequency
td = Timedelta(freq)
b: int
e: int
try:
Expand Down
34 changes: 32 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
)
from pandas._libs.tslibs import (
BaseOffset,
Day,
IncompatibleFrequency,
NaT,
NaTType,
Expand All @@ -44,6 +45,7 @@
ints_to_pydatetime,
ints_to_pytimedelta,
periods_per_day,
timezones,
to_offset,
)
from pandas._libs.tslibs.fields import (
Expand Down Expand Up @@ -1068,6 +1070,26 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
elif isinstance(self.freq, Tick):
# In these cases
return self.freq
elif self.dtype.kind == "m" and isinstance(other, Timedelta):
return self.freq
elif (
self.dtype.kind == "m"
and isinstance(other, Timestamp)
and (other.tz is None or timezones.is_utc(other.tz))
):
# e.g. test_td64arr_add_sub_datetimelike_scalar tdarr + timestamp
# gives a DatetimeArray. As long as the timestamp has no timezone
# or UTC, the result can retain a Day freq.
return self.freq
elif (
lib.is_np_dtype(self.dtype, "M")
and isinstance(self.freq, Day)
and isinstance(other, Timedelta)
):
# e.g. TestTimedelta64ArithmeticUnsorted::test_timedelta
# Day is unambiguously 24h
return self.freq

return None

@final
Expand Down Expand Up @@ -1358,6 +1380,10 @@ def __add__(self, other):
result: np.ndarray | DatetimeLikeArrayMixin = self._add_nat()
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
result = self._add_timedeltalike_scalar(other)
elif isinstance(other, Day) and lib.is_np_dtype(self.dtype, "Mm"):
# We treat this as Tick-like
td = Timedelta(days=other.n).as_unit("s")
result = self._add_timedeltalike_scalar(td)
elif isinstance(other, BaseOffset):
# specifically _not_ a Tick
result = self._add_offset(other)
Expand Down Expand Up @@ -1418,6 +1444,10 @@ def __sub__(self, other):
result: np.ndarray | DatetimeLikeArrayMixin = self._sub_nat()
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
result = self._add_timedeltalike_scalar(-other)
elif isinstance(other, Day) and lib.is_np_dtype(self.dtype, "Mm"):
# We treat this as Tick-like
td = Timedelta(days=other.n).as_unit("s")
result = self._add_timedeltalike_scalar(-td)
elif isinstance(other, BaseOffset):
# specifically _not_ a Tick
result = self._add_offset(-other)
Expand Down Expand Up @@ -1982,7 +2012,7 @@ def freq(self, value) -> None:
if value is not None:
value = to_offset(value)
self._validate_frequency(self, value)
if self.dtype.kind == "m" and not isinstance(value, Tick):
if self.dtype.kind == "m" and not isinstance(value, (Tick, Day)):
raise TypeError("TimedeltaArray/Index freq must be a Tick")

if self.ndim > 1:
Expand Down Expand Up @@ -2279,7 +2309,7 @@ def _with_freq(self, freq) -> Self:
pass
elif len(self) == 0 and isinstance(freq, BaseOffset):
# Always valid. In the TimedeltaArray case, we require a Tick or Day offset
if self.dtype.kind == "m" and not isinstance(freq, Tick):
if self.dtype.kind == "m" and not isinstance(freq, (Tick, Day)):
raise TypeError("TimedeltaArray/Index freq must be a Tick")
else:
# As an internal method, we can ensure this assertion always holds
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def _generate_range(
if end is not None:
end = end.tz_localize(None)

if isinstance(freq, Tick):
if isinstance(freq, (Tick, Day)):
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
else:
xdr = _generate_range(
Expand Down Expand Up @@ -928,7 +928,10 @@ def tz_convert(self, tz) -> Self:

# No conversion since timestamps are all UTC to begin with
dtype = tz_to_dtype(tz, unit=self.unit)
return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
new_freq = None
if isinstance(self.freq, Tick):
new_freq = self.freq
return self._simple_new(self._ndarray, dtype=dtype, freq=new_freq)

@dtl.ravel_compat
def tz_localize(
Expand Down
Loading
Loading