Commit 908a88e

API: date_range, timedelta_range infer unit from start/end/freq
1 parent 2d73d62 commit 908a88e

111 files changed: +522 -342 lines changed


doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions

@@ -741,6 +741,7 @@ Other API changes
   the dtype of the resulting Index (:issue:`60797`)
 - :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies align (:issue:`55782`)
 - :class:`Series` "flex" methods like :meth:`Series.add` no longer allow passing a :class:`DataFrame` for ``other``; use the DataFrame reversed method instead (:issue:`46179`)
+- :func:`date_range` and :func:`timedelta_range` no longer default to ``unit="ns"``, instead will infer a unit from the ``start``, ``end``, and ``freq`` parameters. Explicitly specify a desired ``unit`` to override these (:issue:`59031`)
 - :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
 - :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
 - Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
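To make the new whatsnew entry concrete, here is a hedged usage sketch; the dtypes noted in the comments follow the inference rule added in the code changes below and are expectations, not verified outputs of this exact build:

import pandas as pd

# Date-only endpoints parse at second resolution, so without an explicit
# unit the result is expected to be datetime64[s] rather than the old
# datetime64[ns] default.
print(pd.date_range("2016-01-01", "2016-01-05").dtype)

# A finer Tick frequency can bump the inferred unit (here likely to "ms").
print(pd.date_range("2016-01-01", periods=3, freq="ms").dtype)

# An explicitly requested unit always wins and restores the old behavior.
print(pd.date_range("2016-01-01", "2016-01-05", unit="ns").dtype)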

pandas/core/indexes/datetimes.py

Lines changed: 35 additions & 2 deletions

@@ -25,6 +25,7 @@
     timezones,
     to_offset,
 )
+from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
 from pandas._libs.tslibs.offsets import prefix_mapping
 from pandas.errors import Pandas4Warning
 from pandas.util._decorators import (
@@ -883,7 +884,7 @@ def date_range(
     name: Hashable | None = None,
     inclusive: IntervalClosedType = "both",
     *,
-    unit: TimeUnit = "ns",
+    unit: TimeUnit | None = None,
     **kwargs,
 ) -> DatetimeIndex:
     """
@@ -922,8 +923,9 @@ def date_range(
         Name of the resulting DatetimeIndex.
     inclusive : {"both", "neither", "left", "right"}, default "both"
         Include boundaries; Whether to set each bound as closed or open.
-    unit : {'s', 'ms', 'us', 'ns'}, default 'ns'
+    unit : {'s', 'ms', 'us', 'ns', None}, default None
         Specify the desired resolution of the result.
+        If not specified, this is inferred from the 'start', 'end', and 'freq'

         .. versionadded:: 2.0.0
     **kwargs
@@ -1063,6 +1065,37 @@ def date_range(
     if freq is None and com.any_none(periods, start, end):
         freq = "D"

+    if unit is None:
+        # Infer the unit based on the inputs
+
+        if start is not None and end is not None:
+            start = Timestamp(start)
+            end = Timestamp(end)
+            if abbrev_to_npy_unit(start.unit) > abbrev_to_npy_unit(end.unit):
+                unit = start.unit
+            else:
+                unit = end.unit
+        elif start is not None:
+            start = Timestamp(start)
+            unit = start.unit
+        else:
+            end = Timestamp(end)
+            unit = end.unit
+
+        # Last we need to watch out for cases where the 'freq' implies a higher
+        # unit than either start or end
+        if freq is not None:
+            freq = to_offset(freq)
+            creso = abbrev_to_npy_unit(unit)
+            if isinstance(freq, Tick):
+                if freq._creso > creso:
+                    unit = freq.base.freqstr
+            elif hasattr(freq, "offset") and freq.offset is not None:
+                # e.g. BDay with an offset
+                td = Timedelta(freq.offset)
+                if abbrev_to_npy_unit(td.unit) > creso:
+                    unit = td.unit
+
     dtarr = DatetimeArray._generate_range(
         start=start,
         end=end,
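The added block picks the finer of the start/end resolutions and then lets a finer freq override it. Below is a minimal standalone sketch of the endpoint rule using only public API; the _UNIT_RANK mapping and infer_endpoint_unit helper are illustrative stand-ins (assumptions), not the internal abbrev_to_npy_unit-based implementation, and the freq handling is omitted:

import pandas as pd

# Larger rank means finer resolution, mirroring the ordering of the numpy
# datetime unit codes that the committed code compares via abbrev_to_npy_unit.
_UNIT_RANK = {"s": 0, "ms": 1, "us": 2, "ns": 3}


def infer_endpoint_unit(start=None, end=None):
    # Collect the resolution of whichever endpoints were provided and
    # return the finest one, as the committed comparison does.
    units = [pd.Timestamp(x).unit for x in (start, end) if x is not None]
    return max(units, key=_UNIT_RANK.__getitem__)


print(infer_endpoint_unit("2016-01-01", "2016-06-01"))                  # "s"
print(infer_endpoint_unit("2016-01-01", "2016-06-01 00:00:00.000001"))  # "us"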

pandas/core/indexes/timedeltas.py

Lines changed: 36 additions & 3 deletions

@@ -13,6 +13,7 @@
     Timedelta,
     to_offset,
 )
+from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
 from pandas.util._decorators import set_module

 from pandas.core.dtypes.common import (
@@ -252,7 +253,7 @@ def timedelta_range(
     name=None,
     closed=None,
     *,
-    unit: TimeUnit = "ns",
+    unit: TimeUnit | None = None,
 ) -> TimedeltaIndex:
     """
     Return a fixed frequency TimedeltaIndex with day as the default.
@@ -272,8 +273,9 @@ def timedelta_range(
     closed : str, default None
         Make the interval closed with respect to the given frequency to
         the 'left', 'right', or both sides (None).
-    unit : {'s', 'ms', 'us', 'ns'}, default 'ns'
+    unit : {'s', 'ms', 'us', 'ns', None}, default None
         Specify the desired resolution of the result.
+        If not specified, this is inferred from the 'start', 'end', and 'freq'

         .. versionadded:: 2.0.0

@@ -337,8 +339,39 @@
     """
     if freq is None and com.any_none(periods, start, end):
         freq = "D"
-
     freq = to_offset(freq)
+
+    if com.count_not_none(start, end, periods, freq) != 3:
+        # This check needs to come before the `unit = start.unit` line below
+        raise ValueError(
+            "Of the four parameters: start, end, periods, "
+            "and freq, exactly three must be specified"
+        )
+
+    if unit is None:
+        # Infer the unit based on the inputs
+
+        if start is not None and end is not None:
+            start = Timedelta(start)
+            end = Timedelta(end)
+            if abbrev_to_npy_unit(start.unit) > abbrev_to_npy_unit(end.unit):
+                unit = start.unit
+            else:
+                unit = end.unit
+        elif start is not None:
+            start = Timedelta(start)
+            unit = start.unit
+        else:
+            end = Timedelta(end)
+            unit = end.unit
+
+        # Last we need to watch out for cases where the 'freq' implies a higher
+        # unit than either start or end
+        if freq is not None:
+            creso = abbrev_to_npy_unit(unit)
+            if freq._creso > creso:
+                unit = freq.base.freqstr
+
     tdarr = TimedeltaArray._generate_range(
         start, end, periods, freq, closed=closed, unit=unit
     )
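Two things in this hunk are easy to exercise from user code: the three-of-four argument check that now runs before unit inference, and the explicit unit override. A hedged sketch follows; the error message is taken from the diff, and the dtype of the unpinned call is deliberately not asserted:

import pandas as pd

# Passing all four of start/end/periods/freq fails fast, before any
# `unit = start.unit` inference is attempted.
try:
    pd.timedelta_range(start="1 Day", end="10 Days", periods=5, freq="D")
except ValueError as err:
    print(err)

# Without an explicit unit the resolution is now inferred from the inputs;
# passing unit="ns" keeps the pre-change nanosecond dtype.
tdi = pd.timedelta_range(start="1 Day", periods=4, unit="ns")
print(tdi.dtype)  # timedelta64[ns]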

pandas/tests/apply/test_frame_apply.py

Lines changed: 5 additions & 3 deletions

@@ -653,13 +653,15 @@ def test_apply_dict(df, dicts):

 def test_apply_non_numpy_dtype():
     # GH 12244
-    df = DataFrame({"dt": date_range("2015-01-01", periods=3, tz="Europe/Brussels")})
+    df = DataFrame(
+        {"dt": date_range("2015-01-01", periods=3, tz="Europe/Brussels", unit="ns")}
+    )
     result = df.apply(lambda x: x)
     tm.assert_frame_equal(result, df)

     result = df.apply(lambda x: x + pd.Timedelta("1day"))
     expected = DataFrame(
-        {"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels")}
+        {"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels", unit="ns")}
     )
     tm.assert_frame_equal(result, expected)

@@ -1425,7 +1427,7 @@ def test_nuiscance_columns():
             "A": [1, 2, 3],
             "B": [1.0, 2.0, 3.0],
             "C": ["foo", "bar", "baz"],
-            "D": date_range("20130101", periods=3),
+            "D": date_range("20130101", periods=3, unit="ns"),
         }
     )
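The test updates in this and the following file pin unit="ns" so the constructed expectations keep their historical nanosecond dtype. A hedged illustration of why the pin matters; the unpinned dtype is an expectation under the new inference rule, not a guaranteed output:

import pandas as pd

# Pinned: matches the dtype these tests were originally written against.
pinned = pd.date_range("2015-01-01", periods=3, tz="Europe/Brussels", unit="ns")
print(pinned.dtype)  # datetime64[ns, Europe/Brussels]

# Unpinned: the unit is now inferred, so a date-only start string is
# expected to yield a coarser (second) resolution instead.
inferred = pd.date_range("2015-01-01", periods=3, tz="Europe/Brussels")
print(inferred.dtype)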

pandas/tests/arithmetic/test_datetime64.py

Lines changed: 27 additions & 25 deletions

@@ -589,7 +589,7 @@ def test_comparison_tzawareness_compat(self, comparison_op, box_with_array):
         op = comparison_op
         box = box_with_array

-        dr = date_range("2016-01-01", periods=6)
+        dr = date_range("2016-01-01", periods=6, unit="ns")
         dz = dr.tz_localize("US/Pacific")

         dr = tm.box_expected(dr, box)
@@ -637,7 +637,7 @@ def test_comparison_tzawareness_compat_scalars(self, comparison_op, box_with_arr
         # GH#18162
         op = comparison_op

-        dr = date_range("2016-01-01", periods=6)
+        dr = date_range("2016-01-01", periods=6, unit="ns")
         dz = dr.tz_localize("US/Pacific")

         dr = tm.box_expected(dr, box_with_array)
@@ -676,7 +676,7 @@ def test_scalar_comparison_tzawareness(
     ):
         op = comparison_op
         tz = tz_aware_fixture
-        dti = date_range("2016-01-01", periods=2, tz=tz)
+        dti = date_range("2016-01-01", periods=2, tz=tz, unit="ns")

         dtarr = tm.box_expected(dti, box_with_array)
         xbox = get_upcast_box(dtarr, other, True)
@@ -833,8 +833,8 @@ def test_dt64arr_add_timedeltalike_scalar(
         # GH#22005, GH#22163 check DataFrame doesn't raise TypeError
         tz = tz_naive_fixture

-        rng = date_range("2000-01-01", "2000-02-01", tz=tz)
-        expected = date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz)
+        rng = date_range("2000-01-01", "2000-02-01", tz=tz, unit="ns")
+        expected = date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz, unit="ns")
         if tz is not None:
             expected = expected._with_freq(None)

@@ -855,8 +855,8 @@ def test_dt64arr_sub_timedeltalike_scalar(
     ):
         tz = tz_naive_fixture

-        rng = date_range("2000-01-01", "2000-02-01", tz=tz)
-        expected = date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz)
+        rng = date_range("2000-01-01", "2000-02-01", tz=tz, unit="ns")
+        expected = date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz, unit="ns")
         if tz is not None:
             expected = expected._with_freq(None)

@@ -870,7 +870,7 @@ def test_dt64arr_sub_timedeltalike_scalar(
         tm.assert_equal(rng, expected)

     def test_dt64_array_sub_dt_with_different_timezone(self, box_with_array):
-        t1 = date_range("20130101", periods=3).tz_localize("US/Eastern")
+        t1 = date_range("20130101", periods=3, unit="ns").tz_localize("US/Eastern")
         t1 = tm.box_expected(t1, box_with_array)
         t2 = Timestamp("20130101").tz_localize("CET")
         tnaive = Timestamp(20130101)
@@ -897,11 +897,11 @@ def test_dt64_array_sub_dt_with_different_timezone(self, box_with_array):
            tnaive - t1

     def test_dt64_array_sub_dt64_array_with_different_timezone(self, box_with_array):
-        t1 = date_range("20130101", periods=3).tz_localize("US/Eastern")
+        t1 = date_range("20130101", periods=3, unit="ns").tz_localize("US/Eastern")
         t1 = tm.box_expected(t1, box_with_array)
-        t2 = date_range("20130101", periods=3).tz_localize("CET")
+        t2 = date_range("20130101", periods=3, unit="ns").tz_localize("CET")
         t2 = tm.box_expected(t2, box_with_array)
-        tnaive = date_range("20130101", periods=3)
+        tnaive = date_range("20130101", periods=3, unit="ns")

         result = t1 - t2
         expected = TimedeltaIndex(
@@ -928,7 +928,7 @@ def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture):
         # GH#23320 special handling for timedelta64("NaT")
         tz = tz_naive_fixture

-        dti = date_range("1994-04-01", periods=9, tz=tz, freq="QS")
+        dti = date_range("1994-04-01", periods=9, tz=tz, freq="QS", unit="ns")
         other = np.timedelta64("NaT")
         expected = DatetimeIndex(["NaT"] * 9, tz=tz).as_unit("ns")

@@ -947,11 +947,11 @@ def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture):

     def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array):
         tz = tz_naive_fixture
-        dti = date_range("2016-01-01", periods=3, tz=tz)
+        dti = date_range("2016-01-01", periods=3, tz=tz, unit="ns")
         tdi = TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"])
         tdarr = tdi.values

-        expected = date_range("2015-12-31", "2016-01-02", periods=3, tz=tz)
+        expected = date_range("2015-12-31", "2016-01-02", periods=3, tz=tz, unit="ns")

         dtarr = tm.box_expected(dti, box_with_array)
         expected = tm.box_expected(expected, box_with_array)
@@ -961,7 +961,7 @@ def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array):
         result = tdarr + dtarr
         tm.assert_equal(result, expected)

-        expected = date_range("2016-01-02", "2016-01-04", periods=3, tz=tz)
+        expected = date_range("2016-01-02", "2016-01-04", periods=3, tz=tz, unit="ns")
         expected = tm.box_expected(expected, box_with_array)

         result = dtarr - tdarr
@@ -991,7 +991,7 @@ def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array):
     )
     def test_dt64arr_sub_dtscalar(self, box_with_array, ts):
         # GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype
-        idx = date_range("2013-01-01", periods=3)._with_freq(None)
+        idx = date_range("2013-01-01", periods=3, unit="ns")._with_freq(None)
         idx = tm.box_expected(idx, box_with_array)

         expected = TimedeltaIndex(["0 Days", "1 Day", "2 Days"])
@@ -1005,7 +1005,7 @@ def test_dt64arr_sub_dtscalar(self, box_with_array, ts):
         tm.assert_equal(result, -expected)

     def test_dt64arr_sub_timestamp_tzaware(self, box_with_array):
-        ser = date_range("2014-03-17", periods=2, freq="D", tz="US/Eastern")
+        ser = date_range("2014-03-17", periods=2, freq="D", tz="US/Eastern", unit="ns")
         ser = ser._with_freq(None)
         ts = ser[0]

@@ -1312,11 +1312,11 @@ def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array):
        # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype
        tz = tz_aware_fixture
        if tz == "US/Pacific":
-            dates = date_range("2012-11-01", periods=3, tz=tz)
+            dates = date_range("2012-11-01", periods=3, tz=tz, unit="ns")
            offset = dates + pd.offsets.Hour(5)
            assert dates[0] + pd.offsets.Hour(5) == offset[0]

-        dates = date_range("2010-11-01 00:00", periods=3, tz=tz, freq="h")
+        dates = date_range("2010-11-01 00:00", periods=3, tz=tz, freq="h", unit="ns")
        expected = DatetimeIndex(
            ["2010-11-01 05:00", "2010-11-01 06:00", "2010-11-01 07:00"],
            freq="h",
@@ -1604,7 +1604,9 @@ def test_dti_add_sub_nonzero_mth_offset(
    ):
        # GH 26258
        tz = tz_aware_fixture
-        date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="YS", tz=tz)
+        date = date_range(
+            start="01 Jan 2014", end="01 Jan 2017", freq="YS", tz=tz, unit="ns"
+        )
        date = tm.box_expected(date, box_with_array, False)
        mth = getattr(date, op)
        result = mth(offset)
@@ -1667,7 +1669,7 @@ def test_dt64_series_arith_overflow(self):
        # GH#12534, fixed by GH#19024
        dt = Timestamp("1700-01-31")
        td = Timedelta("20000 Days")
-        dti = date_range("1949-09-30", freq="100YE", periods=4)
+        dti = date_range("1949-09-30", freq="100YE", periods=4, unit="ns")
        ser = Series(dti)
        msg = "Overflow in int64 addition"
        with pytest.raises(OverflowError, match=msg):
@@ -1890,7 +1892,7 @@ def test_sub_single_tz(self, unit):
    def test_dt64tz_series_sub_dtitz(self):
        # GH#19071 subtracting tzaware DatetimeIndex from tzaware Series
        # (with same tz) raises, fixed by #19024
-        dti = date_range("1999-09-30", periods=10, tz="US/Pacific")
+        dti = date_range("1999-09-30", periods=10, tz="US/Pacific", unit="ns")
        ser = Series(dti)
        expected = Series(TimedeltaIndex(["0days"] * 10))

@@ -2040,7 +2042,7 @@ def test_dti_add_tdi(self, tz_naive_fixture):
        tz = tz_naive_fixture
        dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
        tdi = pd.timedelta_range("0 days", periods=10)
-        expected = date_range("2017-01-01", periods=10, tz=tz)
+        expected = date_range("2017-01-01", periods=10, tz=tz, unit="ns")
        expected = expected._with_freq(None)

        # add with TimedeltaIndex
@@ -2062,7 +2064,7 @@ def test_dti_iadd_tdi(self, tz_naive_fixture):
        tz = tz_naive_fixture
        dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
        tdi = pd.timedelta_range("0 days", periods=10)
-        expected = date_range("2017-01-01", periods=10, tz=tz)
+        expected = date_range("2017-01-01", periods=10, tz=tz, unit="ns")
        expected = expected._with_freq(None)

        # iadd with TimedeltaIndex
@@ -2088,7 +2090,7 @@ def test_dti_sub_tdi(self, tz_naive_fixture):
        tz = tz_naive_fixture
        dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
        tdi = pd.timedelta_range("0 days", periods=10)
-        expected = date_range("2017-01-01", periods=10, tz=tz, freq="-1D")
+        expected = date_range("2017-01-01", periods=10, tz=tz, freq="-1D", unit="ns")
        expected = expected._with_freq(None)

        # sub with TimedeltaIndex
