diff --git a/doc/source/release.rst b/doc/source/release.rst index 4a25a98f2cfbe..68bcc9c14a01b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -174,6 +174,8 @@ Improvements to existing features - :meth:`~pandas.io.json.json_normalize` is a new method to allow you to create a flat table from semi-structured JSON data. :ref:`See the docs` (:issue:`1067`) - ``DataFrame.from_records()`` will now accept generators (:issue:`4910`) + - DatetimeIndex (and date_range) can now be constructed in a left- or + right-open fashion using the ``closed`` parameter (:issue:`4579`) API Changes ~~~~~~~~~~~ diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 33c90d3714e8a..a2b46f74244e2 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -115,6 +115,9 @@ class DatetimeIndex(Int64Index): end : end time, datetime-like, optional If periods is none, generated index will extend to first conforming time on or just past end argument + closed : string or None, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None) """ _join_precedence = 10 @@ -143,7 +146,8 @@ class DatetimeIndex(Int64Index): def __new__(cls, data=None, freq=None, start=None, end=None, periods=None, copy=False, name=None, tz=None, - verify_integrity=True, normalize=False, **kwds): + verify_integrity=True, normalize=False, + closed=None, **kwds): dayfirst = kwds.pop('dayfirst', None) yearfirst = kwds.pop('yearfirst', None) @@ -184,7 +188,7 @@ def __new__(cls, data=None, if data is None: return cls._generate(start, end, periods, name, offset, - tz=tz, normalize=normalize, + tz=tz, normalize=normalize, closed=closed, infer_dst=infer_dst) if not isinstance(data, np.ndarray): @@ -289,7 +293,7 @@ def __new__(cls, data=None, @classmethod def _generate(cls, start, end, periods, name, offset, - tz=None, normalize=False, infer_dst=False): + tz=None, normalize=False, infer_dst=False, closed=None): if 
com._count_not_none(start, end, periods) != 2: raise ValueError('Must specify two of start, end, or periods') @@ -301,6 +305,24 @@ def _generate(cls, start, end, periods, name, offset, if end is not None: end = Timestamp(end) + left_closed = False + right_closed = False + + if start is None and end is None: + if closed is not None: + raise ValueError("Closed has to be None if not both of start " + "and end are defined") + + if closed is None: + left_closed = True + right_closed = True + elif closed == "left": + left_closed = True + elif closed == "right": + right_closed = True + else: + raise ValueError("Closed has to be either 'left', 'right' or None") + try: inferred_tz = tools._infer_tzinfo(start, end) except: @@ -387,6 +409,11 @@ def _generate(cls, start, end, periods, name, offset, index.offset = offset index.tz = tz + if not left_closed and len(index) and index[0] == start: + index = index[1:] + if not right_closed and len(index) and index[-1] == end: + index = index[:-1] + return index def _box_values(self, values): @@ -1715,7 +1742,7 @@ def _generate_regular_range(start, end, periods, offset): def date_range(start=None, end=None, periods=None, freq='D', tz=None, - normalize=False, name=None): + normalize=False, name=None, closed=None): """ Return a fixed frequency datetime index, with day (calendar) as the default frequency @@ -1737,6 +1764,9 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Normalize start/end dates to midnight before generating date range name : str, default None Name of the resulting index + closed : string or None, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None) Notes ----- @@ -1747,11 +1777,12 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, rng : DatetimeIndex """ return DatetimeIndex(start=start, end=end, periods=periods, - freq=freq, tz=tz, normalize=normalize, name=name) + freq=freq, tz=tz, normalize=normalize, name=name, + closed=closed) def bdate_range(start=None, end=None, 
periods=None, freq='B', tz=None, - normalize=True, name=None): + normalize=True, name=None, closed=None): """ Return a fixed frequency datetime index, with business day as the default frequency @@ -1773,6 +1804,9 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Normalize start/end dates to midnight before generating date range name : str, default None Name for the resulting index + closed : string or None, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None) Notes ----- @@ -1784,11 +1818,12 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, """ return DatetimeIndex(start=start, end=end, periods=periods, - freq=freq, tz=tz, normalize=normalize, name=name) + freq=freq, tz=tz, normalize=normalize, name=name, + closed=closed) def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, - normalize=True, name=None, **kwargs): + normalize=True, name=None, closed=None, **kwargs): """ **EXPERIMENTAL** Return a fixed frequency datetime index, with CustomBusinessDay as the default frequency @@ -1820,6 +1855,9 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, holidays : list list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` + closed : string or None, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None) Notes ----- @@ -1835,7 +1873,8 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, weekmask = kwargs.pop('weekmask', 'Mon Tue Wed Thu Fri') freq = CDay(holidays=holidays, weekmask=weekmask) return DatetimeIndex(start=start, end=end, periods=periods, freq=freq, - tz=tz, normalize=normalize, name=name, **kwargs) + tz=tz, normalize=normalize, name=name, + closed=closed, **kwargs) def _to_m8(key, tz=None): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 
e496bf46cf57a..ad7d3ba03a129 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -8,6 +8,7 @@ # import after tools, dateutil check from dateutil.relativedelta import relativedelta import pandas.tslib as tslib +from pandas.tslib import Timestamp import numpy as np from pandas import _np_version_under1p7 @@ -92,9 +93,9 @@ def apply(self, other): else: for i in range(-self.n): other = other - self._offset - return other + return Timestamp(other) else: - return other + timedelta(self.n) + return Timestamp(other + timedelta(self.n)) def isAnchored(self): return (self.n == 1) @@ -373,7 +374,7 @@ def apply(self, other): if self.offset: result = result + self.offset - return result + return Timestamp(result) elif isinstance(other, (timedelta, Tick)): return BDay(self.n, offset=self.offset + other, @@ -516,7 +517,7 @@ def apply(self, other): if n <= 0: n = n + 1 other = other + relativedelta(months=n, day=31) - return other + return Timestamp(other) @classmethod def onOffset(cls, dt): @@ -538,7 +539,7 @@ def apply(self, other): n += 1 other = other + relativedelta(months=n, day=1) - return other + return Timestamp(other) @classmethod def onOffset(cls, dt): @@ -660,7 +661,7 @@ def apply(self, other): other = other + timedelta((self.weekday - otherDay) % 7) for i in range(-k): other = other - self._inc - return other + return Timestamp(other) def onOffset(self, dt): return dt.weekday() == self.weekday @@ -901,7 +902,7 @@ def apply(self, other): other = other + relativedelta(months=monthsToGo + 3 * n, day=31) - return other + return Timestamp(other) def onOffset(self, dt): modMonth = (dt.month - self.startingMonth) % 3 @@ -941,7 +942,7 @@ def apply(self, other): n = n + 1 other = other + relativedelta(months=3 * n - monthsSince, day=1) - return other + return Timestamp(other) @property def rule_code(self): @@ -1093,7 +1094,7 @@ def _rollf(date): # n == 0, roll forward result = _rollf(result) - return result + return Timestamp(result) def onOffset(self, 
dt): wkday, days_in_month = tslib.monthrange(dt.year, self.month) @@ -1151,7 +1152,7 @@ def _rollf(date): # n == 0, roll forward result = _rollf(result) - return result + return Timestamp(result) def onOffset(self, dt): return dt.month == self.month and dt.day == 1 diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index cb17375266edf..3b40e75194d11 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -394,6 +394,21 @@ def test_month_range_union_tz(self): early_dr.union(late_dr) + def test_range_closed(self): + begin = datetime(2011, 1, 1) + end = datetime(2014, 1, 1) + + for freq in ["3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + + expected_left = closed[:-1] if closed[-1] == end else closed + expected_right = closed[1:] if closed[0] == begin else closed + + self.assert_(expected_left.equals(left)) + self.assert_(expected_right.equals(right)) + class TestCustomDateRange(unittest.TestCase):