diff --git a/doc/source/release.rst b/doc/source/release.rst index 7871e92b7953b..c0600c161e06e 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -55,6 +55,10 @@ New features - Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`) - Added the ``sym_diff`` method to ``Index`` (:issue:`5543`) +- Added ``to_julian_date`` to ``Timestamp`` and ``DatetimeIndex``. The Julian + Date is used primarily in astronomy and represents the number of days from + noon, January 1, 4713 BC. Because nanoseconds are used to define the time + in pandas, the actual range of dates that you can use is 1678 AD to 2262 AD. API Changes ~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index e914b2a4693d0..e7e7fba89a233 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -11,6 +11,8 @@ Highlights include: - MultIndexing Using Slicers - Joining a singly-indexed DataFrame with a multi-indexed DataFrame +- Added a ``to_julian_date`` function to ``Timestamp`` and ``DatetimeIndex`` + to convert to the Julian Date used primarily in astronomy. API changes ~~~~~~~~~~~ diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index a8dacbe40aac0..c326b21610858 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -9,7 +9,7 @@ from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE, is_list_like,_values_from_object, _maybe_box, notnull, ABCSeries) -from pandas.core.index import Index, Int64Index, _Identity +from pandas.core.index import Index, Int64Index, _Identity, Float64Index import pandas.compat as compat from pandas.compat import u from pandas.tseries.frequencies import ( @@ -1753,6 +1753,33 @@ def max(self, axis=None): max_stamp = self.asi8.max() return Timestamp(max_stamp, tz=self.tz) + def to_julian_date(self): + """ + Convert DatetimeIndex to Float64Index of Julian Dates. + 0 Julian date is noon January 1, 4713 BC. 
+ http://en.wikipedia.org/wiki/Julian_day + """ + + # http://mysite.verizon.net/aesir_research/date/jdalg2.htm + year = self.year + month = self.month + day = self.day + testarr = month < 3 + year[testarr] -= 1 + month[testarr] += 12 + return Float64Index(day + + np.fix((153*month - 457)/5) + + 365*year + + np.floor(year / 4) - + np.floor(year / 100) + + np.floor(year / 400) + + 1721118.5 + + (self.hour + + self.minute/60.0 + + self.second/3600.0 + + self.microsecond/3600.0/1e+6 + + self.nanosecond/3600.0/1e+9 + )/24.0) def _generate_regular_range(start, end, periods, offset): if isinstance(offset, Tick): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index d01548ee79e32..eeab4f46414df 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -13,7 +13,7 @@ from pandas import (Index, Series, TimeSeries, DataFrame, isnull, date_range, Timestamp, Period, DatetimeIndex, - Int64Index, to_datetime, bdate_range) + Int64Index, to_datetime, bdate_range, Float64Index) from pandas.core.daterange import DateRange import pandas.core.datetools as datetools @@ -3287,6 +3287,91 @@ def test_guess_datetime_format_for_array(self): ) self.assertTrue(format_for_string_of_nans is None) + +class TestTimestampToJulianDate(tm.TestCase): + + def test_compare_1700(self): + r = Timestamp('1700-06-23').to_julian_date() + self.assertEqual(r, 2342145.5) + + def test_compare_2000(self): + r = Timestamp('2000-04-12').to_julian_date() + self.assertEqual(r, 2451646.5) + + def test_compare_2100(self): + r = Timestamp('2100-08-12').to_julian_date() + self.assertEqual(r, 2488292.5) + + def test_compare_hour01(self): + r = Timestamp('2000-08-12T01:00:00').to_julian_date() + self.assertEqual(r, 2451768.5416666666666666) + + def test_compare_hour13(self): + r = Timestamp('2000-08-12T13:00:00').to_julian_date() + self.assertEqual(r, 2451769.0416666666666666) + + +class TestDateTimeIndexToJulianDate(tm.TestCase): + 
def test_1700(self): + r1 = Float64Index([2345897.5, + 2345898.5, + 2345899.5, + 2345900.5, + 2345901.5]) + r2 = date_range(start=Timestamp('1710-10-01'), + periods=5, + freq='D').to_julian_date() + self.assert_(isinstance(r2, Float64Index)) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + r1 = Float64Index([2451601.5, + 2451602.5, + 2451603.5, + 2451604.5, + 2451605.5]) + r2 = date_range(start=Timestamp('2000-02-27'), + periods=5, + freq='D').to_julian_date() + self.assert_(isinstance(r2, Float64Index)) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + r1 = Float64Index([2451601.5, + 2451601.5416666666666666, + 2451601.5833333333333333, + 2451601.625, + 2451601.6666666666666666]) + r2 = date_range(start=Timestamp('2000-02-27'), + periods=5, + freq='H').to_julian_date() + self.assert_(isinstance(r2, Float64Index)) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + r1 = Float64Index([2451601.5, + 2451601.5006944444444444, + 2451601.5013888888888888, + 2451601.5020833333333333, + 2451601.5027777777777777]) + r2 = date_range(start=Timestamp('2000-02-27'), + periods=5, + freq='T').to_julian_date() + self.assert_(isinstance(r2, Float64Index)) + tm.assert_index_equal(r1, r2) + + def test_second(self): + r1 = Float64Index([2451601.5, + 2451601.500011574074074, + 2451601.5000231481481481, + 2451601.5000347222222222, + 2451601.5000462962962962]) + r2 = date_range(start=Timestamp('2000-02-27'), + periods=5, + freq='S').to_julian_date() + self.assert_(isinstance(r2, Float64Index)) + tm.assert_index_equal(r1, r2) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 57df3c6651ad4..f065ea90473c6 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -384,6 +384,30 @@ class Timestamp(_Timestamp): or self.tzinfo is not None or self.nanosecond != 0) + def to_julian_date(self): + """ + Convert Timestamp to a Julian Date. 
+ 0 Julian date is noon January 1, 4713 BC. + """ + year = self.year + month = self.month + day = self.day + if month <= 2: + year -= 1 + month += 12 + return (day + + np.fix((153*month - 457)/5) + + 365*year + + np.floor(year / 4) - + np.floor(year / 100) + + np.floor(year / 400) + + 1721118.5 + + (self.hour + + self.minute/60.0 + + self.second/3600.0 + + self.microsecond/3600.0/1e+6 + + self.nanosecond/3600.0/1e+9 + )/24.0) _nat_strings = set(['NaT','nat','NAT','nan','NaN','NAN']) class NaTType(_NaT):