Skip to content

Commit 1647d90

Browse files
committed
Rebuild sanitize_dates and refactor AlphaVantage date handling
1 parent bdd4104 commit 1647d90

File tree

6 files changed

+93
-16
lines changed

6 files changed

+93
-16
lines changed

docs/source/remote_data.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,9 @@ Historical Time Series Data
140140
Through the
141141
`Alpha Vantage <https://www.alphavantage.co/documentation>`__ Time Series
142142
endpoints, it is possible to obtain historical equities data for individual
143-
symbols. The following endpoints are available:
143+
symbols. For daily, weekly, and monthly frequencies, 20+ years of historical data is available. The past 3-5 days of intraday data are also available.
144+
145+
The following endpoints are available:
144146

145147
* ``av-intraday`` - Intraday Time Series
146148
* ``av-daily`` - Daily Time Series

pandas_datareader/_utils.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,16 @@ class RemoteDataError(IOError):
1616

1717
def _sanitize_dates(start, end):
1818
"""
19-
Return (datetime_start, datetime_end) tuple
20-
if start is None - default is 2010/01/01
19+
Return (timestamp_start, timestamp_end) tuple
20+
if start is None - default is 5 years before the current date
2121
if end is None - default is today
22+
23+
Parameters
24+
----------
25+
start: str, int, date, datetime, timestamp
26+
Desired start date
27+
end: str, int, date, datetime, timestamp
28+
Desired end date
2229
"""
2330
if is_number(start):
2431
# regard int as year
@@ -30,11 +37,20 @@ def _sanitize_dates(start, end):
3037
end = to_datetime(end)
3138

3239
if start is None:
33-
start = dt.datetime(2010, 1, 1)
40+
# default to 5 years before today
41+
today = dt.date.today()
42+
start = today - dt.timedelta(days=365 * 5)
3443
if end is None:
35-
end = dt.datetime.today()
44+
# default to today
45+
end = dt.date.today()
3646
if start > end:
3747
raise ValueError("start must be an earlier date than end")
48+
49+
try:
50+
start = to_datetime(start)
51+
end = to_datetime(end)
52+
except (TypeError, ValueError):
53+
raise ValueError("Invalid date format.")
3854
return start, end
3955

4056

pandas_datareader/av/time_series.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime
1+
import datetime as dt
22

33
from pandas_datareader.av import AlphaVantage
44

@@ -13,11 +13,11 @@ class AVTimeSeriesReader(AlphaVantage):
1313
----------
1414
symbols : string
1515
Single stock symbol (ticker)
16-
start : string, (defaults to '1/1/2010')
17-
Starting date, timestamp. Parses many different kind of date
16+
start : string, int, date, datetime, timestamp
17+
Starting date. Parses many different kinds of date
1818
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
19-
end : string, (defaults to today)
20-
Ending date, timestamp. Same format as starting date.
19+
end : string, int, date, datetime, timestamp
20+
Ending date
2121
retry_count : int, default 3
2222
Number of times to retry query request.
2323
pause : int, default 0.1
@@ -52,6 +52,7 @@ def __init__(
5252
chunksize=25,
5353
api_key=None,
5454
):
55+
self._func = function
5556
super(AVTimeSeriesReader, self).__init__(
5657
symbols=symbols,
5758
start=start,
@@ -62,19 +63,26 @@ def __init__(
6263
api_key=api_key,
6364
)
6465

65-
self._func = function
66+
@property
67+
def default_start_date(self):
68+
d_days = 3 if self.intraday else 365 * 20
69+
return dt.datetime.today() - dt.timedelta(days=d_days)
6670

6771
@property
6872
def function(self):
6973
return self._func
7074

75+
@property
76+
def intraday(self):
77+
return True if self.function == "TIME_SERIES_INTRADAY" else False
78+
7179
@property
7280
def output_size(self):
7381
""" Used to limit the size of the Alpha Vantage query when
7482
possible.
7583
"""
76-
delta = datetime.now() - self.start
77-
return "full" if delta.days > 80 else "compact"
84+
delta = dt.datetime.now() - self.start
85+
return "compact" if delta.days < 80 and not self.intraday else "full"
7886

7987
@property
8088
def data_key(self):
@@ -88,7 +96,7 @@ def params(self):
8896
"apikey": self.api_key,
8997
"outputsize": self.output_size,
9098
}
91-
if self.function == "TIME_SERIES_INTRADAY":
99+
if self.intraday:
92100
p.update({"interval": "1min"})
93101
return p
94102

pandas_datareader/base.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime
12
import time
23
import warnings
34

@@ -27,7 +28,7 @@ class _BaseReader(object):
2728
----------
2829
symbols : {str, List[str]}
2930
String symbol or list of symbols
30-
start : string, (defaults to '1/1/2010')
31+
start : string, (defaults to 5 years before current date)
3132
Starting date, timestamp. Parses many different kinds of date
3233
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
3334
end : string, (defaults to today)
@@ -59,7 +60,7 @@ def __init__(
5960

6061
self.symbols = symbols
6162

62-
start, end = _sanitize_dates(start, end)
63+
start, end = _sanitize_dates(start or self.default_start_date, end)
6364
self.start = start
6465
self.end = end
6566

@@ -76,6 +77,12 @@ def close(self):
7677
"""Close network session"""
7778
self.session.close()
7879

80+
@property
81+
def default_start_date(self):
82+
"""Default start date for reader. Defaults to 5 years before current date"""
83+
today = datetime.date.today()
84+
return today - datetime.timedelta(days=365 * 5)
85+
7986
@property
8087
def url(self):
8188
"""API URL"""

pandas_datareader/tests/test_base.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime as dt
12
import pytest
23
import requests
34

@@ -26,6 +27,10 @@ def test_invalid_format(self):
2627
b._format = "IM_NOT_AN_IMPLEMENTED_TYPE"
2728
b._read_one_data("a", None)
2829

30+
def test_default_start_date(self):
31+
b = base._BaseReader([])
32+
assert b.default_start_date == dt.date.today() - dt.timedelta(days=365 * 5)
33+
2934

3035
class TestDailyBaseReader(object):
3136
def test_get_params(self):

pandas_datareader/tests/test_utils.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import datetime as dt
2+
import pandas as pd
3+
import pytest
4+
5+
from pandas_datareader._utils import _sanitize_dates
6+
7+
8+
class TestUtils(object):
9+
@pytest.mark.parametrize(
10+
"input_date", ["2019-01-01", dt.datetime(2019, 1, 1), dt.date(2019, 1, 1),
11+
pd.Timestamp(2019, 1, 1)]
12+
)
13+
def test_sanitize_dates(self, input_date):
14+
expected_start = pd.to_datetime(input_date)
15+
expected_end = pd.to_datetime(dt.date.today())
16+
result = _sanitize_dates(input_date, None)
17+
assert result == (expected_start, expected_end)
18+
19+
def test_sanitize_dates_int(self):
20+
start_int = 2018
21+
end_int = 2019
22+
expected_start = pd.to_datetime(dt.datetime(start_int, 1, 1))
23+
expected_end = pd.to_datetime(dt.datetime(end_int, 1, 1))
24+
assert _sanitize_dates(start_int, end_int) == (expected_start, expected_end)
25+
26+
def test_sanitize_invalid_dates(self):
27+
with pytest.raises(ValueError):
28+
_sanitize_dates(2019, 2018)
29+
30+
with pytest.raises(ValueError):
31+
_sanitize_dates("2019-01-01", "2018-01-01")
32+
33+
with pytest.raises(ValueError):
34+
_sanitize_dates("20199", None)
35+
36+
def test_sanitize_dates_defaults(self):
37+
default_start = pd.to_datetime(dt.date.today() - dt.timedelta(days=365 * 5))
38+
default_end = pd.to_datetime(dt.date.today())
39+
assert _sanitize_dates(None, None) == (default_start, default_end)

0 commit comments

Comments
 (0)