Skip to content

Commit 1b3592e

Browse files
Merge pull request #525 from davidastephens/data
Reinstate Yahoo API
2 parents da18fbd + 1c351ff commit 1b3592e

File tree

12 files changed

+216
-144
lines changed

12 files changed

+216
-144
lines changed

docs/source/whatsnew/v0.7.0.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ Enhancements
4242

4343
Backwards incompatible API changes
4444
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
45+
- When requesting multiple symbols from a DailyReader (e.g. google, yahoo, IEX),
46+
a MultiIndex DataFrame is now returned. Previously, a Panel or a dict of
47+
DataFrames was returned. (:issue:`297`).
48+
4549

4650
.. _whatsnew_070.bug_fixes:
4751

pandas_datareader/base.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
import requests
66

77
import pandas.compat as compat
8-
from pandas import Panel, DataFrame
9-
from pandas import read_csv
8+
from pandas import DataFrame
9+
from pandas import read_csv, concat
1010
from pandas.io.common import urlencode
1111
from pandas.compat import StringIO, bytes_to_str
1212

@@ -239,7 +239,9 @@ def _dl_mult_symbols(self, symbols):
239239
df_na[:] = np.nan
240240
for sym in failed:
241241
stocks[sym] = df_na
242-
return Panel(stocks).swapaxes('items', 'minor')
242+
result = concat(stocks).unstack(level=0)
243+
result.columns.names = ['Attributes', 'Symbols']
244+
return result
243245
except AttributeError:
244246
# cannot construct a panel with just 1D nans indicating no data
245247
msg = "No data fetched using {0!r}"

pandas_datareader/data.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
RobinhoodQuoteReader
3333
from pandas_datareader.stooq import StooqDailyReader
3434
from pandas_datareader.tiingo import TiingoDailyReader, TiingoQuoteReader
35+
from pandas_datareader.yahoo.actions import (YahooActionReader, YahooDivReader)
3536
from pandas_datareader.yahoo.components import _get_data as \
3637
get_components_yahoo
3738
from pandas_datareader.yahoo.daily import YahooDailyReader
@@ -79,11 +80,11 @@ def get_quote_av(*args, **kwargs):
7980

8081

8182
def get_data_yahoo_actions(*args, **kwargs):
82-
raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Actions'))
83+
return YahooActionReader(*args, **kwargs).read()
8384

8485

8586
def get_quote_yahoo(*args, **kwargs):
86-
raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Actions'))
87+
raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Quotes'))
8788
return YahooQuotesReader(*args, **kwargs).read()
8889

8990

@@ -305,7 +306,6 @@ def DataReader(name, data_source=None, start=None, end=None,
305306
ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
306307
"""
307308
if data_source == "yahoo":
308-
raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Daily'))
309309
return YahooDailyReader(symbols=name, start=start, end=end,
310310
adjust_price=False, chunksize=25,
311311
retry_count=retry_count, pause=pause,
@@ -401,6 +401,18 @@ def DataReader(name, data_source=None, start=None, end=None,
401401
retry_count=retry_count, pause=pause,
402402
session=session,
403403
api_key=access_key).read()
404+
405+
elif data_source == "yahoo-actions":
406+
return YahooActionReader(symbols=name, start=start, end=end,
407+
retry_count=retry_count, pause=pause,
408+
session=session).read()
409+
410+
elif data_source == "yahoo-dividends":
411+
return YahooDivReader(symbols=name, start=start, end=end,
412+
adjust_price=False, chunksize=25,
413+
retry_count=retry_count, pause=pause,
414+
session=session, interval='d').read()
415+
404416
else:
405417
msg = "data_source=%r is not implemented" % data_source
406418
raise NotImplementedError(msg)

pandas_datareader/google/daily.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99

1010
class GoogleDailyReader(_DailyBaseReader):
1111
"""
12-
Returns DataFrame/Panel of historical stock prices from symbols, over date
13-
range, start to end. To avoid being penalized by Google Finance servers,
14-
pauses between downloading 'chunks' of symbols can be specified.
12+
Returns DataFrame of historical stock prices from
13+
symbols, over date range, start to end. To avoid being penalized by Google
14+
Finance servers, pauses between downloading 'chunks' of symbols can be
15+
specified.
1516
1617
Parameters
1718
----------

pandas_datareader/iex/daily.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
class IEXDailyReader(_DailyBaseReader):
1717

1818
"""
19-
Returns DataFrame/Panel of historical stock prices from symbols, over date
20-
range, start to end. To avoid being penalized by Google Finance servers,
21-
pauses between downloading 'chunks' of symbols can be specified.
19+
Returns DataFrame of historical stock prices
20+
from symbols, over date range, start to end. To avoid being penalized by
21+
IEX servers, pauses between downloading 'chunks' of symbols can be
22+
specified.
2223
2324
Parameters
2425
----------
@@ -110,5 +111,7 @@ def _read_lines(self, out):
110111
df = df.loc[sstart:send]
111112
result.update({symbol: df})
112113
if len(result) > 1:
114+
result = pd.concat(result).unstack(level=0)
115+
result.columns.names = ['Attributes', 'Symbols']
113116
return result
114117
return result[self.symbols]

pandas_datareader/stooq.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
class StooqDailyReader(_DailyBaseReader):
55

66
"""
7-
Returns DataFrame/Panel of historical stock prices from symbols, over date
8-
range, start to end.
7+
Returns DataFrame/dict of DataFrames of historical stock prices from
8+
symbols, over date range, start to end.
99
1010
Parameters
1111
----------
@@ -18,7 +18,7 @@ class StooqDailyReader(_DailyBaseReader):
1818
Time, in seconds, to pause between consecutive queries of chunks. If
1919
single value given for symbol, represents the pause between retries.
2020
chunksize : int, default 25
21-
Number of symbols to download consecutively before intiating pause.
21+
Number of symbols to download consecutively before initiating pause.
2222
session : Session, default None
2323
requests.sessions.Session instance to be used
2424

pandas_datareader/tests/google/test_google.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ def test_get_multi1(self):
126126
@skip_on_exception(RemoteDataError)
127127
def test_get_multi_invalid(self):
128128
sl = ['AAPL', 'AMZN', 'INVALID']
129-
pan = web.get_data_google(sl, '2012')
130-
assert 'INVALID' in pan.minor_axis
129+
data = web.get_data_google(sl, '2012')
130+
assert 'INVALID' in data.columns.levels[1]
131131

132132
def test_get_multi_all_invalid(self):
133133
sl = ['INVALID', 'INVALID2', 'INVALID3']

pandas_datareader/tests/test_iex_daily.py

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from datetime import datetime
22

3+
from pandas import DataFrame, MultiIndex
4+
35
import pytest
46

57
import pandas_datareader.data as web
6-
import pandas.util.testing as tm
78

89

910
class TestIEXDaily(object):
@@ -44,37 +45,26 @@ def test_single_symbol(self):
4445
def test_multiple_symbols(self):
4546
syms = ["AAPL", "MSFT", "TSLA"]
4647
df = web.DataReader(syms, "iex", self.start, self.end)
47-
assert sorted(list(df)) == syms
48+
assert sorted(list(df.columns.levels[1])) == syms
4849
for sym in syms:
49-
assert len(df[sym] == 578)
50+
assert len(df.xs(sym, level='Symbols', axis=1) == 578)
5051

5152
def test_multiple_symbols_2(self):
5253
syms = ["AAPL", "MSFT", "TSLA"]
5354
good_start = datetime(2017, 2, 9)
5455
good_end = datetime(2017, 5, 24)
5556
df = web.DataReader(syms, "iex", good_start, good_end)
56-
assert isinstance(df, dict)
57-
assert len(df) == 3
58-
assert sorted(list(df)) == syms
57+
assert isinstance(df, DataFrame)
58+
assert isinstance(df.columns, MultiIndex)
59+
assert len(df.columns.levels[1]) == 3
60+
assert sorted(list(df.columns.levels[1])) == syms
5961

60-
a = df["AAPL"]
61-
t = df["TSLA"]
62+
a = df.xs("AAPL", axis=1, level='Symbols')
63+
t = df.xs("TSLA", axis=1, level='Symbols')
6264

6365
assert len(a) == 73
6466
assert len(t) == 73
6567

66-
expected1 = a.loc["2017-02-09"]
67-
tm.assert_almost_equal(expected1["close"], 130.36,
68-
check_less_precise=True)
69-
tm.assert_almost_equal(expected1["high"], 130.38,
70-
check_less_precise=True)
71-
72-
expected2 = a.loc["2017-05-24"]
73-
tm.assert_almost_equal(expected2["close"], 151.57,
74-
check_less_precise=True)
75-
tm.assert_almost_equal(expected2["high"], 152.40,
76-
check_less_precise=True)
77-
7868
expected3 = t.loc["2017-02-09"]
7969
assert expected3["close"] == 269.20
8070
assert expected3["high"] == 271.18

pandas_datareader/tests/test_wb.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from pandas_datareader.compat import assert_raises_regex
1010
from pandas_datareader.wb import (search, download, get_countries,
1111
get_indicators, WorldBankReader)
12+
from pandas_datareader._utils import RemoteDataError
13+
from pandas_datareader._testing import skip_on_exception
1214

1315

1416
class TestWB(object):
@@ -215,6 +217,7 @@ def test_wdi_get_indicators(self):
215217
assert result.columns.equals(exp_col)
216218
assert len(result) > 10000
217219

220+
@skip_on_exception(RemoteDataError)
218221
def test_wdi_download_monthly(self):
219222
expected = {'COPPER': {('World', '2012M01'): 8040.47,
220223
('World', '2011M12'): 7565.48,

pandas_datareader/tests/yahoo/test_yahoo.py

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ class TestYahoo(object):
2121

2222
@classmethod
2323
def setup_class(cls):
24-
pytest.skip('Skip all Yahoo! tests.')
2524
pytest.importorskip("lxml")
2625

2726
@skip_on_exception(RemoteDataError)
@@ -30,7 +29,8 @@ def test_yahoo(self):
3029
start = datetime(2010, 1, 1)
3130
end = datetime(2013, 1, 25)
3231

33-
assert web.DataReader('F', 'yahoo', start, end)['Close'][-1] == 13.68
32+
assert round(web.DataReader('F', 'yahoo', start, end)['Close'][-1],
33+
2) == 13.68
3434

3535
def test_yahoo_fails(self):
3636
start = datetime(2010, 1, 1)
@@ -39,13 +39,15 @@ def test_yahoo_fails(self):
3939
with pytest.raises(Exception):
4040
web.DataReader('NON EXISTENT TICKER', 'yahoo', start, end)
4141

42+
@pytest.mark.skip('Yahoo quotes deprecated')
4243
def test_get_quote_series(self):
4344
try:
4445
df = web.get_quote_yahoo(pd.Series(['GOOG', 'AAPL', 'GOOG']))
4546
except ConnectionError:
4647
pytest.xfail(reason=XFAIL_REASON)
4748
tm.assert_series_equal(df.iloc[0], df.iloc[2])
4849

50+
@pytest.mark.skip('Yahoo quotes deprecated')
4951
def test_get_quote_string(self):
5052
_yahoo_codes.update({'MarketCap': 'j1'})
5153
try:
@@ -55,13 +57,15 @@ def test_get_quote_string(self):
5557

5658
assert not pd.isnull(df['MarketCap'][0])
5759

60+
@pytest.mark.skip('Yahoo quotes deprecated')
5861
def test_get_quote_stringlist(self):
5962
try:
6063
df = web.get_quote_yahoo(['GOOG', 'AAPL', 'GOOG'])
6164
except ConnectionError:
6265
pytest.xfail(reason=XFAIL_REASON)
6366
tm.assert_series_equal(df.iloc[0], df.iloc[2])
6467

68+
@pytest.mark.skip('Yahoo quotes deprecated')
6569
def test_get_quote_comma_name(self):
6670
_yahoo_codes.update({'name': 'n'})
6771
try:
@@ -113,6 +117,10 @@ def test_get_data_single_symbol(self):
113117
# just test that we succeed
114118
web.get_data_yahoo('GOOG')
115119

120+
@skip_on_exception(RemoteDataError)
121+
def test_data_with_no_actions(self):
122+
web.get_data_yahoo('TSLA')
123+
116124
@skip_on_exception(RemoteDataError)
117125
def test_get_data_adjust_price(self):
118126
goog = web.get_data_yahoo('GOOG')
@@ -163,23 +171,22 @@ def test_get_data_null_as_missing_data(self, adj_pr):
163171

164172
@skip_on_exception(RemoteDataError)
165173
def test_get_data_multiple_symbols_two_dates(self):
166-
pan = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
167-
'JAN-31-12')
168-
result = pan.Close['01-18-12'].T
174+
data = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
175+
'JAN-31-12')
176+
result = data.Close.loc['01-18-12'].T
169177
assert result.size == 3
170178

171179
# sanity checking
172-
assert result.dtypes.all() == np.floating
180+
assert result.dtypes == np.floating
173181

174182
expected = np.array([[18.99, 28.4, 25.18],
175183
[18.58, 28.31, 25.13],
176184
[19.03, 28.16, 25.52],
177185
[18.81, 28.82, 25.87]])
178-
df = pan.Open
186+
df = data.Open
179187
result = df[(df.index >= 'Jan-15-12') & (df.index <= 'Jan-20-12')]
180188
assert expected.shape == result.shape
181189

182-
@pytest.mark.xfail(reason="failing after #355")
183190
def test_get_date_ret_index(self):
184191
pan = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987',
185192
ret_index=True)
@@ -193,7 +200,6 @@ def test_get_date_ret_index(self):
193200
# sanity checking
194201
assert np.issubdtype(pan.values.dtype, np.floating)
195202

196-
@pytest.mark.xfail(reason="failing after #355")
197203
def test_get_data_yahoo_actions(self):
198204
start = datetime(1990, 1, 1)
199205
end = datetime(2000, 4, 5)
@@ -228,7 +234,6 @@ def test_yahoo_reader_class(self):
228234
r = YahooDailyReader('GOOG', session=session)
229235
assert r.session is session
230236

231-
@pytest.mark.xfail(reason="failing after #355")
232237
def test_yahoo_DataReader(self):
233238
start = datetime(2010, 1, 1)
234239
end = datetime(2015, 5, 9)
@@ -241,6 +246,22 @@ def test_yahoo_DataReader(self):
241246
'2013-08-08', '2013-05-09',
242247
'2013-02-07', '2012-11-07',
243248
'2012-08-09'])
249+
250+
exp = pd.DataFrame({'action': ['DIVIDEND', 'DIVIDEND', 'DIVIDEND',
251+
'DIVIDEND', 'SPLIT', 'DIVIDEND',
252+
'DIVIDEND', 'DIVIDEND',
253+
'DIVIDEND', 'DIVIDEND', 'DIVIDEND',
254+
'DIVIDEND', 'DIVIDEND'],
255+
'value': [0.52, 0.47, 0.47, 0.47, 0.14285714,
256+
3.29, 3.05, 3.05, 3.05,
257+
3.05, 2.65, 2.65, 2.65]},
258+
index=exp_idx)
259+
exp.index.name = 'Date'
260+
tm.assert_frame_equal(result.reindex_like(exp).round(5), exp.round(5))
261+
262+
result = web.get_data_yahoo_actions('AAPL', start, end,
263+
adjust_dividends=True)
264+
244265
exp = pd.DataFrame({'action': ['DIVIDEND', 'DIVIDEND', 'DIVIDEND',
245266
'DIVIDEND', 'SPLIT', 'DIVIDEND',
246267
'DIVIDEND', 'DIVIDEND',
@@ -251,13 +272,11 @@ def test_yahoo_DataReader(self):
251272
0.43571, 0.37857, 0.37857, 0.37857]},
252273
index=exp_idx)
253274
exp.index.name = 'Date'
254-
255-
tm.assert_frame_equal(result.reindex_like(exp), exp)
275+
tm.assert_frame_equal(result.reindex_like(exp).round(5), exp.round(5))
256276

257277
@skip_on_exception(RemoteDataError)
258278
def test_yahoo_DataReader_multi(self):
259279
start = datetime(2010, 1, 1)
260280
end = datetime(2015, 5, 9)
261-
262281
result = web.DataReader(['AAPL', 'F'], 'yahoo-actions', start, end)
263-
assert isinstance(result, pd.Panel)
282+
assert isinstance(result, dict)

0 commit comments

Comments
 (0)