Skip to content

Commit 0debb02

Browse files
stoffprof authored and bashtage committed
Fix Yahoo! price data (#489)
* Fix Yahoo! price data. Adds back support for downloading price data from Yahoo!
1 parent 9acc0d0 commit 0debb02

File tree

3 files changed

+40
-34
lines changed

3 files changed

+40
-34
lines changed

docs/source/whatsnew/v0.7.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,5 @@ Bug Fixes
2828

2929
- Added support for passing the API KEY to QuandlReader either directly or by
3030
setting the environmental variable QUANDL_API_KEY (:issue:`485`).
31+
- Added back support for Yahoo! price data
3132
- Handle Morningstar index volume data properly (:issue:`486`).

pandas_datareader/data.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ def get_data_google(*args, **kwargs):
6060

6161

6262
def get_data_yahoo(*args, **kwargs):
63-
raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Actions'))
6463
return YahooDailyReader(*args, **kwargs).read()
6564

6665

pandas_datareader/yahoo/daily.py

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import re
2+
import json
23
import time
34
import warnings
45
import numpy as np
5-
from pandas import Panel
6+
from pandas import Panel, DataFrame, to_datetime
67
from pandas_datareader.base import (_DailyBaseReader, _in_chunks)
78
from pandas_datareader._utils import (RemoteDataError, SymbolWarning)
9+
import pandas.compat as compat
810

911

1012
class YahooDailyReader(_DailyBaseReader):
@@ -46,12 +48,13 @@ class YahooDailyReader(_DailyBaseReader):
4648

4749
def __init__(self, symbols=None, start=None, end=None, retry_count=3,
4850
pause=0.35, session=None, adjust_price=False,
49-
ret_index=False, chunksize=25, interval='d'):
51+
ret_index=False, chunksize=1, interval='d'):
5052
super(YahooDailyReader, self).__init__(symbols=symbols,
5153
start=start, end=end,
5254
retry_count=retry_count,
5355
pause=pause, session=session,
5456
chunksize=chunksize)
57+
5558
# Ladder up the wait time between subsequent requests to improve
5659
# probability of a successful retry
5760
self.pause_multiplier = 2.5
@@ -79,20 +82,14 @@ def __init__(self, symbols=None, start=None, end=None, retry_count=3,
7982
self.interval = 'wk'
8083

8184
self.interval = '1' + self.interval
82-
self.crumb = self._get_crumb(retry_count)
8385

8486
@property
8587
def service(self):
8688
return 'history'
8789

88-
@property
89-
def url(self):
90-
return 'https://query1.finance.yahoo.com/v7/finance/download/{}'\
91-
.format(self.symbols)
92-
9390
@staticmethod
9491
def yurl(symbol):
95-
return 'https://query1.finance.yahoo.com/v7/finance/download/{}'\
92+
return 'https://finance.yahoo.com/quote/{}/history'\
9693
.format(symbol)
9794

9895
def _get_params(self, symbol):
@@ -104,28 +101,49 @@ def _get_params(self, symbol):
104101
'period1': unix_start,
105102
'period2': unix_end,
106103
'interval': self.interval,
107-
'events': self.service,
108-
'crumb': self.crumb
104+
'frequency': self.interval,
105+
'filter': self.service
109106
}
110107
return params
111108

112109
def read(self):
110+
"""Read data"""
111+
# If a single symbol, (e.g., 'GOOG')
112+
if isinstance(self.symbols, (compat.string_types, int)):
113+
df = self._read_one_data(self.yurl(self.symbols),
114+
params=self._get_params(self.symbols))
115+
# Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
116+
elif isinstance(self.symbols, DataFrame):
117+
df = self._dl_mult_symbols(self.symbols.index)
118+
else:
119+
df = self._dl_mult_symbols(self.symbols)
120+
return df
121+
122+
def _read_one_data(self, url, params):
113123
""" read one data from specified URL """
114-
try:
115-
df = super(YahooDailyReader, self).read()
116-
if self.ret_index:
117-
df['Ret_Index'] = _calc_return_index(df['Adj Close'])
118-
if self.adjust_price:
119-
df = _adjust_prices(df)
120-
return df.sort_index().dropna(how='all')
121-
finally:
122-
self.close()
124+
resp = self._get_response(url, params=params)
125+
ptrn = r'root\.App\.main = (.*?);\n}\(this\)\);'
126+
jsn = json.loads(re.search(ptrn, resp.text, re.DOTALL).group(1))
127+
df = DataFrame(
128+
jsn['context']['dispatcher']['stores']
129+
['HistoricalPriceStore']['prices']
130+
)
131+
df['date'] = to_datetime(df['date'], unit='s').dt.date
132+
df = df.dropna(subset=['close'])
133+
df = df[['date', 'high', 'low', 'open', 'close',
134+
'volume', 'adjclose']]
135+
136+
if self.ret_index:
137+
df['Ret_Index'] = _calc_return_index(df['adjclose'])
138+
if self.adjust_price:
139+
df = _adjust_prices(df)
140+
return df.sort_index().dropna(how='all')
123141

124142
def _dl_mult_symbols(self, symbols):
125143
stocks = {}
126144
failed = []
127145
passed = []
128-
for sym_group in _in_chunks(symbols, self.chunksize):
146+
for sym_group in _in_chunks(symbols, 1): # ignoring chunksize
129147
for sym in sym_group:
130148
try:
131149
stocks[sym] = self._read_one_data(self.yurl(sym),
@@ -151,18 +169,6 @@ def _dl_mult_symbols(self, symbols):
151169
msg = "No data fetched using {0!r}"
152170
raise RemoteDataError(msg.format(self.__class__.__name__))
153171

154-
def _get_crumb(self, retries):
155-
# Scrape a history page for a valid crumb ID:
156-
tu = "https://finance.yahoo.com/quote/{}/history".format(self.symbols)
157-
response = self._get_response(tu,
158-
params=self.params, headers=self.headers)
159-
out = str(self._sanitize_response(response))
160-
# Matches: {"crumb":"AlphaNumeric"}
161-
rpat = '"CrumbStore":{"crumb":"([^"]+)"}'
162-
163-
crumb = re.findall(rpat, out)[0]
164-
return crumb.encode('ascii').decode('unicode-escape')
165-
166172

167173
def _adjust_prices(hist_data, price_list=None):
168174
"""

0 commit comments

Comments
 (0)