1
1
import re
2
+ import json
2
3
import time
3
4
import warnings
4
5
import numpy as np
5
- from pandas import Panel
6
+ from pandas import Panel , DataFrame , to_datetime
6
7
from pandas_datareader .base import (_DailyBaseReader , _in_chunks )
7
8
from pandas_datareader ._utils import (RemoteDataError , SymbolWarning )
9
+ import pandas .compat as compat
8
10
9
11
10
12
class YahooDailyReader (_DailyBaseReader ):
@@ -46,12 +48,13 @@ class YahooDailyReader(_DailyBaseReader):
46
48
47
49
def __init__ (self , symbols = None , start = None , end = None , retry_count = 3 ,
48
50
pause = 0.35 , session = None , adjust_price = False ,
49
- ret_index = False , chunksize = 25 , interval = 'd' ):
51
+ ret_index = False , chunksize = 1 , interval = 'd' ):
50
52
super (YahooDailyReader , self ).__init__ (symbols = symbols ,
51
53
start = start , end = end ,
52
54
retry_count = retry_count ,
53
55
pause = pause , session = session ,
54
56
chunksize = chunksize )
57
+
55
58
# Ladder up the wait time between subsequent requests to improve
56
59
# probability of a successful retry
57
60
self .pause_multiplier = 2.5
@@ -79,20 +82,14 @@ def __init__(self, symbols=None, start=None, end=None, retry_count=3,
79
82
self .interval = 'wk'
80
83
81
84
self .interval = '1' + self .interval
82
- self .crumb = self ._get_crumb (retry_count )
83
85
84
86
@property
85
87
def service (self ):
86
88
return 'history'
87
89
88
- @property
89
- def url (self ):
90
- return 'https://query1.finance.yahoo.com/v7/finance/download/{}' \
91
- .format (self .symbols )
92
-
93
90
@staticmethod
94
91
def yurl (symbol ):
95
- return 'https://query1. finance.yahoo.com/v7/finance/download/{} ' \
92
+ return 'https://finance.yahoo.com/quote/{}/history ' \
96
93
.format (symbol )
97
94
98
95
def _get_params (self , symbol ):
@@ -104,28 +101,49 @@ def _get_params(self, symbol):
104
101
'period1' : unix_start ,
105
102
'period2' : unix_end ,
106
103
'interval' : self .interval ,
107
- 'events ' : self .service ,
108
- 'crumb ' : self .crumb
104
+ 'frequency ' : self .interval ,
105
+ 'filter ' : self .service
109
106
}
110
107
return params
111
108
112
109
def read(self):
    """Read data for the configured symbol(s).

    Dispatches on the type of ``self.symbols``:

    * a single symbol (string or int, e.g. ``'GOOG'``) is fetched with
      ``_read_one_data``;
    * a DataFrame contributes its index as the list of symbols;
    * anything else is treated as a collection of symbols
      (e.g. ``['GOOG', 'AAPL', 'MSFT']``).

    Both multi-symbol cases go through ``_dl_mult_symbols``.

    Returns
    -------
    The object produced by ``_read_one_data`` or ``_dl_mult_symbols``.
    """
    try:
        # A single symbol, (e.g., 'GOOG')
        if isinstance(self.symbols, (compat.string_types, int)):
            return self._read_one_data(
                self.yurl(self.symbols),
                params=self._get_params(self.symbols))
        # A DataFrame: its index holds the symbols
        if isinstance(self.symbols, DataFrame):
            return self._dl_mult_symbols(self.symbols.index)
        # Multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
        return self._dl_mult_symbols(self.symbols)
    finally:
        # The previous implementation always called close() after a read,
        # on success and on error alike; keep that guarantee.
        self.close()
121
+
122
def _read_one_data(self, url, params):
    """Read one symbol's price history from the specified URL.

    The response text is searched for the JSON literal assigned to
    ``root.App.main``; that literal is decoded and the ``prices`` records
    of its ``HistoricalPriceStore`` are loaded into a DataFrame.

    Parameters
    ----------
    url : str
        URL to request.
    params : dict
        Query parameters forwarded to ``self._get_response``.

    Returns
    -------
    DataFrame
        Columns ``date``, ``high``, ``low``, ``open``, ``close``,
        ``volume`` and ``adjclose`` (plus ``Ret_Index`` when
        ``self.ret_index`` is set). Rows without a ``close`` value are
        dropped. When ``self.adjust_price`` is set the frame is first
        passed through ``_adjust_prices``.

    Raises
    ------
    RemoteDataError
        If the response does not contain the embedded JSON payload, or
        the payload has no historical price store.
    """
    resp = self._get_response(url, params=params)
    ptrn = r'root\.App\.main = (.*?);\n}\(this\)\);'
    match = re.search(ptrn, resp.text, re.DOTALL)
    if match is None:
        # re.search returns None on no match; fail with a clear remote-data
        # error instead of an AttributeError on ``.group``.
        raise RemoteDataError(
            'Unable to locate price data in response from {0}'.format(url))

    jsn = json.loads(match.group(1))
    try:
        prices = (jsn['context']['dispatcher']['stores']
                  ['HistoricalPriceStore']['prices'])
    except (KeyError, TypeError):
        raise RemoteDataError(
            'No historical price store in response from {0}'.format(url))

    df = DataFrame(prices)
    df['date'] = to_datetime(df['date'], unit='s').dt.date
    df = df.dropna(subset=['close'])
    # Take an explicit copy so the column assignment below operates on an
    # independent frame rather than on a selection of another one.
    df = df[['date', 'high', 'low', 'open', 'close',
             'volume', 'adjclose']].copy()

    if self.ret_index:
        df['Ret_Index'] = _calc_return_index(df['adjclose'])
    if self.adjust_price:
        # NOTE(review): confirm _adjust_prices accepts these lower-case
        # column names; the previous code passed it an 'Adj Close' column.
        df = _adjust_prices(df)
    # NOTE(review): 'date' is a regular column here, so sort_index() orders
    # by the default positional index, not by date -- confirm intent.
    return df.sort_index().dropna(how='all')
123
141
124
142
def _dl_mult_symbols (self , symbols ):
125
143
stocks = {}
126
144
failed = []
127
145
passed = []
128
- for sym_group in _in_chunks (symbols , self . chunksize ):
146
+ for sym_group in _in_chunks (symbols , 1 ): # ignoring chunksize
129
147
for sym in sym_group :
130
148
try :
131
149
stocks [sym ] = self ._read_one_data (self .yurl (sym ),
@@ -151,18 +169,6 @@ def _dl_mult_symbols(self, symbols):
151
169
msg = "No data fetched using {0!r}"
152
170
raise RemoteDataError (msg .format (self .__class__ .__name__ ))
153
171
154
- def _get_crumb (self , retries ):
155
- # Scrape a history page for a valid crumb ID:
156
- tu = "https://finance.yahoo.com/quote/{}/history" .format (self .symbols )
157
- response = self ._get_response (tu ,
158
- params = self .params , headers = self .headers )
159
- out = str (self ._sanitize_response (response ))
160
- # Matches: {"crumb":"AlphaNumeric"}
161
- rpat = '"CrumbStore":{"crumb":"([^"]+)"}'
162
-
163
- crumb = re .findall (rpat , out )[0 ]
164
- return crumb .encode ('ascii' ).decode ('unicode-escape' )
165
-
166
172
167
173
def _adjust_prices (hist_data , price_list = None ):
168
174
"""
0 commit comments