Skip to content

Commit 08e1545

Browse files
author
David Stephens
committed
ENH: Add requests session to Options
1 parent ccc5aa9 commit 08e1545

File tree

6 files changed

+43
-28
lines changed

6 files changed

+43
-28
lines changed

docs/source/whatsnew/v0.2.1.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@ New features
1919
~~~~~~~~~~~~
2020

2121
- ``DataReader`` now supports Eurostat data sources, see :ref:`here<remote_data.eurostat>` (:issue:`101`).
22+
- ``Options`` downloading is approximately 4x faster as a result of a rewrite of the parsing function. (:issue:`122`)
2223

2324
.. _whatsnew_021.api_breaking:
2425

2526
Backwards incompatible API changes
2627
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
28+
- ``Options`` columns ``PctChg`` and ``IV`` (Implied Volatility) are now type float
29+
rather than string. (:issue:`122`)
2730

2831
.. _whatsnew_021.bug_fixes:
2932

pandas_datareader/base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import datetime as dt
55

66
import requests
7+
from requests_file import FileAdapter
78

89
from pandas import to_datetime
910
import pandas.compat as compat
@@ -55,6 +56,7 @@ def __init__(self, symbols, start=None, end=None,
5556
def _init_session(self, session, retry_count):
5657
if session is None:
5758
session = requests.Session()
59+
session.mount('file://', FileAdapter())
5860
# do not set requests max_retries here to support arbitrary pause
5961
return session
6062

@@ -197,4 +199,4 @@ def _in_chunks(seq, size):
197199
"""
198200
Return sequence in 'chunks' of size defined by size
199201
"""
200-
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
202+
return (seq[pos:pos + size] for pos in range(0, len(seq), size))

pandas_datareader/data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,12 @@ def DataReader(name, data_source=None, start=None, end=None,
128128

129129

130130

131-
def Options(symbol, data_source=None):
131+
def Options(symbol, data_source=None, session=None):
132132
if data_source is None:
133133
warnings.warn("Options(symbol) is deprecated, use Options(symbol,"
134134
" data_source) instead", FutureWarning, stacklevel=2)
135135
data_source = "yahoo"
136136
if data_source == "yahoo":
137-
return YahooOptions(symbol)
137+
return YahooOptions(symbol, session=session)
138138
else:
139139
raise NotImplementedError("currently only yahoo supported")

pandas_datareader/tests/test_data.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -351,9 +351,9 @@ def setUpClass(cls):
351351
cls.month = 1
352352
cls.expiry = datetime(cls.year, cls.month, 1)
353353
cls.dirpath = tm.get_data_path()
354-
cls.html1 = os.path.join(cls.dirpath, 'yahoo_options1.html')
355-
cls.html2 = os.path.join(cls.dirpath, 'yahoo_options2.html')
356-
cls.html3 = os.path.join(cls.dirpath, 'yahoo_options3.html') #Empty table GH#22
354+
cls.html1 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options1.html')
355+
cls.html2 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options2.html')
356+
cls.html3 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options3.html') #Empty table GH#22
357357
cls.data1 = cls.aapl._option_frames_from_url(cls.html1)['puts']
358358

359359
@classmethod
@@ -381,7 +381,7 @@ def test_get_near_stock_price(self):
381381
self.assertTrue(len(options) > 1)
382382

383383
def test_options_is_not_none(self):
384-
option = web.Options('aapl')
384+
option = web.Options('aapl', 'yahoo')
385385
self.assertTrue(option is not None)
386386

387387
def test_get_call_data(self):

pandas_datareader/yahoo/options.py

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,34 @@
77
from pandas import concat, DatetimeIndex, Series
88
from pandas.tseries.offsets import MonthEnd
99
from pandas.util.testing import _network_error_classes
10+
from pandas.io.parsers import TextParser
11+
from pandas import DataFrame
1012

1113
from pandas_datareader._utils import RemoteDataError
14+
from pandas_datareader.base import _BaseReader
1215

1316
# Items needed for options class
1417
CUR_MONTH = dt.datetime.now().month
1518
CUR_YEAR = dt.datetime.now().year
1619
CUR_DAY = dt.datetime.now().day
1720

18-
1921
def _two_char(s):
2022
return '{0:0>2}'.format(s)
2123

24+
def _unpack(row, kind='td'):
25+
return [val.text_content().strip() for val in row.findall(kind)]
26+
27+
def _parse_options_data(table):
28+
header = table.findall('thead/tr')
29+
header = _unpack(header[0], kind='th')
30+
rows = table.findall('tbody/tr')
31+
data = [_unpack(r) for r in rows]
32+
if len(data) > 0:
33+
return TextParser(data, names=header).get_chunk()
34+
else: #Empty table
35+
return DataFrame(columns=header)
2236

23-
class Options(object):
37+
class Options(_BaseReader):
2438
"""
2539
***Experimental***
2640
This class fetches call/put data for a given stock/expiry month.
@@ -62,13 +76,13 @@ class Options(object):
6276
>>> all_data = aapl.get_all_data()
6377
"""
6478

65-
_TABLE_LOC = {'calls': 1, 'puts': 2}
6679
_OPTIONS_BASE_URL = 'http://finance.yahoo.com/q/op?s={sym}'
6780
_FINANCE_BASE_URL = 'http://finance.yahoo.com'
6881

69-
def __init__(self, symbol):
82+
def __init__(self, symbol, session=None):
7083
""" Instantiates options_data with a ticker saved as symbol """
7184
self.symbol = symbol.upper()
85+
super(Options, self).__init__(symbols=symbol, session=session)
7286

7387
def get_options_data(self, month=None, year=None, expiry=None):
7488
"""
@@ -156,20 +170,19 @@ def _yahoo_url_from_expiry(self, expiry):
156170
return self._FINANCE_BASE_URL + expiry_links[expiry]
157171

158172
def _option_frames_from_url(self, url):
159-
frames = read_html(url)
160-
nframes = len(frames)
161-
frames_req = max(self._TABLE_LOC.values())
162-
if nframes < frames_req:
163-
raise RemoteDataError("%s options tables found (%s expected)" % (nframes, frames_req))
173+
174+
root = self._parse_url(url)
175+
calls = root.xpath('//*[@id="optionsCallsTable"]/div[2]/div/table')[0]
176+
puts = root.xpath('//*[@id="optionsPutsTable"]/div[2]/div/table')[0]
164177

165178
if not hasattr(self, 'underlying_price'):
166179
try:
167180
self.underlying_price, self.quote_time = self._underlying_price_and_time_from_url(url)
168181
except IndexError:
169182
self.underlying_price, self.quote_time = np.nan, np.nan
170183

171-
calls = frames[self._TABLE_LOC['calls']]
172-
puts = frames[self._TABLE_LOC['puts']]
184+
calls = _parse_options_data(calls)
185+
puts = _parse_options_data(puts)
173186

174187
calls = self._process_data(calls, 'call')
175188
puts = self._process_data(puts, 'put')
@@ -648,15 +661,10 @@ def _parse_url(self, url):
648661
except ImportError: # pragma: no cover
649662
raise ImportError("Please install lxml if you want to use the "
650663
"{0!r} class".format(self.__class__.__name__))
651-
try:
652-
doc = parse(url)
653-
except _network_error_classes: # pragma: no cover
654-
raise RemoteDataError("Unable to parse URL "
655-
"{0!r}".format(url))
656-
else:
657-
root = doc.getroot()
658-
if root is None: # pragma: no cover
659-
raise RemoteDataError("Parsed URL {0!r} has no root"
664+
doc = parse(self._read_url_as_StringIO(url))
665+
root = doc.getroot()
666+
if root is None: # pragma: no cover
667+
raise RemoteDataError("Parsed URL {0!r} has no root"
660668
"element".format(url))
661669
return root
662670

@@ -682,6 +690,8 @@ def _process_data(self, frame, type):
682690
frame['Underlying_Price'] = np.nan
683691
frame["Quote_Time"] = np.nan
684692
frame.rename(columns={'Open Int': 'Open_Int'}, inplace=True)
693+
frame['IV'] = frame['IV'].str.replace(',','').str.strip('%').astype(float)/100
694+
frame['PctChg'] = frame['PctChg'].str.replace(',','').str.strip('%').astype(float)/100
685695
frame['Type'] = type
686696
frame.set_index(['Strike', 'Expiry', 'Type', 'Symbol'], inplace=True)
687697

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def readme():
2424
return f.read()
2525

2626
INSTALL_REQUIRES = (
27-
['pandas', 'requests']
27+
['pandas', 'requests', 'requests-file']
2828
)
2929

3030
setup(

0 commit comments

Comments
 (0)