Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions pandas/io/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from zipfile import ZipFile
from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str

import pandas as pd
from pandas import Panel, DataFrame, Series, read_csv, concat
from pandas.io.parsers import TextParser

Expand Down Expand Up @@ -115,7 +116,7 @@ def get_quote_yahoo(symbols):
return None

for line in lines:
fields = line.strip().split(',')
fields = line.decode('utf-8').strip().split(',')
for i, field in enumerate(fields):
if field[-2:] == '%"':
data[header[i]].append(float(field.strip('"%')))
Expand All @@ -133,7 +134,7 @@ def get_quote_yahoo(symbols):


def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
pause=0):
pause=0, **kwargs):
"""
Get historical data for the given name from yahoo.
Date format is datetime
Expand Down Expand Up @@ -195,11 +196,22 @@ def _adjust_prices(hist_data, price_list=['Open', 'High', 'Low', 'Close']):

def _calc_return_index(price_df):
"""
Return a returns index from a input price df or series.
Return a returns index from a input price df or series. Intial value
(typically NaN) is set to 1.
"""

ret_index = price_df.pct_change().add(1).cumprod()
ret_index.ix[0] = 1
df = price_df.pct_change().add(1).cumprod()
mask = ~df.ix[1].isnull() & df.ix[0].isnull()
df.ix[0][mask] = 1

#Check for first stock listings after starting date of index in ret_index
#If True, find first_valid_index and set previous entry to 1.
if(~mask).any:
for sym in mask.index[~mask]:
tstamp = df[sym].first_valid_index()
t_idx = df.index.get_loc(tstamp) - 1
df[sym].ix[t_idx] = 1

ret_index = df
return ret_index


Expand Down Expand Up @@ -241,7 +253,7 @@ def get_components_yahoo(idx_sym):
#break when no new components are found
while (True in mask):
urlStr = url.format(idx_mod, stats, comp_idx)
lines = (urllib.urlopen(urlStr).read().strip().
lines = (urllib.urlopen(urlStr).read().decode('utf-8').strip().
strip('"').split('"\r\n"'))

lines = [line.strip().split('","') for line in lines]
Expand All @@ -258,16 +270,17 @@ def get_components_yahoo(idx_sym):


def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0,
adjust_price=False, ret_index=False, chunksize=25, **kwargs):
adjust_price=False, ret_index=False, chunksize=25,
**kwargs):
"""
Returns DataFrame/Panel of historical stock prices from symbols, over date
range, start to end. To avoid being penalized by Yahoo! Finance servers,
pauses between downloading 'chunks' of symbols can be specified.

Parameters
----------
symbols : string, list-like object (list, tuple, Series), or DataFrame
Single stock symbol (ticker), list-like object of symbols or
symbols : string, array-like object (list, tuple, Series), or DataFrame
Single stock symbol (ticker), array-like object of symbols or
DataFrame with index containing stock symbols.
start : string, (defaults to '1/1/2010')
Starting date, timestamp. Parses many different kinds of date
Expand All @@ -290,7 +303,7 @@ def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0,

Returns
-------
hist_data : DataFrame (str) or Panel (list-like object, DataFrame)
hist_data : DataFrame (str) or Panel (array-like object, DataFrame)
"""

def dl_mult_symbols(symbols):
Expand Down
35 changes: 15 additions & 20 deletions pandas/io/tests/test_yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def test_get_components(self):
assert 'GOOG' in df.index
assert 'AMZN' in df.index


@slow
@network
def test_get_data(self):
Expand All @@ -83,8 +84,6 @@ def test_get_data(self):
sl = ['AAPL', 'AMZN', 'GOOG']
pan = web.get_data_yahoo(sl, '2012')
ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
# the provider results are subject to change, disabled. GH2847
# assert result == expected
assert ts[0].dayofyear == 96

dfi = web.get_components_yahoo('^DJI')
Expand All @@ -93,33 +92,29 @@ def test_get_data(self):
result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
assert result == expected

pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-12',
adjust_price=True)
expected = [18.38, 27.45, 24.54]
result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
# the provider results are subject to change, disabled. GH2847
# assert result == expected

# sanity checking
t= np.array(result)
assert np.issubdtype(t.dtype, np.floating)
assert t.shape == (3,)

pan = web.get_data_yahoo(dfi, '2011', ret_index=True)
d = [[ 1.01757469, 1.01130524, 1.02414183],
[ 1.00292912, 1.00770812, 1.01735194],
[ 1.00820152, 1.00462487, 1.01320257],
[ 1.08025776, 0.99845838, 1.00113165]]

expected = pd.DataFrame(d)
result = pan.Ret_Index.ix['01-18-11':'01-21-11'][['GE', 'INTC', 'MSFT']]
# the provider results are subject to change, disabled. GH2847
# assert_almost_equal(result.values, expected.values)
expected = [[ 18.99, 28.4 , 25.18],
[ 18.58, 28.31, 25.13],
[ 19.03, 28.16, 25.52],
[ 18.81, 28.82, 25.87]]
result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
assert (result == expected).all()

#Check ret_index
pan = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987',
ret_index=True)
tstamp = pan.Ret_Index.INTC.first_valid_index()
result = pan.Ret_Index.ix[tstamp]['INTC']
expected = 1.0
assert result == expected

# sanity checking
t= np.array(result)
assert np.issubdtype(t.dtype, np.floating)
assert t.shape == (4, 3)


if __name__ == '__main__':
Expand Down