
Commit c36c8f0

Mottl authored and bashtage committed
Multiple symbols for MOEX data connector (#563)
* Add support for multiple symbols in MOEX datareader
* Updated whatsnew for MOEX
1 parent dc4c3b0 commit c36c8f0

File tree

2 files changed: +102 -82 lines changed


docs/source/whatsnew/v0.7.0.txt

Lines changed: 3 additions & 0 deletions
@@ -43,6 +43,9 @@ Enhancements
   top-level function ``get_quote_av``.
   (:issue:`389`)
 
+- MOEX data connector now supports multiple symbols in constructor.
+  (:issue:`562`)
+
 .. _whatsnew_070.api_breaking:
 
 Backwards incompatible API changes
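
For context, a minimal usage sketch of the change described in this whatsnew entry (the tickers and dates below are illustrative, not taken from the commit):

from pandas_datareader.moex import MoexReader

# A list of MOEX tickers is now accepted; a single string still works.
df = MoexReader(["GAZP", "SBER"], start="2019-01-01", end="2019-06-01").read()

With more than one symbol the reader returns a single DataFrame built by concatenating the per-symbol results (see the read() changes in pandas_datareader/moex.py below).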

pandas_datareader/moex.py

Lines changed: 99 additions & 82 deletions
@@ -2,10 +2,11 @@
 
 import datetime as dt
 
-from pandas import read_csv, compat
+import pandas as pd
 from pandas.compat import StringIO
 
 from pandas_datareader.base import _DailyBaseReader
+from pandas_datareader.compat import is_list_like
 
 
 class MoexReader(_DailyBaseReader):
@@ -43,21 +44,26 @@ def __init__(self, *args, **kwargs):
         self.start = self.start.date()
         self.end_dt = self.end
         self.end = self.end.date()
-        if not isinstance(self.symbols, compat.string_types):
-            raise ValueError("Support for multiple symbols is not yet implemented.")
+        if not is_list_like(self.symbols):
+            self.symbols = [self.symbols]
+        self.__engines, self.__markets = {}, {}  # dicts for engines and markets
 
     __url_metadata = "https://iss.moex.com/iss/securities/{symbol}.csv"
     __url_data = "https://iss.moex.com/iss/history/engines/{engine}/" \
                  "markets/{market}/securities/{symbol}.csv"
 
     @property
     def url(self):
-        """API URL"""
-        return self.__url_data.format(
-            engine=self.__engine,
-            market=self.__market,
-            symbol=self.symbols
-        )
+        """Return a list of API URLs per symbol"""
+
+        if not self.__engines or not self.__markets:
+            raise Exception("Accessing url property before invocation "
+                            "of read() or _get_metadata() methods")
+
+        return [self.__url_data.format(
+            engine=self.__engines[s],
+            market=self.__markets[s],
+            symbol=s) for s in self.symbols]
 
     def _get_params(self, start):
         params = {
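
As a small standalone sketch of the symbol normalization the constructor now performs (the helper name normalize_symbols is hypothetical; pandas_datareader.compat re-exports an is_list_like equivalent to the pandas helper used here):

from pandas.api.types import is_list_like

def normalize_symbols(symbols):
    # Mirror the constructor change: wrap a single ticker string in a list
    # so the rest of the reader can always iterate over self.symbols.
    return symbols if is_list_like(symbols) else [symbols]

print(normalize_symbols("GAZP"))            # ['GAZP']
print(normalize_symbols(["GAZP", "SBER"]))  # ['GAZP', 'SBER']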
@@ -78,96 +84,107 @@ def _get_params(self, start):
         return params
 
     def _get_metadata(self):
-        """ get a market and an engine for a given symbol """
-        response = self._get_response(
-            self.__url_metadata.format(symbol=self.symbols)
-        )
-        text = self._sanitize_response(response)
-        if len(text) == 0:
-            service = self.__class__.__name__
-            raise IOError("{} request returned no data; check URL for invalid "
-                          "inputs: {}".format(service, self.__url_metadata))
-        if isinstance(text, compat.binary_type):
-            text = text.decode('windows-1251')
-        else:
-            text = text
-
-        header_str = 'secid;boardid;'
-        get_data = False
-        for s in text.splitlines():
-            if s.startswith(header_str):
-                get_data = True
-                continue
-            if get_data and s != '':
-                fields = s.split(';')
-                return fields[5], fields[7]
-        service = self.__class__.__name__
-        raise IOError("{} request returned no metadata: {}\n"
-                      "Typo in security symbol `{}`?".format(
-                          service,
-                          self.__url_metadata.format(symbol=self.symbols),
-                          self.symbols
-                      )
-        )
+        """Get markets and engines for the given symbols"""
+
+        markets, engines = {}, {}
+
+        for symbol in self.symbols:
+            response = self._get_response(
+                self.__url_metadata.format(symbol=symbol)
+            )
+            text = self._sanitize_response(response)
+            if len(text) == 0:
+                service = self.__class__.__name__
+                raise IOError("{} request returned no data; check URL for invalid "
+                              "inputs: {}".format(service, self.__url_metadata))
+            if isinstance(text, pd.compat.binary_type):
+                text = text.decode('windows-1251')
+
+            header_str = 'secid;boardid;'
+            get_data = False
+            for s in text.splitlines():
+                if s.startswith(header_str):
+                    get_data = True
+                    continue
+                if get_data and s != '':
+                    fields = s.split(';')
+                    markets[symbol], engines[symbol] = fields[5], fields[7]
+                    break
+            if symbol not in markets or symbol not in engines:
+                raise IOError("{} request returned no metadata: {}\n"
+                              "Typo in the security symbol `{}`?".format(
+                                  self.__class__.__name__,
+                                  self.__url_metadata.format(symbol=symbol),
+                                  symbol))
+        return markets, engines
 
     def read(self):
         """Read data"""
-        try:
-            self.__market, self.__engine = self._get_metadata()
-
-            out_list = []
-            date_column = None
-            while True:  # read in loop with small date intervals
-                if len(out_list) > 0:
-                    if date_column is None:
-                        date_column = out_list[0].split(';').index('TRADEDATE')
-
-                    # get the last downloaded date
-                    start_str = out_list[-1].split(';', 4)[date_column]
-                    start = dt.datetime.strptime(start_str, '%Y-%m-%d').date()
-                else:
-                    start_str = self.start.strftime('%Y-%m-%d')
-                    start = self.start
-
-                if start >= self.end or start >= dt.date.today():
-                    break
-
-                params = self._get_params(start_str)
-                strings_out = self._read_url_as_String(self.url, params) \
-                    .splitlines()[2:]
-                strings_out = list(filter(lambda x: x.strip(), strings_out))
-
-                if len(out_list) == 0:
-                    out_list = strings_out
-                    if len(strings_out) < 101:
-                        break
-                else:
-                    out_list += strings_out[1:]  # remove CSV head line
-                    if len(strings_out) < 100:
-                        break
-            str_io = StringIO('\r\n'.join(out_list))
-            df = self._read_lines(str_io)
-            return df
+        try:
+            self.__markets, self.__engines = self._get_metadata()
+            urls = self.url  # generate urls per symbol
+            dfs = []  # an array of pandas DataFrames per symbol to concatenate
+
+            for i, symbol in enumerate(self.symbols):
+                out_list = []
+                date_column = None
+
+                while True:  # read in a loop with small date intervals
+                    if len(out_list) > 0:
+                        if date_column is None:
+                            date_column = out_list[0].split(';').index('TRADEDATE')
+
+                        # get the last downloaded date
+                        start_str = out_list[-1].split(';', 4)[date_column]
+                        start = dt.datetime.strptime(start_str, '%Y-%m-%d').date()
+                    else:
+                        start_str = self.start.strftime('%Y-%m-%d')
+                        start = self.start
+
+                    if start >= self.end or start >= dt.date.today():
+                        break
+
+                    params = self._get_params(start_str)
+                    strings_out = self._read_url_as_String(urls[i], params) \
+                        .splitlines()[2:]
+                    strings_out = list(filter(lambda x: x.strip(), strings_out))
+
+                    if len(out_list) == 0:
+                        out_list = strings_out
+                        if len(strings_out) < 101:  # all data received - break
+                            break
+                    else:
+                        out_list += strings_out[1:]  # remove a CSV head line
+                        if len(strings_out) < 100:  # all data received - break
+                            break
+                str_io = StringIO('\r\n'.join(out_list))
+                dfs.append(self._read_lines(str_io))  # add a new DataFrame
         finally:
             self.close()
 
+        if len(dfs) > 1:
+            return pd.concat(dfs, axis=0, join='outer', sort=True)
+        else:
+            return dfs[0]
+
     def _read_url_as_String(self, url, params=None):
-        """ Open url (and retry) """
+        """ Open a URL (and retry) """
+
         response = self._get_response(url, params=params)
         text = self._sanitize_response(response)
         if len(text) == 0:
             service = self.__class__.__name__
             raise IOError("{} request returned no data; check URL for invalid "
                           "inputs: {}".format(service, self.url))
-        if isinstance(text, compat.binary_type):
-            out = text.decode('windows-1251')
-        else:
-            out = text
-        return out
+        if isinstance(text, pd.compat.binary_type):
+            text = text.decode('windows-1251')
+        return text
 
     def _read_lines(self, input):
-        """ return pandas DataFrame from input """
-        rs = read_csv(input, index_col='TRADEDATE', parse_dates=True, sep=';',
+        """ Return a pandas DataFrame from input """
+
+        rs = pd.read_csv(input, index_col='TRADEDATE', parse_dates=True, sep=';',
                       na_values=('-', 'null'))
         # Get rid of unicode characters in index name.
         try:
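
To illustrate the final concatenation step in read(), here is a standalone sketch (the frames below are made up, not commit data) of how pd.concat(..., axis=0, join='outer', sort=True) combines per-symbol results:

import pandas as pd

# Two hypothetical per-symbol frames; the second has an extra column.
a = pd.DataFrame({"SECID": ["GAZP"], "CLOSE": [230.0]},
                 index=pd.to_datetime(["2019-01-03"]))
b = pd.DataFrame({"SECID": ["SBER"], "CLOSE": [190.0], "VOLUME": [1000]},
                 index=pd.to_datetime(["2019-01-03"]))

# Rows are stacked, the column sets are unioned (join='outer') and sorted
# alphabetically (sort=True); cells missing for a symbol become NaN.
merged = pd.concat([a, b], axis=0, join='outer', sort=True)
print(merged)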
