2
2
3
3
import datetime as dt
4
4
5
- from pandas import read_csv , compat
5
+ import pandas as pd
6
6
from pandas .compat import StringIO
7
7
8
8
from pandas_datareader .base import _DailyBaseReader
9
+ from pandas_datareader .compat import is_list_like
9
10
10
11
11
12
class MoexReader (_DailyBaseReader ):
@@ -43,21 +44,26 @@ def __init__(self, *args, **kwargs):
43
44
self .start = self .start .date ()
44
45
self .end_dt = self .end
45
46
self .end = self .end .date ()
46
- if not isinstance (self .symbols , compat .string_types ):
47
- raise ValueError ("Support for multiple symbols is not yet implemented." )
47
+ if not is_list_like (self .symbols ):
48
+ self .symbols = [self .symbols ]
49
+ self .__engines , self .__markets = {}, {} # dicts for engines and markets
48
50
49
51
__url_metadata = "https://iss.moex.com/iss/securities/{symbol}.csv"
50
52
__url_data = "https://iss.moex.com/iss/history/engines/{engine}/" \
51
53
"markets/{market}/securities/{symbol}.csv"
52
54
53
55
@property
def url(self):
    """Return a list of API URLs, one per symbol.

    Each URL is built from the data URL template using the engine and
    market stored for that symbol, in the order of ``self.symbols``.

    Raises
    ------
    Exception
        If the engine or market mapping is still empty, i.e. no metadata
        has been stored on the reader yet.
    """
    if not self.__engines or not self.__markets:
        raise Exception("url property accessed before "
                        "invocation of read() or _get_metadata() methods")

    return [self.__url_data.format(engine=self.__engines[s],
                                   market=self.__markets[s],
                                   symbol=s)
            for s in self.symbols]
61
67
62
68
def _get_params (self , start ):
63
69
params = {
@@ -78,96 +84,107 @@ def _get_params(self, start):
78
84
return params
79
85
80
86
def _get_metadata (self ):
81
- """ get a market and an engine for a given symbol """
82
- response = self ._get_response (
83
- self .__url_metadata .format (symbol = self .symbols )
84
- )
85
- text = self ._sanitize_response (response )
86
- if len (text ) == 0 :
87
- service = self .__class__ .__name__
88
- raise IOError ("{} request returned no data; check URL for invalid "
89
- "inputs: {}" .format (service , self .__url_metadata ))
90
- if isinstance (text , compat .binary_type ):
91
- text = text .decode ('windows-1251' )
92
- else :
93
- text = text
94
-
95
- header_str = 'secid;boardid;'
96
- get_data = False
97
- for s in text .splitlines ():
98
- if s .startswith (header_str ):
99
- get_data = True
100
- continue
101
- if get_data and s != '' :
102
- fields = s .split (';' )
103
- return fields [5 ], fields [7 ]
104
- service = self .__class__ .__name__
105
- raise IOError ("{} request returned no metadata: {}\n "
106
- "Typo in security symbol `{}`?" .format (
107
- service ,
108
- self .__url_metadata .format (symbol = self .symbols ),
109
- self .symbols
110
- )
111
- )
87
+ """Get markets and engines for the given symbols"""
88
+
89
+ markets , engines = {}, {}
90
+
91
+ for symbol in self .symbols :
92
+ response = self ._get_response (
93
+ self .__url_metadata .format (symbol = symbol )
94
+ )
95
+ text = self ._sanitize_response (response )
96
+ if len (text ) == 0 :
97
+ service = self .__class__ .__name__
98
+ raise IOError ("{} request returned no data; check URL for invalid "
99
+ "inputs: {}" .format (service , self .__url_metadata ))
100
+ if isinstance (text , pd .compat .binary_type ):
101
+ text = text .decode ('windows-1251' )
102
+
103
+ header_str = 'secid;boardid;'
104
+ get_data = False
105
+ for s in text .splitlines ():
106
+ if s .startswith (header_str ):
107
+ get_data = True
108
+ continue
109
+ if get_data and s != '' :
110
+ fields = s .split (';' )
111
+ markets [symbol ], engines [symbol ] = fields [5 ], fields [7 ]
112
+ break
113
+ if symbol not in markets or symbol not in engines :
114
+ raise IOError ("{} request returned no metadata: {}\n "
115
+ "Typo in the security symbol `{}`?" .format (
116
+ self .__class__ .__name__ ,
117
+ self .__url_metadata .format (symbol = symbol ),
118
+ symbol ))
119
+ return markets , engines
112
120
113
121
def read(self):
    """Read data for every requested symbol.

    Metadata (market and engine) is looked up first, then the history of
    each symbol is downloaded in a loop of small date intervals: every
    follow-up request starts from the last trade date already received.
    The reader is closed when the download finishes or fails.

    Returns
    -------
    pandas.DataFrame
        The frame built by ``_read_lines`` for a single symbol, or the
        row-wise concatenation of the per-symbol frames.

    Raises
    ------
    ValueError
        If no symbols were provided.
    """
    frames = []  # one DataFrame per symbol
    try:
        if len(self.symbols) == 0:
            # Fail with a clear message instead of the unrelated
            # "accessed before read()" error raised by the url property.
            raise ValueError("No symbols were provided.")

        self.__markets, self.__engines = self._get_metadata()
        urls = self.url  # one URL per symbol, in the order of the symbols

        for symbol_url in urls:
            out_list = []
            date_column = None

            while True:  # read in a loop with small date intervals
                if out_list:
                    if date_column is None:
                        header = out_list[0].split(';')
                        date_column = header.index('TRADEDATE')

                    # Resume from the last downloaded date.  The row is
                    # split completely so the lookup stays correct
                    # wherever the TRADEDATE column is located.
                    start_str = out_list[-1].split(';')[date_column]
                    start = dt.datetime.strptime(start_str,
                                                 '%Y-%m-%d').date()
                else:
                    start_str = self.start.strftime('%Y-%m-%d')
                    start = self.start

                if start >= self.end or start >= dt.date.today():
                    break

                params = self._get_params(start_str)
                raw = self._read_url_as_String(symbol_url, params)
                # The first two lines of the response are skipped.
                strings_out = [line for line in raw.splitlines()[2:]
                               if line.strip()]

                if not out_list:
                    out_list = strings_out
                    if len(strings_out) < 101:  # all data received
                        break
                else:
                    out_list += strings_out[1:]  # remove a CSV head line
                    if len(strings_out) < 100:  # all data received
                        break

            str_io = StringIO('\r\n'.join(out_list))
            frames.append(self._read_lines(str_io))
    finally:
        self.close()

    if len(frames) > 1:
        return pd.concat(frames, axis=0, join='outer', sort=True)
    return frames[0]
170
+
154
171
def _read_url_as_String (self , url , params = None ):
155
- """ Open url (and retry) """
172
+ """ Open an url (and retry) """
173
+
156
174
response = self ._get_response (url , params = params )
157
175
text = self ._sanitize_response (response )
158
176
if len (text ) == 0 :
159
177
service = self .__class__ .__name__
160
178
raise IOError ("{} request returned no data; check URL for invalid "
161
179
"inputs: {}" .format (service , self .url ))
162
- if isinstance (text , compat .binary_type ):
163
- out = text .decode ('windows-1251' )
164
- else :
165
- out = text
166
- return out
180
+ if isinstance (text , pd .compat .binary_type ):
181
+ text = text .decode ('windows-1251' )
182
+ return text
167
183
168
184
def _read_lines (self , input ):
169
- """ return pandas DataFrame from input """
170
- rs = read_csv (input , index_col = 'TRADEDATE' , parse_dates = True , sep = ';' ,
185
+ """ Return a pandas DataFrame from input """
186
+
187
+ rs = pd .read_csv (input , index_col = 'TRADEDATE' , parse_dates = True , sep = ';' ,
171
188
na_values = ('-' , 'null' ))
172
189
# Get rid of unicode characters in index name.
173
190
try :
0 commit comments