import datetime as dt

from pandas import compat, read_csv
from pandas.compat import StringIO

from pandas_datareader.base import _DailyBaseReader


class MoexReader(_DailyBaseReader):
    """
    Returns a DataFrame of historical stock prices from symbols, over the
    date range, start to end. To avoid being penalized by Moex servers,
    pauses between downloading 'chunks' of symbols can be specified.

    Parameters
    ----------
    symbols : string, array-like object (list, tuple, Series), or DataFrame
        Single stock symbol (ticker), array-like object of symbols or
        DataFrame with index containing stock symbols.
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kinds of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : int, default 0
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    chunksize : int, default 25
        Number of symbols to download consecutively before initiating pause.
    session : Session, default None
        requests.sessions.Session instance to be used
    """

    def __init__(self, *args, **kwargs):
        super(MoexReader, self).__init__(*args, **kwargs)
        self.start = self.start.date()
        self.end_dt = self.end  # keep the full timestamp for query params
        self.end = self.end.date()
        if not isinstance(self.symbols, compat.string_types):
            raise ValueError("Support for multiple symbols is not yet "
                             "implemented.")

    __url_metadata = "https://iss.moex.com/iss/securities/{symbol}.csv"
    __url_data = ("https://iss.moex.com/iss/history/engines/{engine}/"
                  "markets/{market}/securities/{symbol}.csv")

    @property
    def url(self):
        """URL of the ISS history endpoint for the resolved engine/market."""
        return self.__url_data.format(
            engine=self.__engine,
            market=self.__market,
            symbol=self.symbols
        )

    def _get_params(self, start):
        """Build ISS query parameters for a single paginated request."""
        params = {
            'iss.only': 'history',
            'iss.dp': 'point',
            'iss.df': '%Y-%m-%d',
            'iss.tf': '%H:%M:%S',
            'iss.dft': '%Y-%m-%d %H:%M:%S',
            'iss.json': 'extended',
            'callback': 'JSON_CALLBACK',
            'from': start,
            'till': self.end_dt.strftime('%Y-%m-%d'),
            'limit': 100,
            'start': 1,
            'sort_order': 'TRADEDATE',
            'sort_order_desc': 'asc'
        }
        return params

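    # Illustrative note (an assumption drawn from the format strings and
    # parameters above, not captured server output): for a security that
    # resolves to, say, engine "stock" and market "shares", `self.url` becomes
    # https://iss.moex.com/iss/history/engines/stock/markets/shares/securities/<symbol>.csv
    # and `_get_params()` supplies the query string: the from/till date
    # window, CSV-friendly `iss.*` formatting flags, and pages of 100 rows
    # sorted by TRADEDATE.
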
    def _get_metadata(self):
        """Get the market and the engine for a given symbol."""
        url = self.__url_metadata.format(symbol=self.symbols)
        response = self._get_response(url)
        text = self._sanitize_response(response)
        if len(text) == 0:
            service = self.__class__.__name__
            raise IOError("{} request returned no data; check URL for invalid "
                          "inputs: {}".format(service, self.__url_metadata))
        if isinstance(text, compat.binary_type):
            text = text.decode('windows-1251')

        # The metadata CSV contains a table whose header begins with
        # 'secid;boardid;'; the first data row below it provides the market
        # and engine names used to build the history URL.
        header_str = 'secid;boardid;'
        get_data = False
        for s in text.splitlines():
            if s.startswith(header_str):
                get_data = True
                continue
            if get_data and s != '':
                fields = s.split(';')
                return fields[5], fields[7]
        service = self.__class__.__name__
        msg = ("{} request returned no metadata: {}\n"
               "Typo in security symbol `{}`?")
        raise IOError(msg.format(
            service,
            self.__url_metadata.format(symbol=self.symbols),
            self.symbols))

    def read(self):
        """Read data from the MOEX ISS server."""
        try:
            self.__market, self.__engine = self._get_metadata()

            out_list = []
            date_column = None

            # The server returns at most 100 rows per request, so download
            # the history in chunks, restarting each query from the last
            # trade date already received.
            while True:
                if len(out_list) > 0:
                    if date_column is None:
                        date_column = out_list[0].split(';').index('TRADEDATE')

                    # the last downloaded date becomes the next chunk's start
                    start_str = out_list[-1].split(';', 4)[date_column]
                    start = dt.datetime.strptime(start_str, '%Y-%m-%d').date()
                else:
                    start_str = self.start.strftime('%Y-%m-%d')
                    start = self.start

                if start >= self.end or start >= dt.date.today():
                    break

                params = self._get_params(start_str)
                text = self._read_url_as_String(self.url, params)
                strings_out = text.splitlines()[2:]  # skip preamble lines
                strings_out = list(filter(lambda x: x.strip(), strings_out))

                if len(out_list) == 0:
                    out_list = strings_out
                    if len(strings_out) < 101:  # header + fewer than 100 rows
                        break
                else:
                    out_list += strings_out[1:]  # drop the repeated CSV header
                    if len(strings_out) < 100:  # fewer lines than a full page
                        break

            str_io = StringIO('\r\n'.join(out_list))
            df = self._read_lines(str_io)
            return df
        finally:
            self.close()

    def _read_url_as_String(self, url, params=None):
        """Open the URL (retrying on failure) and return the body as text."""
        response = self._get_response(url, params=params)
        text = self._sanitize_response(response)
        if len(text) == 0:
            service = self.__class__.__name__
            raise IOError("{} request returned no data; check URL for invalid "
                          "inputs: {}".format(service, self.url))
        if isinstance(text, compat.binary_type):
            # MOEX serves CSV in the windows-1251 (Cyrillic) encoding.
            out = text.decode('windows-1251')
        else:
            out = text
        return out

    def _read_lines(self, out):
        """Parse the accumulated CSV text into a DataFrame indexed by date."""
        rs = read_csv(out, index_col='TRADEDATE', parse_dates=True, sep=';',
                      na_values=('-', 'null'))
        # Get rid of unicode characters in the index name.
        try:
            rs.index.name = rs.index.name.decode(
                'unicode_escape').encode('ascii', 'ignore')
        except AttributeError:
            # Python 3 strings have no decode method.
            rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()
        return rs
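

if __name__ == "__main__":
    # Hedged usage sketch, not part of the reader above: it assumes network
    # access to iss.moex.com, and 'SBER' and the date range are placeholder
    # values; any single valid MOEX ticker works, since multi-symbol support
    # raises ValueError in __init__.
    reader = MoexReader('SBER', start='2017-01-02', end='2017-06-30')
    frame = reader.read()
    print(frame.head())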