|
| 1 | +from datetime import datetime |
| 2 | +from xml.etree import ElementTree |
| 3 | + |
| 4 | +import numpy as np |
| 5 | +from pandas import DataFrame, to_datetime |
| 6 | +from pandas_datareader.base import _DailyBaseReader |
| 7 | +from six import string_types |
| 8 | + |
| 9 | + |
class NaverDailyReader(_DailyBaseReader):
    """Fetches daily historical data from Naver Finance.

    :param symbols: A single symbol; multiple symbols are not currently supported.
    :param start: Start of the requested range; forwarded to the base reader.
    :param end: End of the requested range; forwarded to the base reader.
    :param retry_count: Forwarded to the base reader.
    :param pause: Forwarded to the base reader.
    :param session: Forwarded to the base reader.
    :param adjust_price: Not implemented
    :param ret_index: Accepted for signature compatibility; not used here.
    :param chunksize: Forwarded to the base reader.
    :param interval: Not implemented
    :param get_actions: Stored and exposed through the ``get_actions`` property.
    :param adjust_dividends: Not implemented
    :raises NotImplementedError: If ``symbols`` is not a single string.
    """

    def __init__(
        self,
        symbols=None,
        start=None,
        end=None,
        retry_count=3,
        pause=0.1,
        session=None,
        adjust_price=False,
        ret_index=False,
        chunksize=1,
        interval="d",
        get_actions=False,
        adjust_dividends=True,
    ):
        # Only a single symbol string is supported; anything else (including
        # the default ``None``) is rejected up front.
        if not isinstance(symbols, string_types):
            raise NotImplementedError("Bulk-fetching is not implemented")

        super(NaverDailyReader, self).__init__(
            symbols=symbols,
            start=start,
            end=end,
            retry_count=retry_count,
            pause=pause,
            session=session,
            chunksize=chunksize,
        )

        # Backing field for the ``get_actions`` property below. Without this
        # assignment, reading the property raises AttributeError.
        self._get_actions = get_actions

        # Browser-like request headers; the Referer points at the chart page
        # of the requested symbol.
        self.headers = {
            "Sec-Fetch-Mode": "no-cors",
            "Referer": "https://finance.naver.com/item/fchart.nhn?code={}".format(
                symbols
            ),
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",  # noqa
        }

    @property
    def get_actions(self):
        """Return the ``get_actions`` flag passed to the constructor."""
        return self._get_actions

    @property
    def url(self):
        """Return the Naver Finance chart-data endpoint."""
        return "https://fchart.stock.naver.com/sise.nhn"

    def _get_params(self, symbol):
        """Build the query parameters for one symbol.

        :param symbol: The symbol to request.
        :return: A dict of query parameters for the chart endpoint.
        :rtype: dict
        """
        # NOTE: The server does not take start, end dates as inputs; it only
        # takes the number of trading days as an input. To circumvent this
        # pitfall, we calculate the number of business days between self.start
        # and the current date. And then we filter by self.end before returning
        # the final result (in _read_one_data()).
        #
        # np.busday_count() counts the half-open range [start, today), so one
        # is added to cover today as well; otherwise the session at self.start
        # falls outside the window whenever today's row is already published.
        # The count is clamped to at least 1 so that a start date of today (or
        # in the future) never produces a zero or negative request size.
        # Over-fetching is harmless: _read_one_data() drops rows before
        # self.start.
        elapsed = np.busday_count(self.start.date(), datetime.now().date())
        days = max(int(elapsed) + 1, 1)
        params = {"symbol": symbol, "timeframe": "day", "count": days, "requestType": 0}
        return params

    def _read_one_data(self, url, params):
        """Read one data from specified symbol.

        Fetches the XML payload, turns it into a date-indexed frame with
        numeric Open/High/Low/Close/Volume columns, and keeps only the rows
        between ``self.start`` and ``self.end`` (both inclusive).

        :param url: Endpoint to query.
        :param params: Query parameters, as built by ``_get_params()``.
        :rtype: DataFrame
        """
        # Function-scope import so the module-level import block is untouched.
        from pandas import to_numeric

        resp = self._get_response(url, params=params)
        parsed = self._parse_xml_response(resp.text)
        prices = DataFrame(
            parsed, columns=["Date", "Open", "High", "Low", "Close", "Volume"]
        )

        # Dates arrive as compact YYYYMMDD strings (e.g. "20170918").
        prices["Date"] = to_datetime(prices["Date"], format="%Y%m%d")

        # Every field comes out of str.split() as text; convert the value
        # columns to numbers so callers get numeric dtypes. Unparseable
        # entries become NaN instead of aborting the whole read.
        for column in ("Open", "High", "Low", "Close", "Volume"):
            prices[column] = to_numeric(prices[column], errors="coerce")

        prices = prices.set_index("Date")

        # NOTE: The request is sized in trading days counted back from today
        # (see _get_params()), so trim the result to the requested range here.
        return prices[(prices.index >= self.start) & (prices.index <= self.end)]

    def _parse_xml_response(self, xml_content):
        """Parses XML response from the server.

        An example of response:

            <?xml version="1.0" encoding="EUC-KR" ?>
            <protocol>
                <chartdata symbol="005930" name="Samsung Elctronics" count="500"
                           timeframe="day" precision="0" origintime="19900103">
                    <item data="20170918|218500|222000|217000|220500|72124" />
                    <item data="20170919|218000|221000|217500|219000|62753" />
                    ...
            </protocol>

        :param xml_content: The XML document as text.
        :return: A generator yielding, for each ``chartdata/item`` element,
            the list of strings obtained by splitting its ``data`` attribute
            on ``|``.
        """
        root = ElementTree.fromstring(xml_content)
        items = root.findall("chartdata/item")

        for item in items:
            yield item.attrib["data"].split("|")
0 commit comments