|
| 1 | +import re |
| 2 | +import arrow |
| 3 | +import pandas as pd |
| 4 | +from datetime import date |
| 5 | +import requests |
| 6 | +from bs4 import BeautifulSoup |
| 7 | + |
| 8 | +from . import Provider |
| 9 | +from autoquant.mixin.data import IndexMixin, PriceMixin |
| 10 | +from autoquant import Market, FundsIndex |
| 11 | + |
| 12 | + |
class EastmoneyProvider(PriceMixin, IndexMixin, Provider):
    '''
    Provider that reads fund data from the public fund.eastmoney.com endpoints.

    ``daily_prices`` scrapes the paginated net-value history table of a single
    fund; ``funds_of_index`` downloads the fund code list and filters it by the
    requested ``FundsIndex``.
    '''

    _UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    _API_FUNDS_INDEX = "http://fund.eastmoney.com/js/fundcode_search.js"
    _API_DAILY_PRICES = "http://fund.eastmoney.com/f10/F10DataApi.aspx?type=lsjz&code={}&page={}&sdate={}&edate={}&per={}"
    # Seconds to wait for each HTTP request; without a timeout a stalled
    # connection would block the caller indefinitely.
    _TIMEOUT = 10

    def daily_prices(self, market: Market, code: str, start: date, end: date, **kwargs):
        '''
        Fetch the daily net-value history of a fund between ``start`` and ``end``.

        :param market: must be ``Market.CN``; any other value fails the assertion.
        :param code: fund code, substituted into the ``code`` query parameter.
        :param start: first date of the range (``sdate`` query parameter).
        :param end: last date of the range (``edate`` query parameter).
        :param kwargs: accepted for interface compatibility; not used here.
        :return: DataFrame indexed by ``datetime`` with columns ``market``,
            ``code``, ``datetime``, ``close``, ``close_acc``, ``pct_change``,
            ``status_purchase`` and ``status_redeem``. Empty (but with these
            columns) when the endpoint returns no rows.
        :raises AssertionError: if ``market`` is not ``Market.CN``.
        :raises ValueError: if a response does not contain a ``pages:<n>`` field.
        '''
        # Intermediate column labels, in the order the cells are read from each row.
        columns = ['净值日期', '单位净值', '累计净值', '日增长率', '申购状态', '赎回状态']

        def fetch_page(page=1, per=20):
            # Download one page and return (response, total page count).
            url = self._API_DAILY_PRICES.format(code, page, start, end, per)
            response = requests.get(url, headers={'User-Agent': self._UA}, timeout=self._TIMEOUT)
            response.encoding = "utf-8"
            # Match only the digits after "pages:" instead of a greedy ".*,"
            # so the result does not depend on what follows the field.
            match = re.search(r'pages:(\d+)', response.text)
            if match is None:
                raise ValueError('no page count found in response from {}'.format(url))
            return response, int(match.group(1))

        def parse_rows(response):
            # Extract the first six cell texts of every data row in the page.
            soup = BeautifulSoup(response.text, 'html.parser')
            rows = []
            for tr in soup.find_all("tr")[1:]:  # first row is skipped as the header
                cells = [td.text for td in tr.find_all("td")]
                # Rows with too few cells (e.g. a placeholder row) carry no
                # price record and would otherwise raise IndexError.
                if len(cells) < len(columns):
                    continue
                # Cell order: NAV date, unit NAV, accumulated NAV, daily
                # growth rate, purchase status, redemption status.
                rows.append(cells[:len(columns)])
            return rows

        assert market == Market.CN, 'only Market.CN is supported in EastmoneyProvider::daily_prices'

        # The first response provides both the page count and page 1 itself,
        # so it is parsed directly instead of being requested a second time.
        first_response, pages = fetch_page()
        records = parse_rows(first_response)
        for page in range(2, pages + 1):
            response, _ = fetch_page(page=page)
            records.extend(parse_rows(response))

        # Build the frame once; always has the expected columns, even with no rows.
        res_df = pd.DataFrame(records, columns=columns)

        # The growth-rate cell may be blank; coerce such cells to NaN rather
        # than failing the whole conversion on float('').
        pct_change = pd.to_numeric(res_df['日增长率'].str.strip('%'), errors='coerce').astype(float)

        df = pd.DataFrame({
            'market': market,
            'code': code,
            'datetime': res_df['净值日期'].astype('datetime64[ns]'),
            'close': res_df['单位净值'].astype(float),
            'close_acc': res_df['累计净值'].astype(float),
            'pct_change': pct_change,
            'status_purchase': res_df['申购状态'].map(lambda x: 'OPEN' if '开放' in x else 'CLOSE'),
            'status_redeem': res_df['赎回状态'].map(lambda x: 'OPEN' if '开放' in x else 'CLOSE')
        })
        df.index = df['datetime']
        return df

    def funds_of_index(self, index: FundsIndex, **kwargs):
        '''
        get all funds via api: http://fund.eastmoney.com/js/fundcode_search.js

        :param index: ``FundsIndex.CN_ALL`` for every fund in the list, or
            ``FundsIndex.CN_ETF`` for funds whose name contains ``ETF``.
        :param kwargs: accepted for interface compatibility; not used here.
        :return: DataFrame with columns ``updated_at`` (today, ``YYYY-MM-DD``),
            ``market`` (``Market.CN``), ``code`` and ``name``.
        :raises KeyError: if ``index`` is not one of the supported values.
        :raises ValueError: if the response contains no bracketed list.
        '''
        # Local import keeps this block self-contained.
        import ast

        selectors = {
            FundsIndex.CN_ALL: lambda funds: funds,
            FundsIndex.CN_ETF: lambda funds: funds[funds['name'].str.contains('ETF')],
        }
        # Resolve the selector first so an unsupported index fails with
        # KeyError before any network request is made.
        select = selectors[index]

        res = requests.get(self._API_FUNDS_INDEX, headers={'User-Agent': self._UA}, timeout=self._TIMEOUT)
        res.encoding = "utf-8"
        match = re.search(r'\[.*\]', res.text)
        if match is None:
            raise ValueError('no fund list found in response from {}'.format(self._API_FUNDS_INDEX))
        # SECURITY: this text comes from a remote server. It used to be passed
        # to eval(), which would execute arbitrary code embedded in the
        # response; literal_eval only accepts plain literals.
        records = ast.literal_eval(match.group(0))
        df = pd.DataFrame(records, columns=['code', 'logogram', 'name', 'type', 'name_spell'])

        funds = pd.DataFrame({
            'updated_at': arrow.now().format('YYYY-MM-DD'),
            'market': Market.CN,
            'code': df['code'],
            'name': df['name'],
        })

        return select(funds)
0 commit comments