1+ import datetime
12import re
23import arrow
34import pandas as pd
910from autoquant .mixin .data import IndexMixin , PriceMixin
1011from autoquant import Market , FundsIndex
1112
13+ from cachetools .func import ttl_cache
14+
1215
1316class EastmoneyProvider (PriceMixin , IndexMixin , Provider ):
1417 _UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
1518 _API_FUNDS_INDEX = "http://fund.eastmoney.com/js/fundcode_search.js"
19+ _API_FUNDS_DETAIL = "http://fundf10.eastmoney.com/jbgk_{}.html"
1620 _API_DAILY_PRICES = "http://fund.eastmoney.com/f10/F10DataApi.aspx?type=lsjz&code={}&page={}&sdate={}&edate={}&per={}"
1721
22+ @ttl_cache (maxsize = 5000 , ttl = 60 * 60 * 24 )
1823 def daily_prices (self , market : Market , code : str , start : date , end : date , ** kwargs ):
def __html(fund_code, start_date, end_date, page=1, per=40):
    """Fetch one page of a fund's daily net-value history.

    Args:
        fund_code: Fund code substituted into the history URL.
        start_date: Value for the ``sdate`` query parameter.
        end_date: Value for the ``edate`` query parameter.
        page: Page number to request.
        per: Number of rows requested per page.

    Returns:
        A ``(response, page_count)`` tuple: the response object with its
        encoding forced to UTF-8, and the integer parsed from the
        ``pages:`` field of the response text.

    Raises:
        IndexError: If the response text contains no ``pages:<digits>``.
    """
    url = self._API_DAILY_PRICES.format(fund_code, page, start_date, end_date, per)
    response = requests.get(url, headers={'User-Agent': self._UA})
    response.encoding = "utf-8"
    # Capture only the digits. A greedy `(.*),` extends to the last comma on
    # the line, so any comma-separated field following `pages` would be
    # swallowed into the capture and make int() fail.
    page_cnt = re.findall(r'pages:\s*(\d+)', response.text)[0]
    return response, int(page_cnt)
@@ -47,24 +52,43 @@ def __parse(HTML):
4752 df_ = __parse (html )
4853 res_df = pd .concat ([res_df , df_ ])
4954
50- df = pd .DataFrame ({
51- 'market' : market ,
52- 'code' : code ,
53- 'datetime' : res_df ['净值日期' ].astype ('datetime64[ns]' ),
54- 'close' : res_df ['单位净值' ].astype (float ),
55- 'close_acc' : res_df ['累计净值' ].astype (float ),
56- 'pct_change' : res_df ['日增长率' ].map (lambda x : x .strip ('%' )).astype (float ),
57- 'status_purchase' : res_df ['申购状态' ].map (lambda x : 'OPEN' if '开放' in x else 'CLOSE' ),
58- 'status_redeem' : res_df ['赎回状态' ].map (lambda x : 'OPEN' if '开放' in x else 'CLOSE' )
59- })
55+ if res_df .size :
56+ df = pd .DataFrame ({
57+ 'market' : market ,
58+ 'code' : code ,
59+ 'datetime' : res_df ['净值日期' ].map (lambda x : x .strip ('*' )).astype ('datetime64[ns]' ),
60+ 'close' : res_df ['单位净值' ].astype (float , errors = 'ignore' ),
61+ 'close_acc' : res_df ['累计净值' ].astype (float , errors = 'ignore' ),
62+ 'pct_change' : res_df ['日增长率' ].map (lambda x : x .strip ('%' )).astype (float , errors = 'ignore' ),
63+ 'status_purchase' : res_df ['申购状态' ].map (lambda x : 'OPEN' if '开放' in x else 'CLOSE' ),
64+ 'status_redeem' : res_df ['赎回状态' ].map (lambda x : 'OPEN' if '开放' in x else 'CLOSE' )
65+ })
66+ else :
67+ df = pd .DataFrame ([], columns = ['market' , 'code' , 'datetime' , 'close' , 'close_acc' , 'pct_change' , 'status_purchase' , 'status_redeem' ])
68+
6069 df .index = df ['datetime' ]
6170 return df
6271
72+ @ttl_cache (maxsize = 5000 , ttl = 60 * 60 * 24 )
6373 def funds_of_index (self , index : FundsIndex , ** kwargs ):
64- '''
65- get all funds via api: http://fund.eastmoney.com/js/fundcode_search.js
66- '''
67- res = requests .get (self ._API_FUNDS_INDEX , headers = {'User-Agent' : self ._UA })
# NOTE(review): this helper appears to be defined inside funds_of_index, so a
# fresh cache would be created on every outer call -- confirm whether the
# decorator is meant to live at a wider scope.
@ttl_cache(maxsize=5000, ttl=60 * 60 * 24)
def __detail(code):
    """Scrape the fund overview page for ``code`` and return its detail fields.

    Args:
        code: Fund code substituted into ``_API_FUNDS_DETAIL``.

    Returns:
        A list of eight strings taken from the cells of the second table
        on the page, in this order: fund code, fund company, fund manager,
        creation date, fund shares, fund type, performance benchmark,
        tracking target.

    Raises:
        IndexError: If the page has fewer than two tables, the table has
            fewer cells than the indexes used below, or cell 5 contains
            no ``/`` separator.
    """
    # Send the same User-Agent as the other requests made by this provider.
    response = requests.get(
        self._API_FUNDS_DETAIL.format(code),
        headers={'User-Agent': self._UA},
    )
    soup = BeautifulSoup(response.text, 'lxml')
    cells = soup.find_all("table")[1].find_all("td")

    # Cell 5 holds two values separated by "/"; split it once and reuse.
    date_and_share = cells[5].get_text().split("/")
    created_at = (
        date_and_share[0]
        .replace('年', '-')
        .replace('月', '-')
        .replace('日', '')
        .strip()
    )
    share = date_and_share[1].strip().replace("亿份", "")

    return [
        cells[2].get_text().replace("(前端)", ""),  # fund code
        cells[8].get_text(),  # fund company
        cells[10].get_text(),  # fund manager
        created_at,  # creation date
        share,  # fund shares
        cells[3].get_text(),  # fund type
        cells[18].get_text(),  # performance benchmark
        cells[19].get_text(),  # tracking target
    ]
90+
91+ res = requests .get (self ._API_FUNDS_INDEX , headers = {'User-Agent' : self ._UA })
6892 res .encoding = "utf-8"
6993 list_ = eval (re .findall (r'\[.*\]' , res .text )[0 ])
7094 df = pd .DataFrame (list_ )
@@ -76,8 +100,15 @@ def funds_of_index(self, index: FundsIndex, **kwargs):
76100 'code' : df ['code' ],
77101 'name' : df ['name' ],
78102 })
79-
80- return {
103+ filtered = {
81104 FundsIndex .CN_ALL : lambda : all ,
82- FundsIndex .CN_ETF : lambda : all [all ['name' ].str .contains ('ETF' )]
105+ FundsIndex .CN_ETF : lambda : all [all ['name' ].str .contains ('ETF' )],
106+ FundsIndex .CN_QDII : lambda : all [all ['name' ].str .contains ('QDII' )]
83107 }[index ]()
108+
109+ if not kwargs .get ('details' , False ):
110+ return filtered
111+
112+ details = [__detail (row ['code' ]) for _ , row in filtered .iterrows ()]
113+ details = pd .DataFrame (details , columns = ['code' , 'company' , 'manager' , 'created_at' , 'share' , 'type' , 'benchmark' , 'tracking' ])
114+ return pd .merge (filtered , details , on = "code" )
0 commit comments