add example

Marcnuth · Marcnuth · commit 4abaee7e6105 · 2023-02-02T16:28:44.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -128,4 +128,8 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
-.vscode/
+.vscode/
+
+.output.*
+.DS_Store
+.jupyter
diff --git a/.pep8 b/.pep8
@@ -1,2 +1,3 @@
 [pep8]
-max-line-length = 160
+max-line-length = 160
+ignore = E731
diff --git a/autoquant/__init__.py b/autoquant/__init__.py
@@ -31,3 +31,4 @@ class FundsIndex(Enum):
     # A股
     CN_ETF = auto()  # A股ETF指数基金
     CN_ALL = auto()  # A股所有基金
+    CN_QDII = auto()
diff --git a/autoquant/provider/eastmoney.py b/autoquant/provider/eastmoney.py
@@ -1,3 +1,4 @@
+import datetime
 import re
 import arrow
 import pandas as pd
@@ -9,16 +10,20 @@
 from autoquant.mixin.data import IndexMixin, PriceMixin
 from autoquant import Market, FundsIndex
 
+from cachetools.func import ttl_cache
+
 
 class EastmoneyProvider(PriceMixin, IndexMixin, Provider):
     _UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
     _API_FUNDS_INDEX = "http://fund.eastmoney.com/js/fundcode_search.js"
+    _API_FUNDS_DETAIL = "http://fundf10.eastmoney.com/jbgk_{}.html"
     _API_DAILY_PRICES = "http://fund.eastmoney.com/f10/F10DataApi.aspx?type=lsjz&code={}&page={}&sdate={}&edate={}&per={}"
 
+    @ttl_cache(maxsize=5000, ttl=60 * 60 * 24)
     def daily_prices(self, market: Market, code: str, start: date, end: date, **kwargs):
-        def __html(fund_code, start_date, end_date, page=1, per=20):
+        def __html(fund_code, start_date, end_date, page=1, per=40):
             url = self._API_DAILY_PRICES.format(fund_code, page, start_date, end_date, per)
-            HTML = requests.get(url, headers={'User-Agent':  self._UA})
+            HTML = requests.get(url, headers={'User-Agent': self._UA})
             HTML.encoding = "utf-8"
             page_cnt = re.findall(r'pages:(.*),', HTML.text)[0]
             return HTML, int(page_cnt)
@@ -47,24 +52,43 @@ def __parse(HTML):
             df_ = __parse(html)
             res_df = pd.concat([res_df, df_])
 
-        df = pd.DataFrame({
-            'market': market,
-            'code': code,
-            'datetime': res_df['净值日期'].astype('datetime64[ns]'),
-            'close': res_df['单位净值'].astype(float),
-            'close_acc': res_df['累计净值'].astype(float),
-            'pct_change': res_df['日增长率'].map(lambda x: x.strip('%')).astype(float),
-            'status_purchase': res_df['申购状态'].map(lambda x: 'OPEN' if '开放' in x else 'CLOSE'),
-            'status_redeem': res_df['赎回状态'].map(lambda x: 'OPEN' if '开放' in x else 'CLOSE')
-        })
+        if res_df.size:
+            df = pd.DataFrame({
+                'market': market,
+                'code': code,
+                'datetime': res_df['净值日期'].map(lambda x: x.strip('*')).astype('datetime64[ns]'),
+                'close': res_df['单位净值'].astype(float, errors='ignore'),
+                'close_acc': res_df['累计净值'].astype(float, errors='ignore'),
+                'pct_change': res_df['日增长率'].map(lambda x: x.strip('%')).astype(float, errors='ignore'),
+                'status_purchase': res_df['申购状态'].map(lambda x: 'OPEN' if '开放' in x else 'CLOSE'),
+                'status_redeem': res_df['赎回状态'].map(lambda x: 'OPEN' if '开放' in x else 'CLOSE')
+            })
+        else:
+            df = pd.DataFrame([], columns=['market', 'code', 'datetime', 'close', 'close_acc', 'pct_change', 'status_purchase', 'status_redeem'])
+
         df.index = df['datetime']
         return df
 
+    @ttl_cache(maxsize=5000, ttl=60 * 60 * 24)
     def funds_of_index(self, index: FundsIndex, **kwargs):
-        '''
-            get all funds via api: http://fund.eastmoney.com/js/fundcode_search.js
-        '''
-        res = requests.get(self._API_FUNDS_INDEX, headers={'User-Agent':  self._UA})
+        @ttl_cache(maxsize=5000, ttl=60 * 60 * 24)
+        def __detail(code):
+            response = requests.get(self._API_FUNDS_DETAIL.format(code))
+            soup = BeautifulSoup(response.text, 'lxml')
+            table = soup.find_all("table")[1].find_all("td")
+
+            return [
+                table[2].get_text().replace("（前端）", ""),  # 基金代码
+                table[8].get_text(),  # 基金公司
+                table[10].get_text(),  # 基金经理
+                table[5].get_text().split("/")[0].replace('年', '-').replace('月', '-').replace('日', '').strip(),  # 创建时间
+                table[5].get_text().split("/")[1].strip().replace("亿份", ""),  # 基金份额
+                table[3].get_text(),  # 基金类型
+                table[18].get_text(),  # 业绩基准
+                table[19].get_text(),  # 跟踪标的
+            ]
+
+        res = requests.get(self._API_FUNDS_INDEX, headers={'User-Agent': self._UA})
         res.encoding = "utf-8"
         list_ = eval(re.findall(r'\[.*\]', res.text)[0])
         df = pd.DataFrame(list_)
@@ -76,8 +100,15 @@ def funds_of_index(self, index: FundsIndex, **kwargs):
             'code': df['code'],
             'name': df['name'],
         })
-
-        return {
+        filtered = {
             FundsIndex.CN_ALL: lambda: all,
-            FundsIndex.CN_ETF: lambda: all[all['name'].str.contains('ETF')]
+            FundsIndex.CN_ETF: lambda: all[all['name'].str.contains('ETF')],
+            FundsIndex.CN_QDII: lambda: all[all['name'].str.contains('QDII')]
         }[index]()
+
+        if not kwargs.get('details', False):
+            return filtered
+
+        details = [__detail(row['code']) for _, row in filtered.iterrows()]
+        details = pd.DataFrame(details, columns=['code', 'company', 'manager', 'created_at', 'share', 'type', 'benchmark', 'tracking'])
+        return pd.merge(filtered, details, on="code")
diff --git a/examples/zh-cn/CN_ETF.funds.csv b/examples/zh-cn/CN_ETF.funds.csv
diff --git a/examples/zh-cn/行业轮动.ipynb b/examples/zh-cn/行业轮动.ipynb