-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathgrouping.py
More file actions
56 lines (48 loc) · 2.39 KB
/
grouping.py
File metadata and controls
56 lines (48 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
import numpy as np
from settings import *
from Io import *
from tqdm import tqdm
class sentimentGroup:
    """Sort stocks into factor tiles per sentiment regime and average their returns.

    For every (``SgnYear``, sentiment sign) cell the stocks are ordered by one
    factor column, cut into ``Tiles`` consecutive slices, and each slice is
    reduced to a market-value-weighted figure by :meth:`getVMReturn`.

    The class-level tables below are loaded once, when the class body is
    executed (i.e. at import time), and are shared by all instances.
    """

    # Project I/O helper used for every read and for saving the result.
    io = CsvIo()
    # Number of tiles each (year, sentiment sign) cell is cut into.
    _tiles = Tiles
    _sentiment = io.readData("st_SENTIMENT")
    # Sentiment joined to the factor table on the signal year.
    _factors = pd.merge(_sentiment, io.readData("fe_factors"), on=['SgnYear'])
    # Candidate factor names: merged columns from position 4 up to (excluding)
    # the last one, skipping every column whose name contains 'dummy'.
    _cols = [c for c in _factors.columns.tolist()[4:-1] if 'dummy' not in c]

    def __init__(self, col=None):
        """Prepare the per-instance factor table and the monthly return table.

        Args:
            col: Name of the factor column to sort on; defaults to ``'GS'``.
        """
        if col is None:
            col = 'GS'
        self.col = col
        # Build the MV alias on a new frame so the class-level table shared by
        # all instances is never modified.
        shared = self._factors
        with_mv = shared.assign(MV=shared['MarketValue'])
        self._factors = with_mv[['SgnYear', 'Stkcd', 'Return', 'SENTIMENT', 'MV', col]].copy()
        self.stockReturn = self.io.readData("fe_monthlyReturn")
        # Characters 5:7 of tradeDate are taken as the month number
        # (assumes a 'YYYY-MM...' style date string -- TODO confirm).
        self.stockReturn['month'] = self.stockReturn['tradeDate'].str[5:7].astype(int)

    def getFactors(self):
        """Yield the candidate factor column names one at a time."""
        yield from self._cols

    def fetch(self):
        """Compute, save and return the tile table for ``self.col``.

        Returns:
            A DataFrame with one row per ``positiveSentiment`` value, tile
            columns labelled ``1`` .. ``Tiles`` (averaged over years) and the
            spread columns ``'10-1'``, ``'10-5'`` and ``'5-1'``. The same frame
            is passed to ``io.saveData`` under ``group_<col>`` with ``/`` and
            ``+`` removed from the name.

        Note:
            The spreads are hard-coded to tiles 10, 5 and 1, so this assumes
            ``Tiles`` is at least 10.
        """
        tiles = self._tiles
        # Sentiment of exactly zero falls into the non-positive group.
        self._factors['positiveSentiment'] = self._factors['SENTIMENT'] > 0
        tqdm.pandas(desc="bar")

        def tile_returns(cell):
            # Integer arithmetic keeps slice boundaries exact; float
            # multiplication could round a boundary down and shift a row.
            size = len(cell)
            return pd.Series([
                self.getVMReturn(cell.iloc[size * i // tiles:size * (i + 1) // tiles])
                for i in range(tiles)
            ])

        ordered = self._factors.sort_values(by=["SgnYear", 'positiveSentiment', self.col])
        data = ordered.groupby(["SgnYear", 'positiveSentiment']) \
            .progress_apply(tile_returns) \
            .reset_index()
        # Average each tile over the years within each sentiment regime; the
        # grouping key becomes the index and is restored as a column below.
        data = data.drop('SgnYear', axis=1).groupby("positiveSentiment").mean()
        # Tile columns are 0-based integers; relabel them 1-based.
        data = data.rename(columns={c: c + 1 for c in data.columns if not isinstance(c, str)})
        data['10-1'] = data[10] - data[1]
        data['10-5'] = data[10] - data[5]
        data['5-1'] = data[5] - data[1]
        data = data.reset_index()
        self.io.saveData("group_{}".format(self.col).replace("/", "").replace("+", ""), data)
        return data

    def getVMReturn(self, df):
        """Return the average monthly MV-weighted ``Close`` of a tile, times 100.

        Args:
            df: Rows of the factor table belonging to one tile; must contain
                ``SgnYear``, ``Stkcd`` and ``MV``.

        Returns:
            The mean over months of ``sum(Close * MV / sum(MV))``, multiplied
            by 100. ``Close`` presumably holds the monthly return -- confirm
            against the ``fe_monthlyReturn`` data.
        """
        holdings = df[['SgnYear', 'Stkcd', 'MV']].drop_duplicates()
        merged = pd.merge(holdings, self.stockReturn, on=['SgnYear', 'Stkcd'])
        monthly = merged.groupby("month").apply(
            lambda g: np.sum(g['Close'] * g['MV'] / g['MV'].sum()))
        return monthly.mean() * 100
if __name__ == '__main__':
    # Run the grouping on market value first, then once for every factor
    # column the class exposes, printing each resulting table.
    print(sentimentGroup('MarketValue').fetch())
    factor_names = sentimentGroup().getFactors()
    for factor_name in factor_names:
        table = sentimentGroup(factor_name).fetch()
        print(table)