-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstep2t3_update_raw.py
More file actions
139 lines (114 loc) · 5.32 KB
/
step2t3_update_raw.py
File metadata and controls
139 lines (114 loc) · 5.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Transform index price series into the ln-bounded form.
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
from pathlib import Path
from garch_utils.getList import getItemNameFromJson,getParameterListFromJson
import itertools
import warnings
from garch_utils.inputForm import inputForm
def extractRaw(itemList,itemType):
    """Merge each item's newly downloaded rows into its raw history and export the result.

    For every name in ``itemList`` this reads ``<itemType>/updating/new/<item>_temp.csv``
    and ``<itemType>/original/raw/<item>.csv``, stacks the raw rows followed by the new
    rows, keeps only the last row for any repeated index value, and writes:

    * the full merged table to ``<itemType>/updating/raw/<item>.csv``
    * only its ``Close`` column to ``<itemType>/updating/extracted/<item>.csv``

    All four directories are created up front if they do not exist.

    Args:
        itemList: iterable of item names (used as CSV file stems).
        itemType: leading path component under which the folders live.
    """
    folders = {
        key: template.format(itemType)
        for key, template in (
            ("removed", "{}/updating/raw/"),
            ("extracted", "{}/updating/extracted/"),
            ("raw", "{}/original/raw/"),
            ("new", "{}/updating/new/"),
        )
    }
    for folder in folders.values():
        Path(folder).mkdir(parents=True, exist_ok=True)

    # Both inputs are parsed the same way: day-first 'Date' column as the index,
    # with the literal string "null" treated as a missing value.
    csv_options = dict(parse_dates=['Date'], dayfirst=True, index_col=0, na_values=["null"])

    for item in itemList:
        # Rows with any missing value are discarded from the new data only.
        fresh = pd.read_csv("{}{}_temp.csv".format(folders["new"], item), **csv_options).dropna()
        history = pd.read_csv("{}{}.csv".format(folders["raw"], item), **csv_options)

        # Raw rows come first, so keep='last' lets a new row replace a raw row
        # that shares the same index value.
        merged = pd.concat([history, fresh])
        superseded = merged.index.duplicated(keep='last')
        merged = merged[~superseded]

        merged.to_csv(folders["removed"] + item + ".csv", sep=",", index=True)
        merged["Close"].to_csv(folders["extracted"] + item + ".csv", sep=",", index=True, header=True)
def processRaw(params,itemType):
    """Build the moving-average band for one item and write its ln-bounded transform.

    Reads the first two columns of ``<itemType>/updating/extracted/<item>.csv``
    (a day-first ``Date`` index plus one price column), computes a rolling mean
    over ``day`` rows, a band ``S_U``/``S_L`` around that mean, and

        bounded_x = -ln((S_U - price) / (S_U - S_L))

    Two CSV files are written below ``<itemType>/updating/tor<epsilon>/``:
    the transform alone under ``toanalysis/SD<SD*100>/day<day>/`` and the full
    working table under ``table/SD<SD*100>/day<day>/``.

    If the logarithm raises a NumPy ``RuntimeWarning`` (non-positive ratio),
    the parameters are printed and appended to ``<itemType>errorList.txt`` in
    the ``tor<epsilon>`` folder; the transform is still written, with whatever
    NaN/inf values NumPy produced.

    Args:
        params: indexable sequence. ``params[0]`` is the item name,
            ``params[1]`` the SD multiplier, ``params[2]`` the rolling window
            length, and the optional ``params[4]`` the tolerance ``epsilon``.
            ``params[3]`` is not read here.
        itemType: leading path component for every input/output folder.
    """
    item = params[0]
    SD = params[1]
    day = params[2]
    print(params)
    try:
        epsilon = float(params[4])
    except (IndexError, TypeError, ValueError):
        # Missing or unparsable tolerance. Keep the integer 0 so that the
        # folder/file names read "tor0", exactly as before.
        epsilon = 0

    MAname = "{}MA".format(day)
    SDstring = int(SD*100)
    filestring = "tor{}_day{}_SD{}_".format(epsilon,day,SDstring)
    names = item + ".csv"
    filepath = {
        "extracted": "{}/updating/extracted/".format(itemType),
        "toanalysis": "{}/updating/tor{}/toanalysis/SD{}/day{}/".format(itemType,epsilon,SDstring,day),
        "table": "{}/updating/tor{}/table/SD{}/day{}/".format(itemType,epsilon,SDstring,day),
        "root": "{}/updating/tor{}/".format(itemType,epsilon),
    }
    for f in filepath.values():
        Path(f).mkdir(parents=True, exist_ok=True)

    Si = pd.read_csv("{}{}.csv".format(filepath["extracted"],item), parse_dates=['Date'] , dayfirst=True, usecols=[0,1], index_col=0 , na_values=["null"])
    Si = Si.dropna() #remove null values
    Sm = Si.rolling(window=day).mean().dropna() #rolling mean of <day> rows
    Sm.columns = [MAname]
    Si = Si.drop(Si.index[0:day-1]) #drop the leading rows that have no rolling mean
    normalize = Si.div(Sm[MAname], axis='index')
    normalize.columns = ["Normalize"]

    # Band half-width: 0.25*SD*MA wherever |MA| >= epsilon; otherwise the most
    # recent qualifying half-width is carried forward (0 before the first one).
    # NOTE(review): the indentation of the original per-row loop was lost in
    # the source this was reviewed from. This assumes only the `wide = ...`
    # assignment was guarded by the epsilon test while S_U/S_L/thickness were
    # written on every row -- the only reading in which the `wide = 0`
    # initialiser has any effect. With the default epsilon of 0 both readings
    # give identical output. Please confirm against the original file.
    # Computed column-wise because positional `Series[i]` access on a
    # date-indexed Series is deprecated in pandas 2.x and removed in 3.x.
    ma = Sm[MAname]
    wide = (0.25*SD * ma).where(ma.abs() >= epsilon).ffill().fillna(0)
    thickness = wide.abs().rename("thickness")
    SU = (ma + thickness).rename("S_U")
    SL = (ma - thickness).rename("S_L")

    ratio = (SU - Si.iloc[:,0])/(SU - SL)
    try:
        # Promote NumPy's RuntimeWarning to an exception for this one call only;
        # catch_warnings restores the caller's warning filters on exit instead
        # of leaving the process-wide filter list modified.
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            transformed = pd.DataFrame(-np.log(ratio))
    except RuntimeWarning:
        print('negative log encounted in ', params)
        with open(filepath["root"] + itemType + "errorList.txt", "a+") as f:
            f.write("{} {} {} {}\n".format(params[0], params[1], params[2],epsilon))
        # Recompute under the caller's normal warning handling so the output
        # files are still produced (offending rows come out as NaN/inf).
        transformed = pd.DataFrame(-np.log(ratio))
    transformed.columns = ["bounded_x"]

    # Column order: price, moving average, Normalize, S_U, S_L, thickness, bounded_x.
    table = pd.concat([Si, Sm, normalize,SU,SL,thickness,transformed], axis=1)
    transformed.to_csv(filepath["toanalysis"] + "bounded_" + filestring + names, sep=",", index=True)
    table.to_csv(filepath["table"] + filestring + names, sep=",", index=True)
def updateRaw(itemType,region):
    """Run the raw-data update for one item type and region.

    Looks up the item names and the parameter list via
    ``getItemNameFromJson`` / ``getParameterListFromJson``, calls
    ``extractRaw`` unless ``itemType`` contains the substring "bond", and then
    calls ``processRaw`` once per parameter entry. Both helpers receive
    ``itemType + region`` as their folder prefix.

    Args:
        itemType: item category; also checked for the substring "bond".
        region: appended to ``itemType`` to form the folder prefix.

    Returns:
        Always 0.
    """
    # NOTE(review): indentation was lost in the source this was reviewed from.
    # Both progress prints are assumed to sit at function level (after the
    # `if` and after the `for` loop respectively) -- confirm against the
    # original file.
    #sdList = [1.5,1.75,2,2.5]
    #dayList = [30,50,60,90,120]
    indexList = getItemNameFromJson(itemType,region)
    # The merge/extract stage is skipped when itemType contains "bond".
    if "bond" not in itemType:
        extractRaw(indexList,itemType + region)
    print("extractRaw done")
    paramList = getParameterListFromJson(itemType,region)
    #paramList = (("0175.HK",2,30,"stockHK"),("0175.HK",2,60,"stockHK"),("0175.HK",2,90,"stockHK"),("0175.HK",2,120,"stockHK"))
    #print(paramList)
    for param in paramList:
        processRaw(param,itemType + region)
    print("processRaw done")
    return 0
if __name__ == "__main__":
    # Script entry point: inputForm() supplies the (itemType, region) pair
    # that drives the whole update run.
    selection = inputForm()
    itemType, region = selection
    updateRaw(itemType, region)