-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathforcast2.py
More file actions
102 lines (77 loc) · 3.29 KB
/
forcast2.py
File metadata and controls
102 lines (77 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import pandas as pd # 数据处理
import numpy as np # 使用数组
import matplotlib.pyplot as plt # 可视化
from matplotlib import rcParams # 图大小
from termcolor import colored as cl # 文本自定义
from sklearn.tree import DecisionTreeClassifier as dtc # 树算法
from sklearn.model_selection import train_test_split # 拆分数据
from sklearn.metrics import accuracy_score # 模型准确度
from sklearn.tree import plot_tree # 树图
from time import time
from sklearn.metrics import f1_score
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
import joblib
def train_classifier(clf, X_train, y_train):
    """Fit ``clf`` on the training data and print the fitting time.

    Args:
        clf: Estimator object exposing ``fit(X, y)``.
        X_train: Training features, passed unchanged to ``clf.fit``.
        y_train: Training labels, passed unchanged to ``clf.fit``.

    Returns:
        None. The elapsed time in seconds is printed to stdout.
    """
    # perf_counter is monotonic and high-resolution, so the reported duration
    # is not distorted by system clock adjustments the way time.time() can be.
    from time import perf_counter

    start = perf_counter()
    clf.fit(X_train, y_train)
    elapsed = perf_counter() - start
    print("训练时间 {:.4f} 秒".format(elapsed))
def predict_labels(clf, features, target):
    """Predict with ``clf`` and score the predictions against ``target``.

    Args:
        clf: Estimator object exposing ``predict(X)``.
        features: Feature matrix passed unchanged to ``clf.predict``.
        target: True labels to compare the predictions with.

    Returns:
        A ``(f1, accuracy)`` tuple: the macro-averaged F1 score and the
        fraction of predictions that equal the corresponding target label.
    """
    # Monotonic, high-resolution clock for measuring a short interval.
    from time import perf_counter

    start = perf_counter()
    y_pred = clf.predict(features)
    elapsed = perf_counter() - start
    print("预测时间 in {:.4f} 秒".format(elapsed))

    macro_f1 = f1_score(target, y_pred, average='macro')
    # Compare as arrays so the element-wise equality also works when the
    # inputs are plain lists (list == list would yield a single bool), and
    # let NumPy do the reduction instead of a Python-level sum().
    matches = np.asarray(target) == np.asarray(y_pred)
    accuracy = float(np.mean(matches))
    return macro_f1, accuracy
def train_predict(clf, X_train, y_train, X_test, y_test):
    """Fit ``clf`` and print its F1 score and accuracy on both data splits.

    The classifier is trained on ``(X_train, y_train)`` and then evaluated
    first on that same training split and afterwards on
    ``(X_test, y_test)``. Nothing is returned; results go to stdout.
    """
    # Announce which model is being trained and on how many samples.
    model_name = clf.__class__.__name__
    sample_count = len(X_train)
    print("训练 {} 模型,样本数量 {}。".format(model_name, sample_count))

    train_classifier(clf, X_train, y_train)

    # (report template, features, labels) for each split, in report order:
    # training split first, then the test split.
    evaluations = (
        ("训练集上的 F1 分数和准确率为: {:.4f} , {:.4f}。", X_train, y_train),
        ("测试集上的 F1 分数和准确率为: {:.4f} , {:.4f}。", X_test, y_test),
    )
    for template, split_features, split_target in evaluations:
        f1, acc = predict_labels(clf, split_features, split_target)
        print(template.format(f1, acc))
# Script body: load the match data, train three classifiers on it, report
# their scores and persist the XGBoost model to disk.

rcParams['figure.figsize'] = (25, 20)

df = pd.read_csv('matchData.csv')
print(cl(df.head(), attrs = ['bold']))
df.info()
print(cl(df, attrs = ['bold']))

# NOTE(review): 'leagueGoapDif' looks like a misspelling of 'leagueGoalDif';
# it is kept as-is because it must match the CSV header — confirm.
X_var = df[['fightRateDif', 'fightGoalDif', 'recentRateDif', 'recentGoalDif', 'leagueRateDif', 'leagueGoapDif',
            'supportDif']].values  # independent variables (features)
y_var = df['result'].values  # dependent variable (label)
#print(cl('X variable samples : {}'.format(X_var[:5]), attrs = ['bold']))
#print(cl('Y variable samples : {}'.format(y_var[:5]), attrs = ['bold']))

# Hold out 10% of the rows for testing; fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(X_var, y_var, test_size = 0.1, random_state = 0)

# Disabled experiment: move the first tenth of the training rows into the
# test split.
# temp1, temp2 = np.split(X_train, [len(X_train) // 10])
# X_test = np.concatenate([X_test, temp1], axis=0)
# print(len(X_test))
# temp3, temp4 = np.split(y_train, [len(y_train) // 10])
# y_test = np.concatenate([y_test, temp3], axis=0)
# print(len(y_test))

# Build the three models to compare.
clf_A = LogisticRegression(random_state=42)
clf_B = SVC(random_state=42, kernel='rbf', gamma='auto')
clf_C = xgb.XGBClassifier(seed=42)

# Encode the labels as integers. The encoder is fitted on the training labels
# only and then reused for the test labels, so both splits share one
# label -> integer mapping. Refitting on the test labels could silently
# produce a different mapping whenever the test split does not contain
# exactly the same set of labels.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

train_predict(clf_A, X_train, y_train, X_test, y_test)
print('')
train_predict(clf_B, X_train, y_train, X_test, y_test)
print('')
train_predict(clf_C, X_train, y_train, X_test, y_test)
print('')

# NOTE(review): only the classifier is saved; the LabelEncoder is not, so
# code loading this file gets encoded class indices — confirm this is intended.
joblib.dump(clf_C, 'predict2.model')
#model = joblib.load('predict.model')